1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """module that provides modified DOM functionality for our needs
24
25 Note that users of ourdom should ensure that no code might still use classes
26 directly from minidom, like minidom.Element, minidom.Document or methods such
27 as minidom.parseString, since the functionality provided here will not be in
28 those objects.
29 """
30
31 from xml.dom import minidom
32 from xml.dom import expatbuilder
33
34
35
36
38 """A replacement for writexml that formats it like typical XML files.
39 Nodes are indented, but text nodes, where whitespace can be significant, are not indented."""
40
41
42
43 writer.write(indent+"<" + self.tagName)
44
45 attrs = self._get_attributes()
46 a_names = attrs.keys()
47 a_names.sort()
48
49 for a_name in a_names:
50 writer.write(" %s=\"" % a_name)
51 minidom._write_data(writer, attrs[a_name].value)
52 writer.write("\"")
53 if self.childNodes:
54
55
56
57
58
59
60 haveText = False
61 for childNode in self.childNodes:
62 if childNode.nodeType == self.TEXT_NODE and childNode.data.strip():
63 haveText = True
64 break
65 if haveText:
66 writer.write(">")
67 for node in self.childNodes:
68 node.writexml(writer, "", "", "")
69 writer.write("</%s>%s" % (self.tagName, newl))
70 else:
71
72 writer.write(">%s"%(newl))
73 for node in self.childNodes:
74 if node.nodeType != self.TEXT_NODE:
75 node.writexml(writer, indent+addindent, addindent, newl)
76 writer.write("%s</%s>%s" % (indent, self.tagName, newl))
77 else:
78 writer.write("/>%s"%(newl))
79
80
82 """A reimplementation of getElementsByTagName as an iterator.
83
84 Note that this is not compatible with getElementsByTagName that returns a
85 list, therefore, the class below exposes this through yieldElementsByTagName"""
86
87 for node in parent.childNodes:
88 if node.nodeType == minidom.Node.ELEMENT_NODE and \
89 (name == "*" or node.tagName == name):
90 yield node
91 if node.hasChildNodes():
92 for othernode in node.getElementsByTagName(name):
93 yield othernode
94
95
97 """limits the search to within tags occurring in onlysearch"""
98 for node in parent.childNodes:
99 if node.nodeType == minidom.Node.ELEMENT_NODE and \
100 (name == "*" or node.tagName == name):
101 yield node
102 if node.nodeType == minidom.Node.ELEMENT_NODE and node.tagName in onlysearch:
103 for node in node.searchElementsByTagName(name, onlysearch):
104 yield node
105
106
108 results = node.yieldElementsByTagName(name)
109
110
111
112
113
114 try:
115 result = results.next()
116 return result
117 except StopIteration:
118 return None
119
120
def getnodetext(node):
    """Return the text held directly inside ``node``.

    The ``data`` of every immediate child whose ``nodeType`` equals
    ``TEXT_NODE`` is concatenated in document order.  Children of any other
    type are ignored and are not descended into.

    When ``node`` is ``None`` an empty string is returned.
    """
    if node is None:
        return ""
    pieces = []
    for child in node.childNodes:
        # Only direct text children contribute to the result.
        if child.nodeType == child.TEXT_NODE:
            pieces.append(child.data)
    return "".join(pieces)
126
127
128
129
134
135
137
140
143
144 - def writexml(self, writer, indent, addindent, newl):
146
147
167
168 theDOMImplementation = DOMImplementation()
169
170
171
172
174
176 """Free all data structures used during DOM construction."""
177 self.document = theDOMImplementation.createDocument(
178 expatbuilder.EMPTY_NAMESPACE, None, None)
179 self.curNode = self.document
180 self._elem_info = self.document._elem_info
181 self._cdata = False
182 self._initNamespaces()
183
185
186
187 if ' ' in name:
188 uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
189 else:
190 uri = expatbuilder.EMPTY_NAMESPACE
191 qname = name
192 localname = None
193 prefix = expatbuilder.EMPTY_PREFIX
194 node = Element(qname, uri, prefix, localname)
195 node.ownerDocument = self.document
196 expatbuilder._append_child(self.curNode, node)
197 self.curNode = node
198
199 if self._ns_ordered_prefixes:
200 for prefix, uri in self._ns_ordered_prefixes:
201 if prefix:
202 a = minidom.Attr(expatbuilder._intern(self, 'xmlns:' + prefix),
203 expatbuilder.XMLNS_NAMESPACE, prefix, "xmlns")
204 else:
205 a = minidom.Attr("xmlns", expatbuilder.XMLNS_NAMESPACE,
206 "xmlns", expatbuilder.EMPTY_PREFIX)
207 d = a.childNodes[0].__dict__
208 d['data'] = d['nodeValue'] = uri
209 d = a.__dict__
210 d['value'] = d['nodeValue'] = uri
211 d['ownerDocument'] = self.document
212 expatbuilder._set_attribute_node(node, a)
213 del self._ns_ordered_prefixes[:]
214
215 if attributes:
216 _attrs = node._attrs
217 _attrsNS = node._attrsNS
218 for i in range(0, len(attributes), 2):
219 aname = attributes[i]
220 value = attributes[i+1]
221 if ' ' in aname:
222 uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, aname)
223 a = minidom.Attr(qname, uri, localname, prefix)
224 _attrs[qname] = a
225 _attrsNS[(uri, localname)] = a
226 else:
227 a = minidom.Attr(aname, expatbuilder.EMPTY_NAMESPACE,
228 aname, expatbuilder.EMPTY_PREFIX)
229 _attrs[aname] = a
230 _attrsNS[(expatbuilder.EMPTY_NAMESPACE, aname)] = a
231 d = a.childNodes[0].__dict__
232 d['data'] = d['nodeValue'] = value
233 d = a.__dict__
234 d['ownerDocument'] = self.document
235 d['value'] = d['nodeValue'] = value
236 d['ownerElement'] = node
237
238 if __debug__:
239
240
241
242
243
245 curNode = self.curNode
246 if ' ' in name:
247 uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
248 assert (curNode.namespaceURI == uri
249 and curNode.localName == localname
250 and curNode.prefix == prefix), \
251 "element stack messed up! (namespace)"
252 else:
253 assert curNode.nodeName == name, \
254 "element stack messed up - bad nodeName"
255 assert curNode.namespaceURI == expatbuilder.EMPTY_NAMESPACE, \
256 "element stack messed up - bad namespaceURI"
257 self.curNode = curNode.parentNode
258 self._finish_end_element(curNode)
259
260
261
262
def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object.

    ``file`` is either a string (``basestring``) naming a file, or an object
    that is handed to the builder's ``parseFile`` unchanged.  A named file is
    opened in binary mode and closed again whether or not parsing succeeds.

    ``parser`` and ``bufsize`` are accepted but not used by this function.

    Returns whatever ``ExpatBuilderNS.parseFile`` returns.
    """
    builder = ExpatBuilderNS()
    if not isinstance(file, basestring):
        # Already a file-like object: the caller keeps ownership of it.
        return builder.parseFile(file)
    stream = open(file, 'rb')
    try:
        return builder.parseFile(stream)
    finally:
        stream.close()
275
276
281