Package translate :: Package misc :: Module ourdom
[hide private]
[frames | no frames]

Source Code for Module translate.misc.ourdom

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2004-2007 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21  # 
 22   
 23  """module that provides modified DOM functionality for our needs 
 24   
 25  Note that users of ourdom should ensure that no code might still use classes  
 26  directly from minidom, like minidom.Element, minidom.Document or methods such  
 27  as minidom.parseString, since the functionality provided here will not be in  
 28  those objects. 
 29  """ 
 30   
 31  from xml.dom import minidom 
 32  from xml.dom import expatbuilder 
 33   
 34  # helper functions we use to do xml the way we want, used by modified classes below 
 35   
def writexml_helper(self, writer, indent="", addindent="", newl=""):
    """A replacement for writexml that formats it like typical XML files.

    Nodes are indented, but text nodes, where whitespace can be
    significant, are not indented.

    :param self: the element node to serialise (this function is used as
        the body of ``Element.writexml``)
    :param writer: any object with a ``write()`` method
    :param indent: current indentation
    :param addindent: indentation to add to higher levels
    :param newl: newline string
    """
    # Imported locally so this helper does not rely on minidom's private
    # _write_data(), whose signature is not stable across Python versions.
    from xml.sax.saxutils import escape

    writer.write(indent + "<" + self.tagName)

    attrs = self._get_attributes()
    # sorted() works whether keys() hands back a list or a view object.
    for a_name in sorted(attrs.keys()):
        writer.write(" %s=\"" % a_name)
        # Escape &, <, > and the double quote that delimits the value.
        writer.write(escape(attrs[a_name].value, {'"': "&quot;"}))
        writer.write("\"")

    if not self.childNodes:
        writer.write("/>%s" % newl)
        return

    # We need to write text nodes without newline and indentation, so we
    # handle them differently. Note that we here assume that "empty" text
    # nodes can be done away with (see the strip()). Note also that nested
    # tags in a text node (like ph tags in xliff) should also not have
    # newlines and indentation or an extra newline, since that will alter
    # the text node.
    have_text = any(
        child.nodeType == self.TEXT_NODE and child.data.strip()
        for child in self.childNodes
    )
    if have_text:
        writer.write(">")
        for node in self.childNodes:
            node.writexml(writer, "", "", "")
        writer.write("</%s>%s" % (self.tagName, newl))
    else:
        # This is the normal case that we do with pretty layout;
        # whitespace-only text children are dropped.
        writer.write(">%s" % newl)
        for node in self.childNodes:
            if node.nodeType != self.TEXT_NODE:
                node.writexml(writer, indent + addindent, addindent, newl)
        writer.write("%s</%s>%s" % (indent, self.tagName, newl))
78
def getElementsByTagName_helper(parent, name, dummy=None):
    """A reimplementation of getElementsByTagName as an iterator.

    Note that this is not compatible with getElementsByTagName that returns
    a list, therefore, the class below exposes this through
    yieldElementsByTagName.

    :param parent: node whose descendants are searched (``parent`` itself
        is never yielded)
    :param name: tag name to match, or ``"*"`` to match every element
    :param dummy: unused by this function
    :return: generator yielding matching element nodes, each node before
        its own descendants
    """
    for node in parent.childNodes:
        if node.nodeType == minidom.Node.ELEMENT_NODE and \
           (name == "*" or node.tagName == name):
            yield node
        if node.hasChildNodes():
            # Recurse through this generator rather than the list-building
            # node.getElementsByTagName(), so subtrees are walked lazily.
            for othernode in getElementsByTagName_helper(node, name):
                yield othernode
92
def searchElementsByTagName_helper(parent, name, onlysearch):
    """Yield matching elements, descending only into tags in onlysearch.

    Every direct child element of ``parent`` is tested against ``name``.
    The search continues below a child only when that child's tag name
    occurs in ``onlysearch``.

    :param parent: node whose children are examined
    :param name: tag name to match, or ``"*"`` to match every element
    :param onlysearch: container of tag names that may be descended into
    :return: generator yielding matching element nodes
    """
    for node in parent.childNodes:
        if node.nodeType != minidom.Node.ELEMENT_NODE:
            continue
        if name == "*" or node.tagName == name:
            yield node
        if node.tagName in onlysearch:
            # Recurse through this helper directly so the search also works
            # on nodes that lack a searchElementsByTagName method, and use a
            # separate loop variable instead of rebinding ``node``.
            for descendant in searchElementsByTagName_helper(node, name, onlysearch):
                yield descendant
102
def getFirstElementByTagName(node, name):
    """Return the first element found by node.yieldElementsByTagName(name).

    :param node: object providing a ``yieldElementsByTagName(name)`` method
    :param name: tag name passed through to that method
    :return: the first result, or ``None`` when there are no results
    """
    results = node.yieldElementsByTagName(name)
    # The next() builtin (rather than the Python 2 only .next() method)
    # works on every supported version; iter() additionally lets a plain
    # list of results be handled the same way as a generator.
    return next(iter(results), None)
115
def getnodetext(node):
    """Return the node's text by iterating through the child nodes.

    Only direct children whose nodeType is TEXT_NODE contribute; their
    ``data`` values are concatenated in order.

    :param node: a DOM node, or ``None``
    :return: the concatenated text, or ``""`` when ``node`` is ``None``
    """
    if node is None:
        return ""
    return "".join(
        child.data
        for child in node.childNodes
        if child.nodeType == child.TEXT_NODE
    )

# various modifications to minidom classes to add functionality we like

class DOMImplementation(minidom.DOMImplementation):
    """A minidom.DOMImplementation that creates this module's Document."""

    def _create_document(self):
        """Return a new instance of this module's Document class."""
        return Document()
126
class Element(minidom.Element):
    """minidom.Element extended with the helper functions of this module."""

    def yieldElementsByTagName(self, name):
        """Return an iterator over descendant elements named ``name``.

        This is the generator counterpart of getElementsByTagName; see
        getElementsByTagName_helper.
        """
        return getElementsByTagName_helper(self, name)

    def searchElementsByTagName(self, name, onlysearch):
        """Return an iterator over elements named ``name``, descending only
        into tags listed in ``onlysearch``; see
        searchElementsByTagName_helper.
        """
        return searchElementsByTagName_helper(self, name, onlysearch)

    def writexml(self, writer, indent="", addindent="", newl=""):
        """Write this element to ``writer`` using writexml_helper.

        The formatting arguments default to empty strings, the same
        defaults writexml_helper itself declares.
        """
        return writexml_helper(self, writer, indent, addindent, newl)
134
class Document(minidom.Document):
    """minidom.Document that creates this module's Element objects."""

    # Instance of this module's DOMImplementation subclass.
    implementation = DOMImplementation()

    def yieldElementsByTagName(self, name):
        """Return an iterator over descendant elements named ``name``.

        This is the generator counterpart of getElementsByTagName; see
        getElementsByTagName_helper.
        """
        return getElementsByTagName_helper(self, name)

    def searchElementsByTagName(self, name, onlysearch):
        """Return an iterator over elements named ``name``, descending only
        into tags listed in ``onlysearch``; see
        searchElementsByTagName_helper.
        """
        return searchElementsByTagName_helper(self, name, onlysearch)

    def createElement(self, tagName):
        """Create an Element named ``tagName`` owned by this document."""
        e = Element(tagName)
        e.ownerDocument = self
        return e

    def createElementNS(self, namespaceURI, qualifiedName):
        """Create a namespaced Element owned by this document.

        :param namespaceURI: namespace URI for the new element
        :param qualifiedName: element name, optionally with a prefix
        """
        # _nssplit is a minidom helper and is not defined in this module,
        # so it has to be looked up on minidom.
        prefix, _local_name = minidom._nssplit(qualifiedName)
        e = Element(qualifiedName, namespaceURI, prefix)
        e.ownerDocument = self
        return e

# Module-level DOMImplementation instance; ExpatBuilderNS.reset() calls its
# createDocument() to obtain the document that is being built.
theDOMImplementation = DOMImplementation()

# an ExpatBuilder that allows us to use the above modifications

class ExpatBuilderNS(expatbuilder.ExpatBuilderNS):
    """Namespace-aware expat builder that builds documents and elements
    from the classes defined in this module."""

    def reset(self):
        """Free all data structures used during DOM construction."""
        self.document = theDOMImplementation.createDocument(
            expatbuilder.EMPTY_NAMESPACE, None, None)
        self.curNode = self.document
        self._elem_info = self.document._elem_info
        self._cdata = False
        self._initNamespaces()

    def start_element_handler(self, name, attributes):
        """Create an Element for a start tag and attach its attributes.

        :param name: element name; a name containing a space is split by
            expatbuilder._parse_ns_name into its namespace parts
        :param attributes: flat sequence of alternating attribute names
            and values
        """
        # all we want to do is construct our own Element instead of minidom.Element
        # unfortunately the only way to do this is to copy this whole function from expatbuilder.py
        if ' ' in name:
            uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
        else:
            uri = expatbuilder.EMPTY_NAMESPACE
            qname = name
            localname = None
            prefix = expatbuilder.EMPTY_PREFIX
        node = Element(qname, uri, prefix, localname)
        node.ownerDocument = self.document
        expatbuilder._append_child(self.curNode, node)
        self.curNode = node

        if self._ns_ordered_prefixes:
            for prefix, uri in self._ns_ordered_prefixes:
                if prefix:
                    a = minidom.Attr(expatbuilder._intern(self, 'xmlns:' + prefix),
                                     expatbuilder.XMLNS_NAMESPACE, prefix, "xmlns")
                else:
                    a = minidom.Attr("xmlns", expatbuilder.XMLNS_NAMESPACE,
                                     "xmlns", expatbuilder.EMPTY_PREFIX)
                # Plain attribute assignment instead of writing into the
                # instance __dict__ of the Attr and its text child: that
                # shortcut depends on minidom storing these values in
                # __dict__, which is not the case on every Python version.
                a.value = uri
                a.ownerDocument = self.document
                expatbuilder._set_attribute_node(node, a)
            del self._ns_ordered_prefixes[:]

        if attributes:
            # Where minidom creates the attribute dictionaries lazily,
            # make sure they exist before they are filled in directly.
            ensure_attributes = getattr(node, "_ensure_attributes", None)
            if ensure_attributes is not None:
                ensure_attributes()
            _attrs = node._attrs
            _attrsNS = node._attrsNS
            # attributes is a flat name, value, name, value, ... sequence
            for i in range(0, len(attributes), 2):
                aname = attributes[i]
                value = attributes[i+1]
                if ' ' in aname:
                    uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, aname)
                    a = minidom.Attr(qname, uri, localname, prefix)
                    _attrs[qname] = a
                    _attrsNS[(uri, localname)] = a
                else:
                    a = minidom.Attr(aname, expatbuilder.EMPTY_NAMESPACE,
                                     aname, expatbuilder.EMPTY_PREFIX)
                    _attrs[aname] = a
                    _attrsNS[(expatbuilder.EMPTY_NAMESPACE, aname)] = a
                a.ownerDocument = self.document
                a.value = value
                a.ownerElement = node

    if __debug__:
        # This only adds some asserts to the original
        # end_element_handler(), so we only define this when -O is not
        # used.  If changing one, be sure to check the other to see if
        # it needs to be changed as well.
        #
        def end_element_handler(self, name):
            """Close the current element, asserting it matches ``name``."""
            curNode = self.curNode
            if ' ' in name:
                uri, localname, prefix, qname = expatbuilder._parse_ns_name(self, name)
                assert (curNode.namespaceURI == uri
                        and curNode.localName == localname
                        and curNode.prefix == prefix), \
                        "element stack messed up! (namespace)"
            else:
                assert curNode.nodeName == name, \
                       "element stack messed up - bad nodeName"
                assert curNode.namespaceURI == expatbuilder.EMPTY_NAMESPACE, \
                       "element stack messed up - bad namespaceURI"
            self.curNode = curNode.parentNode
            self._finish_end_element(curNode)

# parser methods that use our modified xml classes

def parse(file, parser=None, bufsize=None):
    """Parse a file into a DOM by filename or file object.

    :param file: a filename, or an already opened file object
    :param parser: unused by this function
    :param bufsize: unused by this function
    :return: the result of ExpatBuilderNS.parseFile()
    """
    # basestring only exists on Python 2; fall back to the Python 3 string
    # types so that a filename is still recognised there.
    try:
        string_types = basestring
    except NameError:
        string_types = (str, bytes)

    builder = ExpatBuilderNS()
    if isinstance(file, string_types):
        # We opened the file ourselves, so we are responsible for closing
        # it, also when parsing raises.
        with open(file, 'rb') as fp:
            return builder.parseFile(fp)
    return builder.parseFile(file)
255
def parseString(string, parser=None):
    """Parse a string into a DOM.

    :param string: the XML text to parse
    :param parser: unused by this function
    :return: the result of ExpatBuilderNS.parseString()
    """
    builder = ExpatBuilderNS()
    return builder.parseString(string)
260