1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Parent class for LISA standards (TMX, TBX, XLIFF)"""
22
23 import re
24
25 try:
26 from lxml import etree
27 from translate.misc.xml_helpers import getText, getXMLlang, setXMLlang, \
28 getXMLspace, setXMLspace, namespaced
29 except ImportError, e:
30 raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.")
31
32 from translate.storage import base
33 from translate.lang import data
34
35
37 """
38 A single unit in the file. Provisional work is done to make several
39 languages possible.
40 """
41
42
43 rootNode = ""
44
45
46 languageNode = ""
47
48 textNode = ""
49
50 namespace = None
51 _default_xml_space = "preserve"
52 """The default handling of spacing in the absence of an xml:space
53 attribute.
54
55 This is mostly for correcting XLIFF behaviour."""
56
57 - def __init__(self, source, empty=False, **kwargs):
67
87
89 """Returns name in Clark notation.
90
91 For example namespaced("source") in an XLIFF document might return::
92 {urn:oasis:names:tc:xliff:document:1.1}source
93 This is needed throughout lxml.
94 """
95 return namespaced(self.namespace, name)
96
98 languageNodes = self.getlanguageNodes()
99 if len(languageNodes) > 0:
100 self.xmlelement.replace(languageNodes[0], dom_node)
101 else:
102 self.xmlelement.append(dom_node)
103
106 source_dom = property(get_source_dom, set_source_dom)
107
113
118 source = property(getsource, setsource)
119
121 languageNodes = self.getlanguageNodes()
122 assert len(languageNodes) > 0
123 if dom_node is not None:
124 if append or len(languageNodes) == 0:
125 self.xmlelement.append(dom_node)
126 else:
127 self.xmlelement.insert(1, dom_node)
128 if not append and len(languageNodes) > 1:
129 self.xmlelement.remove(languageNodes[1])
130
136 target_dom = property(get_target_dom)
137
138 - def settarget(self, text, lang='xx', append=False):
165
172 target = property(gettarget, settarget)
173
175 """Returns a xml Element setup with given parameters to represent a
176 single language entry. Has to be overridden."""
177 return None
178
180 """Create the text node in parent containing all the ph tags"""
181 matches = _getPhMatches(text)
182 if not matches:
183 parent.text = text
184 return
185
186
187 start = matches[0].start()
188 pretext = text[:start]
189 if pretext:
190 parent.text = pretext
191 lasttag = parent
192 for i, m in enumerate(matches):
193
194 pretext = text[start:m.start()]
195
196 if pretext:
197 lasttag.tail = pretext
198
199 phnode = etree.SubElement(parent, self.namespaced("ph"))
200 phnode.set("id", str(i+1))
201 phnode.text = m.group()
202 lasttag = phnode
203 start = m.end()
204
205 if text[start:]:
206 lasttag.tail = text[start:]
207
209 """Returns a list of all nodes that contain per language information.
210 """
211 return list(self.xmlelement.iterchildren(self.namespaced(self.languageNode)))
212
214 """Retrieves a languageNode either by language or by index"""
215 if lang is None and index is None:
216 raise KeyError("No criterea for languageNode given")
217 languageNodes = self.getlanguageNodes()
218 if lang:
219 for set in languageNodes:
220 if getXMLlang(set) == lang:
221 return set
222 else:
223 if index >= len(languageNodes):
224 return None
225 else:
226 return languageNodes[index]
227 return None
228
def getNodeText(self, languageNode, xml_space="preserve"):
    """Retrieves the term from the given languageNode.

    When ``self.textNode`` is set, the text is read from the first
    descendant of ``languageNode`` carrying that (namespaced) tag;
    otherwise it is read from ``languageNode`` itself.

    :param languageNode: the per-language element to read, or None.
    :param xml_space: passed straight through to ``getText``.
    :return: whatever ``getText`` returns, or None when ``languageNode``
        is None or contains no matching text node.
    """
    if languageNode is None:
        return None
    if not self.textNode:
        return getText(languageNode, xml_space)

    terms = languageNode.iterdescendants(self.namespaced(self.textNode))
    if terms is None:
        return None
    # The descendant lookup yields an iterator, so a missing text node shows
    # up as an exhausted iterator rather than as None.  Ask for the first
    # match with a default instead of letting StopIteration escape.
    first = next(terms, None)
    if first is None:
        return None
    return getText(first, xml_space)
241
243 return etree.tostring(self.xmlelement, pretty_print=True,
244 encoding='utf-8')
245
248
249 xid = property(lambda self: self.xmlelement.attrib[self.namespaced('xid')],
250 lambda self, value: self._set_property(self.namespaced('xid'), value))
251
252 rid = property(lambda self: self.xmlelement.attrib[self.namespaced('rid')],
253 lambda self, value: self._set_property(self.namespaced('rid'), value))
254
256 term = cls(None, empty=True)
257 term.xmlelement = element
258 return term
259 createfromxmlElement = classmethod(createfromxmlElement)
260
261
263 """A class representing a file store for one of the LISA file formats."""
264 UnitClass = LISAunit
265
266 rootNode = ""
267
268 bodyNode = ""
269
270 XMLskeleton = ""
271
272 namespace = None
273
274 - def __init__(self, inputfile=None, sourcelanguage='en',
275 targetlanguage=None, unitclass=None):
288
290 """Method to be overridden to initialise headers, etc."""
291 pass
292
294 """Returns name in Clark notation.
295
296 For example namespaced("source") in an XLIFF document might return::
297 {urn:oasis:names:tc:xliff:document:1.1}source
298 This is needed throughout lxml.
299 """
300 return namespaced(self.namespace, name)
301
def initbody(self):
    """Look up the body element once and keep it on ``self.body``.

    The root element's default namespace (the ``None`` key of its nsmap)
    is stored on ``self.namespace`` before the lookup, which goes through
    ``self.namespaced``.
    """
    root_nsmap = self.document.getroot().nsmap
    self.namespace = root_nsmap.get(None)
    body_tag = self.namespaced(self.bodyNode)
    self.body = self.document.find('//%s' % body_tag)
307
309
310 """Adds and returns a new unit with the given string as first entry."""
311 newunit = self.UnitClass(source)
312 self.addunit(newunit)
313 return newunit
314
315 - def addunit(self, unit, new=True):
320
322 """Converts to a string containing the file's XML"""
323 return etree.tostring(self.document, pretty_print=True,
324 xml_declaration=True, encoding='utf-8')
325
327 """Populates this object from the given xml string"""
328 if not hasattr(self, 'filename'):
329 self.filename = getattr(xml, 'name', '')
330 if hasattr(xml, "read"):
331 xml.seek(0)
332 posrc = xml.read()
333 xml = posrc
334 if etree.LXML_VERSION >= (2, 1, 0):
335
336
337 parser = etree.XMLParser(strip_cdata=False)
338 else:
339 parser = etree.XMLParser()
340 self.document = etree.fromstring(xml, parser).getroottree()
341 self._encoding = self.document.docinfo.encoding
342 self.initbody()
343 assert self.document.getroot().tag == self.namespaced(self.rootNode)
344 for entry in self.document.getroot().iterdescendants(self.namespaced(self.UnitClass.rootNode)):
345 term = self.UnitClass.createfromxmlElement(entry)
346 self.addunit(term, new=False)
347