1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Parent class for LISA standards (TMX, TBX, XLIFF)"""
22
23 import re
24
25 from translate.storage import base
26 from translate.lang import data
27 try:
28 from lxml import etree
29 from translate.misc.xml_helpers import *
30 except ImportError, e:
31 raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.")
32
33
35 """generate match objects for all L{re_obj} matches in L{text}."""
36 start = 0
37 max = len(text)
38 while start < max:
39 m = re_obj.search(text, start)
40 if not m:
41 break
42 yield m
43 start = m.end()
44
45
# Regular expression sources for the placeholder forms recognised in text.
# Every pattern wraps the whole placeholder in a single capturing group.
# Raw strings are used throughout so backslashes reach the regex engine
# unchanged (a plain '\$' is an invalid string escape).
placeholders = [
    r'(%[diouxXeEfFgGcrs])',  # printf-style conversions such as %d or %s
    r'(\\+.?)',               # a run of backslashes plus the next character
    # The "$" must be escaped: unescaped it anchors at the end of the text,
    # which made this pattern impossible to match.
    r'(%[0-9]\$lx)',          # positional hex long, e.g. %1$lx
    r'(%[0-9]\$[a-z])',       # positional conversions, e.g. %1$s
    r'(<.+?>)',               # tag-like spans, matched non-greedily
]
# Compiled counterparts of ``placeholders``, in the same order.
re_placeholders = [re.compile(ph) for ph in placeholders]
57
58
60 """
61 A single unit in the file. Provisional work is done to make several
62 languages possible.
63 """
64
65
66 rootNode = ""
67
68 languageNode = ""
69
70 textNode = ""
71
72 namespace = None
73 _default_xml_space = "preserve"
74 """The default handling of spacing in the absense of an xml:space attribute.
75
76 This is mostly for correcting XLIFF behaviour."""
77
def __init__(self, source, empty=False, **kwargs):
    """Build a unit around the given source string.

    With ``empty`` set, no XML element is created and the base class
    initialiser is not run; only the rich source/target caches are reset.
    The caller is then expected to attach ``xmlelement`` itself.

    Otherwise a fresh root element, named by ``rootNode`` in this unit's
    namespace, is created before the base class is initialised with
    ``source``. Extra keyword arguments are accepted and ignored.
    """
    if not empty:
        root_tag = self.namespaced(self.rootNode)
        self.xmlelement = etree.Element(root_tag)
        super(LISAunit, self).__init__(source)
    else:
        self._rich_source = None
        self._rich_target = None
87
103
105 """Returns name in Clark notation.
106
107 For example namespaced("source") in an XLIFF document might return::
108 {urn:oasis:names:tc:xliff:document:1.1}source
109 This is needed throughout lxml.
110 """
111 return namespaced(self.namespace, name)
112
114 languageNodes = self.getlanguageNodes()
115 if len(languageNodes) > 0:
116 self.xmlelement[0] = dom_node
117 else:
118 self.xmlelement.append(dom_node)
119
122 source_dom = property(get_source_dom, set_source_dom)
123
127
130 source = property(getsource, setsource)
131
133 languageNodes = self.getlanguageNodes()
134 assert len(languageNodes) > 0
135 if dom_node is not None:
136 if append or len(languageNodes) == 0:
137 self.xmlelement.append(dom_node)
138 else:
139 self.xmlelement.insert(1, dom_node)
140 if not append and len(languageNodes) > 1:
141 self.xmlelement.remove(languageNodes[1])
142
148 target_dom = property(get_target_dom)
149
150 - def settarget(self, text, lang='xx', append=False):
173
178 target = property(gettarget, settarget)
179
181 """Returns a xml Element setup with given parameters to represent a
182 single language entry. Has to be overridden."""
183 return None
184
213
215 """Returns a list of all nodes that contain per language information."""
216 return list(self.xmlelement.iterchildren(self.namespaced(self.languageNode)))
217
219 """Retrieves a languageNode either by language or by index"""
220 if lang is None and index is None:
221 raise KeyError("No criterea for languageNode given")
222 languageNodes = self.getlanguageNodes()
223 if lang:
224 for set in languageNodes:
225 if getXMLlang(set) == lang:
226 return set
227 else:
228 if index >= len(languageNodes):
229 return None
230 else:
231 return languageNodes[index]
232 return None
233
def getNodeText(self, languageNode, xml_space="preserve"):
    """Retrieves the term from the given languageNode.

    If ``self.textNode`` is set, the text is taken from the first
    descendant of ``languageNode`` with that (namespaced) tag; otherwise
    it is taken from ``languageNode`` itself. ``xml_space`` is passed
    through to ``getText`` unchanged.

    Returns None when ``languageNode`` is None, or when a text node is
    expected but ``languageNode`` has no such descendant.
    """
    if languageNode is None:
        return None
    if not self.textNode:
        return getText(languageNode, xml_space)
    text_tag = self.namespaced(self.textNode)
    # iterdescendants() always yields an iterator (never None), so take the
    # first match with a loop; an empty iterator falls through to None
    # instead of leaking StopIteration to the caller.
    for term in languageNode.iterdescendants(text_tag):
        return getText(term, xml_space)
    return None
246
248 return etree.tostring(self.xmlelement, pretty_print=True, encoding='utf-8')
249
252
253 xid = property(lambda self: self.xmlelement.attrib[self.namespaced('xid')],
254 lambda self, value: self._set_property(self.namespaced('xid'), value))
255
256 rid = property(lambda self: self.xmlelement.attrib[self.namespaced('rid')],
257 lambda self, value: self._set_property(self.namespaced('rid'), value))
258
260 term = cls(None, empty=True)
261 term.xmlelement = element
262 return term
263 createfromxmlElement = classmethod(createfromxmlElement)
264
266 """A class representing a file store for one of the LISA file formats."""
267 UnitClass = LISAunit
268
269 rootNode = ""
270
271 bodyNode = ""
272
273 XMLskeleton = ""
274
275 namespace = None
276
277 - def __init__(self, inputfile=None, sourcelanguage='en', targetlanguage=None, unitclass=None):
290
292 """Method to be overridden to initialise headers, etc."""
293 pass
294
296 """Returns name in Clark notation.
297
298 For example namespaced("source") in an XLIFF document might return::
299 {urn:oasis:names:tc:xliff:document:1.1}source
300 This is needed throughout lxml.
301 """
302 return namespaced(self.namespace, name)
303
def initbody(self):
    """Cache the document's body element on ``self.body``.

    The default namespace of the document root is recorded on
    ``self.namespace`` first, because ``namespaced()`` relies on it to
    build the qualified name of ``bodyNode`` used in the lookup.
    """
    root = self.document.getroot()
    # The un-prefixed (default) namespace is stored under the None key.
    self.namespace = root.nsmap.get(None)
    body_tag = self.namespaced(self.bodyNode)
    self.body = self.document.find('//%s' % body_tag)
308
310
311 """Adds and returns a new unit with the given string as first entry."""
312 newunit = self.UnitClass(source)
313 self.addunit(newunit)
314 return newunit
315
316 - def addunit(self, unit, new=True):
321
323 """Converts to a string containing the file's XML"""
324 return etree.tostring(self.document, pretty_print=True, xml_declaration=True, encoding='utf-8')
325
327 """Populates this object from the given xml string"""
328 if not hasattr(self, 'filename'):
329 self.filename = getattr(xml, 'name', '')
330 if hasattr(xml, "read"):
331 xml.seek(0)
332 posrc = xml.read()
333 xml = posrc
334 if etree.LXML_VERSION >= (2, 1, 0):
335
336
337 parser = etree.XMLParser(strip_cdata=False)
338 else:
339 parser = etree.XMLParser()
340 self.document = etree.fromstring(xml, parser).getroottree()
341 self._encoding = self.document.docinfo.encoding
342 self.initbody()
343 assert self.document.getroot().tag == self.namespaced(self.rootNode)
344 for entry in self.document.getroot().iterdescendants(self.namespaced(self.UnitClass.rootNode)):
345 term = self.UnitClass.createfromxmlElement(entry)
346 self.addunit(term, new=False)
347