Package translate :: Package storage :: Module xliff
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.xliff

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2005-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Module for handling XLIFF files for translation. 
 22   
 23  The official recommendation is to use the extension .xlf for XLIFF files. 
 24  """ 
 25   
 26  from lxml import etree 
 27   
 28  from translate.misc.multistring import multistring 
 29  from translate.misc.xml_helpers import * 
 30  from translate.storage import base, lisa 
 31  from translate.storage.lisa import getXMLspace 
 32  from translate.storage.placeables.lisa import xml_to_strelem, strelem_to_xml 
 33   
 34  # TODO: handle translation types 
 35   
class xliffunit(lisa.LISAunit):
    """A single term in the xliff file."""

    rootNode = "trans-unit"
    languageNode = "source"
    textNode = ""
    namespace = 'urn:oasis:names:tc:xliff:document:1.1'

    _default_xml_space = "default"

    #TODO: id and all the trans-unit level stuff

    def __init__(self, source, empty=False, **kwargs):
        """Override the constructor to set xml:space="preserve".

        When C{empty} is true nothing is initialised here at all: the parent
        constructor is not called and xml:space is not set.
        """
        if empty:
            return
        super(xliffunit, self).__init__(source, empty, **kwargs)
        lisa.setXMLspace(self.xmlelement, "preserve")

    def createlanguageNode(self, lang, text, purpose):
        """Returns an xml Element setup with given parameters.

        @param lang: Currently unused (the language is not written to the node).
        @param text: Text content for the new element.
        @param purpose: Tag name of the element to create, e.g. "source" or
            "target". Must be non-empty.
        """
        #TODO: for now we do source, but we have to test if it is target, perhaps
        # with parameter. Alternatively, we can use lang, if supplied, since an xliff
        #file has to conform to the bilingual nature promised by the header.
        assert purpose
        langset = etree.Element(self.namespaced(purpose))
        #TODO: check language
#        lisa.setXMLlang(langset, lang)

#        self.createPHnodes(langset, text)
        langset.text = text
        return langset

    def getlanguageNodes(self):
        """We override this to get source and target nodes.

        Returns C{[source, target]}, C{[source]} when there is no target
        child, or C{[]} when there is no source child. A target is never
        returned on its own, so index 1 is always the target.
        """
        try:
            source = self.xmlelement.iterchildren(self.namespaced(self.languageNode)).next()
        except StopIteration:
            return []
        try:
            target = self.xmlelement.iterchildren(self.namespaced('target')).next()
        except StopIteration:
            return [source]
        return [source, target]

    def set_rich_source(self, value, sourcelang='en'):
        """Replace the content of the source node with C{value[0]}.

        The source node is created first if the unit does not have one.
        """
        sourcelanguageNode = self.get_source_dom()
        if sourcelanguageNode is None:
            sourcelanguageNode = self.createlanguageNode(sourcelang, u'', "source")
            self.set_source_dom(sourcelanguageNode)

        # Clear sourcelanguageNode first
        while len(sourcelanguageNode):
            del sourcelanguageNode[0]
        sourcelanguageNode.text = None

        strelem_to_xml(sourcelanguageNode, value[0])

    def get_rich_source(self):
        """Return the source node converted by xml_to_strelem, as a
        one-element list."""
        xml_space = getXMLspace(self.xmlelement, self._default_xml_space)
        return [xml_to_strelem(self.source_dom, xml_space)]
    rich_source = property(get_rich_source, set_rich_source)

    def set_rich_target(self, value, lang='xx', append=False):
        """Replace the content of the target node with C{value[0]}.

        Passing C{None} installs a fresh, empty target node instead.
        """
        if value is None:
            self.set_target_dom(self.createlanguageNode(lang, u'', "target"))
            return

        languageNode = self.get_target_dom()
        if languageNode is None:
            languageNode = self.createlanguageNode(lang, u'', "target")
            self.set_target_dom(languageNode, append)

        # Clear languageNode first
        while len(languageNode):
            del languageNode[0]
        languageNode.text = None

        strelem_to_xml(languageNode, value[0])

    def get_rich_target(self, lang=None):
        """retrieves the "target" text (second entry), or the entry in the
        specified language, if it exists"""
        xml_space = getXMLspace(self.xmlelement, self._default_xml_space)
        return [xml_to_strelem(self.get_target_dom(lang), xml_space)]
    rich_target = property(get_rich_target, set_rich_target)

    def addalttrans(self, txt, origin=None, lang=None, sourcetxt=None, matchquality=None):
        """Adds an alt-trans tag and alt-trans components to the unit.

        @type txt: String
        @param txt: Alternative translation of the source text.
        @param origin: Optional value for the "origin" attribute.
        @param lang: Optional language to set on the alt-trans element.
        @param sourcetxt: Optional text for a source element inside the
            alt-trans.
        @param matchquality: Optional value for the "match-quality" attribute.
        """
        #TODO: support adding a source tag ad match quality attribute.  At
        # the source tag is needed to inject fuzzy matches from a TM.
        if isinstance(txt, str):
            txt = txt.decode("utf-8")
        alttrans = etree.SubElement(self.xmlelement, self.namespaced("alt-trans"))
        lisa.setXMLspace(alttrans, "preserve")
        if sourcetxt:
            if isinstance(sourcetxt, str):
                sourcetxt = sourcetxt.decode("utf-8")
            altsource = etree.SubElement(alttrans, self.namespaced("source"))
            altsource.text = sourcetxt
        alttarget = etree.SubElement(alttrans, self.namespaced("target"))
        alttarget.text = txt
        if matchquality:
            alttrans.set("match-quality", matchquality)
        if origin:
            alttrans.set("origin", origin)
        if lang:
            lisa.setXMLlang(alttrans, lang)

    def getalttrans(self, origin=None):
        """Returns <alt-trans> for the given origin as a list of units. No
        origin means all alternatives.

        An alt-trans element that has no target is skipped.
        """
        translist = []
        for node in self.xmlelement.iterdescendants(self.namespaced("alt-trans")):
            if not self.correctorigin(node, origin):
                continue
            xml_space = getXMLspace(node, self._default_xml_space)

            # must have one or more targets; without one there is nothing
            # to offer, and letting StopIteration escape would be misread
            # by any iterating caller as "end of iteration".
            try:
                targetnode = node.iterdescendants(self.namespaced("target")).next()
            except StopIteration:
                continue

            # We build some mini units that keep the xmlelement. This
            # makes it easier to delete it if it is passed back to us.
            newunit = base.TranslationUnit(self.source)

            # the source tag is optional
            try:
                sourcenode = node.iterdescendants(self.namespaced("source")).next()
                newunit.source = lisa.getText(sourcenode, xml_space)
            except StopIteration:
                pass

            newunit.target = lisa.getText(targetnode, xml_space)
            #TODO: support multiple targets better
            #TODO: support notes in alt-trans
            newunit.xmlelement = node

            translist.append(newunit)
        return translist

    def delalttrans(self, alternative):
        """Removes the supplied alternative from the list of alt-trans tags"""
        self.xmlelement.remove(alternative.xmlelement)

    def addnote(self, text, origin=None, position="append"):
        """Add a note specifically in a "note" tag

        Text that is empty after stripping is ignored. C{position} is
        accepted but not used here: the note is always appended.
        """
        if text:
            text = text.strip()
        if not text:
            return
        if isinstance(text, str):
            text = text.decode("utf-8")
        note = etree.SubElement(self.xmlelement, self.namespaced("note"))
        note.text = text
        if origin:
            note.set("from", origin)

    def getnotelist(self, origin=None):
        """Private method that returns the text from notes matching 'origin' or all notes.

        Duplicate texts are returned once only, in order of first appearance.
        """
        # TODO: consider using xpath to construct initial_list directly
        # or to simply get the correct text from the outset (just remember to
        # check for duplication.
        xml_space = getXMLspace(self.xmlelement, self._default_xml_space)
        notelist = []
        seen = set()
        for note in self.xmlelement.iterdescendants(self.namespaced("note")):
            if not self.correctorigin(note, origin):
                continue
            text = lisa.getText(note, xml_space)
            if text not in seen:
                seen.add(text)
                notelist.append(text)
        return notelist

    def getnotes(self, origin=None):
        """Return the notes matching C{origin}, joined with newlines."""
        return '\n'.join(self.getnotelist(origin=origin))

    def removenotes(self, origin="translator"):
        """Remove all the notes matching C{origin} (translator notes by default)."""
        # Take a snapshot first: removing nodes while walking the live
        # iterator can make it stop early and leave matching notes behind.
        notes = list(self.xmlelement.iterdescendants(self.namespaced("note")))
        for note in notes:
            if self.correctorigin(note, origin=origin):
                # iterdescendants also yields nested notes, so remove from
                # the note's real parent rather than from the unit element.
                note.getparent().remove(note)

    def adderror(self, errorname, errortext):
        """Adds an error message to this unit."""
        #TODO: consider factoring out: some duplication between XLIFF and TMX
        text = errorname + ': ' + errortext
        self.addnote(text, origin="pofilter")

    def geterrors(self):
        """Get all error messages.

        @return: Dictionary mapping each error name to its error text.
        """
        #TODO: consider factoring out: some duplication between XLIFF and TMX
        notelist = self.getnotelist(origin="pofilter")
        errordict = {}
        for note in notelist:
            # Split on the first separator only: the text may contain ': '.
            parts = note.split(': ', 1)
            errorname = parts[0]
            if len(parts) == 2:
                errortext = parts[1]
            else:
                # adderror() with an empty text is stored as "name:" because
                # addnote() strips the trailing space.
                if errorname.endswith(':'):
                    errorname = errorname[:-1]
                errortext = u''
            errordict[errorname] = errortext
        return errordict

    def isapproved(self):
        """States whether this unit is approved."""
        return self.xmlelement.get("approved") == "yes"

    def markapproved(self, value=True):
        """Mark this unit as approved.

        With a false C{value}, "approved" is set to "no" only if the unit is
        currently approved; otherwise the attribute is left untouched.
        """
        if value:
            self.xmlelement.set("approved", "yes")
        elif self.isapproved():
            self.xmlelement.set("approved", "no")

    def isreview(self):
        """States whether this unit needs to be reviewed"""
        targetnode = self.getlanguageNode(lang=None, index=1)
        return targetnode is not None and \
                "needs-review" in targetnode.get("state", "")

    def markreviewneeded(self, needsreview=True, explanation=None):
        """Marks the unit to indicate whether it needs review. Adds an optional explanation as a note."""
        targetnode = self.getlanguageNode(lang=None, index=1)
        if targetnode is None:
            return
        if needsreview:
            targetnode.set("state", "needs-review-translation")
            if explanation:
                self.addnote(explanation, origin="translator")
        elif "state" in targetnode.attrib:
            # Guarded like markfuzzy(): the target may carry no state at all.
            del targetnode.attrib["state"]

    def isfuzzy(self):
        """A unit counts as fuzzy whenever it is not approved."""
#        targetnode = self.getlanguageNode(lang=None, index=1)
#        return not targetnode is None and \
#                (targetnode.get("state-qualifier") == "fuzzy-match" or \
#                targetnode.get("state") == "needs-review-translation")
        return not self.isapproved()

    def markfuzzy(self, value=True):
        """Mark the unit fuzzy (not approved) or clear the fuzzy state.

        The target node, if any, gets state "needs-review-translation" when
        marking, and loses "state" and "state-qualifier" when clearing.
        """
        self.markapproved(not value)
        targetnode = self.getlanguageNode(lang=None, index=1)
        if targetnode is None:
            return
        if value:
            targetnode.set("state", "needs-review-translation")
        else:
            for attribute in ["state", "state-qualifier"]:
                if attribute in targetnode.attrib:
                    del targetnode.attrib[attribute]

    def settarget(self, text, lang='xx', append=False):
        """Sets the target string to the given value."""
        super(xliffunit, self).settarget(text, lang, append)
        if text:
            self.marktranslated()

# This code is commented while this will almost always return false.
# This way pocount, etc. works well.
#    def istranslated(self):
#        targetnode = self.getlanguageNode(lang=None, index=1)
#        return not targetnode is None and \
#                (targetnode.get("state") == "translated")

    def istranslatable(self):
        """Return False only if the unit has translate="no" (case-insensitive)."""
        value = self.xmlelement.get("translate")
        return not (value and value.lower() == 'no')

    def marktranslated(self):
        """Set the target's state to "translated", if there is a target."""
        targetnode = self.getlanguageNode(lang=None, index=1)
        if targetnode is None:
            return
        if self.isfuzzy() and "state-qualifier" in targetnode.attrib:
            #TODO: consider
            del targetnode.attrib["state-qualifier"]
        targetnode.set("state", "translated")

    def setid(self, id):
        """Set the "id" attribute of the unit."""
        self.xmlelement.set("id", id)

    def getid(self):
        """Return the "id" attribute of the unit, or "" if it has none."""
        return self.xmlelement.get("id") or ""

    def addlocation(self, location):
        """Store the location as the unit id (replacing any previous id)."""
        self.setid(location)

    def getlocations(self):
        """Return a one-element list holding the unit id."""
        return [self.getid()]

    def createcontextgroup(self, name, contexts=None, purpose=None):
        """Add the context group to the trans-unit with contexts a list with
        (type, text) tuples describing each context."""
        assert contexts
        group = etree.Element(self.namespaced("context-group"))
        # context-group tags must appear at the start within <group>
        # tags. Otherwise it must be appended to the end of a group
        # of tags.
        if self.xmlelement.tag == self.namespaced("group"):
            self.xmlelement.insert(0, group)
        else:
            self.xmlelement.append(group)
        group.set("name", name)
        if purpose:
            group.set("purpose", purpose)
        for contexttype, text in contexts:
            if isinstance(text, str):
                text = text.decode("utf-8")
            context = etree.SubElement(group, self.namespaced("context"))
            context.text = text
            context.set("context-type", contexttype)

    def getcontextgroups(self, name):
        """Returns the contexts in the context groups with the specified name

        @return: One list per matching group, each holding
            (context-type, text) tuples.
        """
        xml_space = getXMLspace(self.xmlelement, self._default_xml_space)
        groups = []
        grouptags = self.xmlelement.iterdescendants(self.namespaced("context-group"))
        #TODO: conbine name in query
        for group in grouptags:
            if group.get("name") != name:
                continue
            contexts = group.iterdescendants(self.namespaced("context"))
            pairs = [(context.get("context-type"), lisa.getText(context, xml_space))
                     for context in contexts]
            groups.append(pairs) #not extend
        return groups

    def getrestype(self):
        """returns the restype attribute in the trans-unit tag"""
        return self.xmlelement.get("restype")

    def merge(self, otherunit, overwrite=False, comments=True, authoritative=False):
        """Merge C{otherunit} into this unit and update the XLIFF state.

        C{authoritative} is accepted but is not used by this override.
        """
        #TODO: consider other attributes like "approved"
        super(xliffunit, self).merge(otherunit, overwrite, comments)
        if self.target:
            self.marktranslated()
            if otherunit.isfuzzy():
                self.markfuzzy()
            elif otherunit.source == self.source:
                self.markfuzzy(False)
        if comments:
            self.addnote(otherunit.getnotes())

    def correctorigin(self, node, origin):
        """Check against node tag's origin (e.g note or alt-trans)

        C{origin} of None matches everything; otherwise it must occur as a
        substring of the node's "from" or "origin" attribute.
        """
        if origin is None:
            return True
        return origin in node.get("from", "") or origin in node.get("origin", "")

    @classmethod
    def multistring_to_rich(cls, mstr):
        """Override L{TranslationUnit.multistring_to_rich} which is used by the
        C{rich_source} and C{rich_target} properties."""
        strings = mstr
        if isinstance(mstr, multistring):
            strings = mstr.strings
        elif isinstance(mstr, basestring):
            strings = [mstr]

        return [xml_to_strelem(s) for s in strings]

    @classmethod
    def rich_to_multistring(cls, elem_list):
        """Override L{TranslationUnit.rich_to_multistring} which is used by the
        C{rich_source} and C{rich_target} properties."""
        return multistring([unicode(elem) for elem in elem_list])
410 411
class xlifffile(lisa.LISAfile):
    """Class representing a XLIFF file store."""
    UnitClass = xliffunit
    Name = _("XLIFF Translation File")
    Mimetypes  = ["application/x-xliff", "application/x-xliff+xml"]
    Extensions = ["xlf", "xliff"]
    rootNode = "xliff"
    bodyNode = "body"
    XMLskeleton = '''<?xml version="1.0" ?>
<xliff version='1.1' xmlns='urn:oasis:names:tc:xliff:document:1.1'>
<file original='NoName' source-language='en' datatype='plaintext'>
<body>
</body>
</file>
</xliff>'''
    namespace = 'urn:oasis:names:tc:xliff:document:1.1'
    # xliff units have alttrans tags which can be used to store suggestions
    suggestions_in_format = True

    def __init__(self, *args, **kwargs):
        """Initialise the store; no file node is selected yet."""
        # Set before the parent constructor runs, as in the original order.
        self._filename = None
        lisa.LISAfile.__init__(self, *args, **kwargs)
        self._messagenum = 0

    def initbody(self):
        """Pick up the document's default namespace and select the body node
        of the current file (or of the first file if none is selected)."""
        self.namespace = self.document.getroot().nsmap.get(None, None)

        if self._filename:
            filenode = self.getfilenode(self._filename, createifmissing=True)
        else:
            filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
        self.body = self.getbodynode(filenode, createifmissing=True)

    def addheader(self):
        """Initialise the file header."""
        pass

    def createfilenode(self, filename, sourcelanguage=None, targetlanguage=None, datatype='plaintext'):
        """creates a filenode with the given filename. All parameters
        are needed for XLIFF compliance.

        If a file node named "NoName" exists it is renamed and returned
        instead of a new node; C{datatype} is not applied in that case.
        A newly created node is returned without being attached to the
        document.
        """
        if sourcelanguage is None:
            sourcelanguage = self.sourcelanguage
        if targetlanguage is None:
            targetlanguage = self.targetlanguage

        # find the default NoName file tag and use it instead of creating a new one
        for filenode in self.document.getroot().iterchildren(self.namespaced("file")):
            if filenode.get("original") == "NoName":
                filenode.set("original", filename)
                filenode.set("source-language", sourcelanguage)
                if targetlanguage:
                    filenode.set("target-language", targetlanguage)
                return filenode

        filenode = etree.Element(self.namespaced("file"))
        filenode.set("original", filename)
        filenode.set("source-language", sourcelanguage)
        if targetlanguage:
            filenode.set("target-language", targetlanguage)
        filenode.set("datatype", datatype)
        etree.SubElement(filenode, self.namespaced(self.bodyNode))
        return filenode

    def getfilename(self, filenode):
        """returns the name of the given file"""
        return filenode.get("original")

    def setfilename(self, filenode, filename):
        """set the name of the given file"""
        return filenode.set("original", filename)

    def getfilenames(self):
        """returns all filenames in this XLIFF file

        File nodes with a missing or empty "original" attribute are left out.
        """
        filenodes = self.document.getroot().iterchildren(self.namespaced("file"))
        return [name for name in map(self.getfilename, filenodes) if name]

    def getfilenode(self, filename, createifmissing=False):
        """finds the filenode with the given name

        @return: The matching file node, the result of L{createfilenode} if
            there is none and C{createifmissing} is true, otherwise None.
        """
        filenodes = self.document.getroot().iterchildren(self.namespaced("file"))
        for filenode in filenodes:
            if self.getfilename(filenode) == filename:
                return filenode
        if createifmissing:
            return self.createfilenode(filename)
        return None

    def setsourcelanguage(self, language):
        """Set source-language on the first file node; empty values are ignored."""
        if not language:
            return
        filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
        filenode.set("source-language", language)

    def getsourcelanguage(self):
        """Return the source-language attribute of the first file node."""
        filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
        return filenode.get("source-language")
    sourcelanguage = property(getsourcelanguage, setsourcelanguage)

    def settargetlanguage(self, language):
        """Set target-language on the first file node; empty values are ignored."""
        if not language:
            return
        filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
        filenode.set("target-language", language)

    def gettargetlanguage(self):
        """Return the target-language attribute of the first file node."""
        filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
        return filenode.get("target-language")
    targetlanguage = property(gettargetlanguage, settargetlanguage)

    def getdatatype(self, filename=None):
        """Returns the datatype of the stored file. If no filename is given,
        the datatype of the first file is given.

        Returns "" when the file is not found, or when no filename is given
        and the first file is still the default "NoName" one.
        """
        if filename:
            node = self.getfilenode(filename)
            if node is not None:
                return node.get("datatype")
        else:
            filenames = self.getfilenames()
            if len(filenames) > 0 and filenames[0] != "NoName":
                return self.getdatatype(filenames[0])
        return ""

    def getdate(self, filename=None):
        """Returns the date attribute for the file. If no filename is given,
        the date of the first file is given. If the date attribute is not
        specified, None is returned."""
        if filename:
            node = self.getfilenode(filename)
            if node is not None:
                return node.get("date")
        else:
            filenames = self.getfilenames()
            if len(filenames) > 0 and filenames[0] != "NoName":
                return self.getdate(filenames[0])
        return None

    def removedefaultfile(self):
        """We want to remove the default file-tag as soon as possible if we
        know if still present and empty."""
        filenodes = list(self.document.getroot().iterchildren(self.namespaced("file")))
        if len(filenodes) > 1:
            for filenode in filenodes:
                if filenode.get("original") == "NoName" and \
                        not list(filenode.iterdescendants(self.namespaced(self.UnitClass.rootNode))):
                    self.document.getroot().remove(filenode)
                    break

    def getheadernode(self, filenode, createifmissing=False):
        """finds the header node for the given filenode

        A header created on request is inserted as the first child of the
        file node, so that it precedes the body.
        """
        # TODO: Deprecated?
        headernodes = filenode.iterchildren(self.namespaced("header"))
        try:
            return headernodes.next()
        except StopIteration:
            pass
        if not createifmissing:
            return None
        headernode = etree.Element(self.namespaced("header"))
        filenode.insert(0, headernode)
        return headernode

    def getbodynode(self, filenode, createifmissing=False):
        """finds the body node for the given filenode"""
        bodynodes = filenode.iterchildren(self.namespaced("body"))
        try:
            return bodynodes.next()
        except StopIteration:
            pass
        if not createifmissing:
            return None
        return etree.SubElement(filenode, self.namespaced("body"))

    def addsourceunit(self, source, filename="NoName", createifmissing=False):
        """adds the given trans-unit to the last used body node if the
        filename has changed it uses the slow method instead (will
        create the nodes required if asked). Returns success

        @return: The new unit, or None if the file could not be selected.
        """
        if self._filename != filename:
            if not self.switchfile(filename, createifmissing):
                return None
        unit = super(xlifffile, self).addsourceunit(source)
        self._messagenum += 1
        unit.setid("%d" % self._messagenum)
        return unit

    def switchfile(self, filename, createifmissing=False):
        """Makes the body of the given file the current one (will create the
        nodes required if asked). Returns success

        On failure the currently selected file and body are left unchanged.
        """
        filenode = self.getfilenode(filename)
        if filenode is None:
            if not createifmissing:
                return False
            filenode = self.createfilenode(filename)
            self.document.getroot().append(filenode)

        body = self.getbodynode(filenode, createifmissing=createifmissing)
        if body is None:
            return False

        # Commit only once the switch is known to succeed; otherwise a later
        # addsourceunit() for the same filename would skip the switch and
        # write into the wrong (or a missing) body.
        self._filename = filename
        self.body = body
        self._messagenum = len(list(body.iterdescendants(self.namespaced("trans-unit"))))
        #TODO: was 0 based before - consider
    #    messagenum = len(self.units)
        #TODO: we want to number them consecutively inside a body/file tag
        #instead of globally in the whole XLIFF file, but using len(self.units)
        #will be much faster
        return True

    def creategroup(self, filename="NoName", createifmissing=False, restype=None):
        """adds a group tag into the specified file

        @return: The new group element, or None if the file could not be
            selected.
        """
        if self._filename != filename:
            if not self.switchfile(filename, createifmissing):
                return None
        group = etree.SubElement(self.body, self.namespaced("group"))
        if restype:
            group.set("restype", restype)
        return group

    def __str__(self):
        """Serialise the store, dropping an unused default file node first."""
        self.removedefaultfile()
        return super(xlifffile, self).__str__()

    @classmethod
    def parsestring(cls, storestring):
        """Parses the string to return the correct file object

        If the content looks like PO-flavoured XLIFF and C{cls} is not
        already the PO XLIFF class, it is parsed again as a PoXliffFile.
        """
        xliff = super(xlifffile, cls).parsestring(storestring)
        if xliff.units:
            header = xliff.units[0]
            if ("gettext-domain-header" in (header.getrestype() or "") \
                    or xliff.getdatatype() == "po") \
                    and cls.__name__.lower() != "poxlifffile":
                # Imported here rather than at module level, as in the
                # original; absolute so it does not rely on implicit
                # relative imports.
                from translate.storage import poxliff
                xliff = poxliff.PoXliffFile.parsestring(storestring)
        return xliff
647