Package translate :: Package storage :: Module fpo
[hide private]
[frames | no frames]

Source Code for Module translate.storage.fpo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """Classes for the support of Gettext .po and .pot files. 
 22   
 23  This implementation assumes that cpo is working. This should not be used 
 24  directly, but can be used once cpo has been established to work.""" 
 25   
 26  #TODO: 
 27  # - handle headerless PO files better 
 28  # - previous msgid and msgctxt 
 29  # - accept only unicodes everywhere 
 30   
 31  from translate.misc.multistring import multistring 
 32  from translate.lang import data 
 33  from translate.storage import pocommon, base, cpo 
 34  from translate.storage.pocommon import encodingToUse 
 35  import re 
 36  import copy 
 37  import cStringIO 
 38   
 39  lsep = " " 
 40  """Seperator for #: entries""" 
 41   
 42  basic_header = r'''msgid "" 
 43  msgstr "" 
 44  "Content-Type: text/plain; charset=UTF-8\n" 
 45  "Content-Transfer-Encoding: 8bit\n" 
 46  ''' 
 47   
class pounit(pocommon.pounit):
    """A PO unit whose data is kept in plain Python attributes.

    Serialisation is delegated to C{cpo}: see L{__str__}, which builds a
    C{cpo.pounit} from this unit and returns its string form.
    """
    # Attributes and the PO syntax they correspond to:
    # othercomments = []      #   # this is another comment
    # automaticcomments = []  #   #. comment extracted from the source code
    # sourcecomments = []     #   #: sourcefile.xxx:35
    # prev_msgctxt = []       #   #| The previous values that msgctxt and msgid held
    # prev_msgid = []         #
    # prev_msgid_plural = []  #
    # typecomments = []       #   #, fuzzy
    # msgidcomment = u""      #   _: within msgid
    # msgctxt
    # msgid = []
    # msgstr = []

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion (see __deepcopy__).
    __shallow__ = ['_store']

    def __init__(self, source=None, encoding="UTF-8"):
        """Create a unit with empty comments, context and target.

        @param source: The msgid, handed to the parent constructor.
        @param encoding: Encoding name, normalised through C{encodingToUse}.
        """
        pocommon.pounit.__init__(self, source)
        self._encoding = encodingToUse(encoding)
        self.obsolete = False
        self._initallcomments(blankall=True)
        self._msgctxt = u""

        self.target = u""

    def _initallcomments(self, blankall=False):
        """Initialises allcomments

        Nothing is touched unless C{blankall} is true, in which case every
        comment container is reset to its empty value.
        """
        if blankall:
            self.othercomments = []
            self.automaticcomments = []
            self.sourcecomments = []
            self.typecomments = []
            self.msgidcomment = u""

    def getsource(self):
        """Return the stored msgid."""
        return self._source

    def setsource(self, source):
        """Store the msgid, converting it to unicode first.

        A C{multistring} or C{unicode} value is stored as is; any other
        value is wrapped in a C{multistring}.
        """
        self._rich_source = None
        # Falsy input (None, "", empty containers) collapses to u"".
        source = data.forceunicode(source or u"") or u""
        if isinstance(source, (multistring, unicode)):
            self._source = source
        else:
            # e.g. list or dict
            self._source = multistring(source)
    source = property(getsource, setsource)

    def gettarget(self):
        """Returns the unescaped msgstr"""
        return self._target

    def settarget(self, target):
        """Sets the msgstr to the given (unescaped) value

        @raise ValueError: If the unit has no plural but a list or dict
            with a length other than one is supplied.
        """
        self._rich_target = None
        if self.hasplural():
            if isinstance(target, multistring):
                self._target = target
            else:
                # unicode, list, dict
                self._target = multistring(target)
        elif isinstance(target, (dict, list)):
            if len(target) != 1:
                raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
            self._target = target[0]
        else:
            self._target = target
    target = property(gettarget, settarget)

    def getnotes(self, origin=None):
        """Return comments based on origin value (programmer, developer,
        source code and translator)

        @param origin: C{None} for translator comments followed by the
            automatic comments, C{"translator"} for the former only, or one
            of C{"programmer"}, C{"developer"}, C{"source code"} for the
            latter only.
        @return: The selected comment lines joined with newlines.
        @raise ValueError: For any other origin.
        """
        if origin is None:
            # Join both groups in a single pass so that the last translator
            # comment and the first automatic comment stay on separate lines.
            return u"\n".join(self.othercomments + self.automaticcomments)
        if origin == "translator":
            return u"\n".join(self.othercomments)
        if origin in ("programmer", "developer", "source code"):
            return u"\n".join(self.automaticcomments)
        raise ValueError("Comment type not valid")

    def addnote(self, text, origin=None, position="append"):
        """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

        @param text: Note text; it is split on newlines into separate
            comment lines.  Empty or whitespace-only text is ignored.
        @param origin: C{"programmer"}, C{"developer"} or C{"source code"}
            select the automatic comments; anything else selects the
            translator comments.
        @param position: C{"append"} adds after the existing comments; any
            other value puts the new lines first.
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        text = data.forceunicode(text)
        # Always bound, so that prepending a translator note works too.
        autocomments = origin in ("programmer", "developer", "source code")
        if autocomments:
            commentlist = self.automaticcomments
        else:
            commentlist = self.othercomments
        if text.endswith(u'\n'):
            text = text[:-1]
        newcomments = text.split(u"\n")
        if position == "append":
            commentlist.extend(newcomments)
            return
        newcomments.extend(commentlist)
        if autocomments:
            self.automaticcomments = newcomments
        else:
            self.othercomments = newcomments

    def removenotes(self):
        """Remove all the translator's notes (other comments)"""
        self.othercomments = []

    def __deepcopy__(self, memo=None):
        """Deep copy everything except the attributes named in __shallow__.

        @param memo: The memo dictionary supplied by C{copy.deepcopy}; a
            fresh one is created when it is omitted.
        """
        if memo is None:
            memo = {}
        # Make an instance to serve as the copy
        new_unit = self.__class__()
        # Register the copy before descending, so that anything referring
        # back to this unit resolves to the copy rather than recursing.
        memo[id(self)] = new_unit
        # We'll be testing membership frequently, so make a set from
        # self.__shallow__
        shallow = set(self.__shallow__)
        # Make deep copies of all members which are not in shallow
        for key, value in self.__dict__.items():
            if key not in shallow:
                setattr(new_unit, key, copy.deepcopy(value, memo))
        # Share (do not copy) all members which are in shallow
        for key in shallow:
            setattr(new_unit, key, getattr(self, key))
        return new_unit

    def copy(self):
        """Return a deep copy of this unit (see L{__deepcopy__})."""
        return copy.deepcopy(self)

    def _msgidlen(self):
        """Return the msgid length, summed over all plural forms."""
        if self.hasplural():
            return len(u"".join(self.source.strings))
        return len(self.source)

    def _msgstrlen(self):
        """Return the msgstr length, summed over all plural forms."""
        if self.hasplural():
            return len(u"".join(self.target.strings))
        return len(self.target)

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True

        When C{otherpo} is not a C{pounit} the work is handed to the parent
        class.  Note that a KDE style comment found in C{otherpo.target} is
        stripped from C{otherpo} itself before the target is taken over.
        """

        def mergelists(list1, list2, split=False):
            """Extend list1 in place with the entries of list2 it lacks."""
            if split:
                # Compare whitespace-separated tokens, not whole entries.
                splitlist1 = []
                splitlist2 = []
                for item in list1:
                    splitlist1.extend(item.split())
                for item in list2:
                    splitlist2.extend(item.split())
                list1.extend([item for item in splitlist2 if item not in splitlist1])
            elif list1 != list2:
                # Normal merge, but conform to list1 newline style
                for item in list2:
                    # avoid duplicate comment lines (this might cause some
                    # problems); entries shorter than 5 characters are
                    # always added
                    if item not in list1 or len(item) < 5:
                        list1.append(item)

        if not isinstance(otherpo, pounit):
            super(pounit, self).merge(otherpo, overwrite, comments)
            return
        if comments:
            mergelists(self.othercomments, otherpo.othercomments)
            mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we
                # consider ourself to be the authority.  Same applies to
                # otherpo.msgidcomments
                mergelists(self.automaticcomments, otherpo.automaticcomments)
                mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any). XXX - remove
            if pocommon.extract_msgid_comment(otherpo.target):
                otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments() + '\n', '')
            self.target = otherpo.target
            if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """Return whether the unit has an empty id and a non-empty target."""
        #TODO: fix up nicely
        return not self.getid() and len(self.target) > 0

    def isblank(self):
        """Return whether msgid, msgstr and msgctxt are all empty.

        Headers and units carrying a msgid comment are never blank.
        """
        if self.isheader() or self.msgidcomment:
            return False
        return (self._msgidlen() == 0 and self._msgstrlen() == 0
                and len(self._msgctxt) == 0)

    def hastypecomment(self, typecomment):
        """Check whether the given type comment is present"""
        # check for word boundaries properly by using a regular expression...
        pattern = "\\b%s\\b" % typecomment
        return any(re.search(pattern, tcline) for tcline in self.typecomments)

    def hasmarkedcomment(self, commentmarker):
        """Check whether the given comment marker is present as # (commentmarker) ..."""
        commentmarker = "(%s)" % commentmarker
        return any(comment.startswith(commentmarker)
                   for comment in self.othercomments)

    def settypecomment(self, typecomment, present=True):
        """Alters whether a given typecomment is present"""
        if self.hastypecomment(typecomment) == present:
            return
        if present:
            self.typecomments.append("#, %s\n" % typecomment)
            return
        # this should handle word boundaries properly ...
        pattern = "\\b%s\\b[ \t,]*" % typecomment
        stripped = [re.sub(pattern, "", tcline) for tcline in self.typecomments]
        # Drop the lines that are left with nothing but the "#," marker.
        # Real lists are kept so that later append() calls keep working.
        self.typecomments = [tcline for tcline in stripped
                             if tcline.strip() != "#,"]

    def istranslated(self):
        """Return whether the parent class reports the unit as translated
        and it is not obsolete."""
        return super(pounit, self).istranslated() and not self.isobsolete()

    def istranslatable(self):
        """Return False for headers, blank units and obsolete units."""
        return not (self.isheader() or self.isblank() or self.obsolete)

    def isfuzzy(self):
        """Return whether the "fuzzy" type comment is present."""
        return self.hastypecomment("fuzzy")

    def markfuzzy(self, present=True):
        """Add or remove the "fuzzy" type comment."""
        self.settypecomment("fuzzy", present)

    def isobsolete(self):
        """Return the obsolete flag."""
        return self.obsolete

    def makeobsolete(self):
        """Makes this unit obsolete"""
        self.obsolete = True
        # Locations and automatic comments are discarded as well.
        self.sourcecomments = []
        self.automaticcomments = []

    def resurrect(self):
        """Makes an obsolete unit normal"""
        self.obsolete = False

    def hasplural(self):
        """returns whether this pounit contains plural strings..."""
        source = self.source
        return isinstance(source, multistring) and len(source.strings) > 1

    def parse(self, src):
        """Unsupported: units are not parsed individually.

        @raise DeprecationWarning: Always.
        """
        raise DeprecationWarning("Should not be parsing with a unit")

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        _cpo_unit = cpo.pounit.buildfromunit(self)
        return str(_cpo_unit)

    def getlocations(self):
        """Get a list of locations from sourcecomments in the PO unit

        rtype: List
        return: A list of the locations with '#: ' stripped

        """
        #TODO: rename to .locations
        return self.sourcecomments

    def addlocation(self, location):
        """Add a location to sourcecomments in the PO unit

        @param location: Text location e.g. 'file.c:23' does not include #:
        @type location: String
        """
        # Whitespace-separated locations are stored as separate entries.
        self.sourcecomments.extend(location.split())

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        @param text: When given (and non-empty) the comment is extracted
            from this text instead of being read from the unit.
        @rtype: String
        @return: Returns the extracted msgidcomments found in this unit's msgid.
        """
        if text:
            return pocommon.extract_msgid_comment(text)
        return self.msgidcomment

    def getcontext(self):
        """Get the message context."""
        return self._msgctxt + self.msgidcomment

    def getid(self):
        """Returns a unique identifier for this unit.

        The identifier is the source, prefixed with C{"_: <context>\\n"} when
        a msgid comment is present, or with C{"<context>\\x04"} when there is
        only a context.
        """
        context = self.getcontext()
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
        # unit_id = '\0'.join(self.source.strings)
        unit_id = self.source
        if self.msgidcomment:
            unit_id = u"_: %s\n%s" % (context, unit_id)
        elif context:
            unit_id = u"%s\04%s" % (context, unit_id)
        return unit_id

    @classmethod
    def buildfromunit(cls, unit):
        """Build a native unit from a foreign unit, preserving as much
        information as possible."""
        if type(unit) == cls and hasattr(unit, "copy") and callable(unit.copy):
            return unit.copy()
        if not isinstance(unit, pocommon.pounit):
            # NOTE(review): this calls the base class's buildfromunit, so the
            # result is presumably not an instance of cls -- confirm intent.
            return base.TranslationUnit.buildfromunit(unit)

        newunit = cls(unit.source)
        newunit.target = unit.target
        # context: a KDE style msgid comment takes precedence over msgctxt
        newunit.msgidcomment = unit._extract_msgidcomments()
        if not newunit.msgidcomment:
            newunit._msgctxt = unit.getcontext()

        locations = unit.getlocations()
        if locations:
            newunit.addlocations(locations)
        for origin in ("developer", "translator"):
            notes = unit.getnotes(origin)
            if notes:
                newunit.addnote(notes, origin)
        if unit.isobsolete():
            newunit.makeobsolete()
        newunit.markfuzzy(unit.isfuzzy())
        # Only the first matching format flag is carried over.
        for tc in ['python-format', 'c-format', 'php-format']:
            if unit.hastypecomment(tc):
                newunit.settypecomment(tc)
                break
        return newunit
class pofile(pocommon.pofile):
    """A .po file containing various units"""
    UnitClass = pounit

    def changeencoding(self, newencoding):
        """Deprecated: changes the encoding on the file.

        @raise DeprecationWarning: Always; the method no longer does anything.
        """
        # This should not be here but in poheader. It also shouldn't mangle
        # the header itself, but use poheader methods. All users are removed,
        # so we can deprecate after one release.
        raise DeprecationWarning("changeencoding() is deprecated")

    def _build_self_from_cpo(self):
        """Builds up this store from the internal cpo store.

        A user must ensure that self._cpo_store already exists, and that it is
        deleted afterwards."""
        for unit in self._cpo_store.units:
            self.addunit(self.UnitClass.buildfromunit(unit))
        self._encoding = self._cpo_store._encoding

    def _build_cpo_from_self(self):
        """Builds the internal cpo store from the data in self.

        A user must ensure that self._cpo_store does not exist, and should
        delete it after using it."""
        self._cpo_store = cpo.pofile()
        for unit in self.units:
            # Blank units are left out of the cpo store.
            if not unit.isblank():
                self._cpo_store.addunit(cpo.pofile.UnitClass.buildfromunit(unit))
        if not self._cpo_store.header():
            #only add a temporary header
            self._cpo_store.makeheader(charset="utf-8", encoding="8bit")

    def parse(self, input):
        """Parses the given file or file source string.

        The input is parsed by a temporary cpo store whose units are then
        converted into units of this store.

        @raise base.ParseError: Wrapping any exception raised on the way.
        """
        try:
            if hasattr(input, 'name'):
                self.filename = input.name
            elif not getattr(self, 'filename', ''):
                self.filename = ''
            self.units = []
            self._cpo_store = cpo.pofile(input)
            try:
                self._build_self_from_cpo()
            finally:
                # Never leave the temporary store behind, even on failure.
                del self._cpo_store
        except Exception as e:
            raise base.ParseError(e)

    def removeduplicates(self, duplicatestyle="merge"):
        """Make sure each msgid is unique ; merge comments etc from duplicates into original

        @param duplicatestyle: C{"merge"} merges a duplicate into the first
            unit with the same id; C{"msgctxt"} keeps both and appends their
            locations to their contexts.  With any other value a duplicate
            is dropped.
        """
        # TODO: can we handle consecutive calls to removeduplicates()? What
        # about files already containing msgctxt? - test
        id_dict = {}
        uniqueunits = []
        # Units that were already disambiguated, tracked by object identity.
        marked_ids = set()

        def addcomment(thepo):
            thepo.msgidcomment = " ".join(thepo.getlocations())
            marked_ids.add(id(thepo))

        for thepo in self.units:
            unit_id = thepo.getid()
            if thepo.isheader() and not thepo.getlocations():
                # header msgids shouldn't be merged...
                uniqueunits.append(thepo)
            elif unit_id in id_dict:
                if duplicatestyle == "merge":
                    if unit_id:
                        id_dict[unit_id].merge(thepo)
                    else:
                        addcomment(thepo)
                        uniqueunits.append(thepo)
                elif duplicatestyle == "msgctxt":
                    origpo = id_dict[unit_id]
                    # Mark the original itself, so that a third or later
                    # duplicate does not add its locations a second time.
                    if id(origpo) not in marked_ids:
                        origpo._msgctxt += " ".join(origpo.getlocations())
                        marked_ids.add(id(origpo))
                    thepo._msgctxt += " ".join(thepo.getlocations())
                    uniqueunits.append(thepo)
            else:
                if not unit_id:
                    if duplicatestyle == "merge":
                        addcomment(thepo)
                    else:
                        thepo._msgctxt += u" ".join(thepo.getlocations())
                id_dict[unit_id] = thepo
                uniqueunits.append(thepo)
        self.units = uniqueunits

    def __str__(self):
        """Convert to a string. double check that unicode is handled somehow here"""
        # NOTE(review): _build_cpo_from_self() rebinds self._cpo_store to a
        # new cpo.pofile(), so the store created here (and its encoding
        # argument) looks unused -- confirm before relying on it.
        self._cpo_store = cpo.pofile(encoding=self._encoding)
        try:
            self._build_cpo_from_self()
            return str(self._cpo_store)
        finally:
            # Never leave the temporary store behind, even on failure.
            del self._cpo_store
540