Package translate :: Package storage :: Module pypo
[hide private]
[frames | no frames]

Source Code for Module translate.storage.pypo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  """classes that hold units of .po files (pounit) or entire files (pofile) 
 22  gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)""" 
 23   
 24  from __future__ import generators 
 25  from translate.misc.multistring import multistring 
 26  from translate.misc import quote 
 27  from translate.misc import textwrap 
 28  from translate.lang import data 
 29  from translate.storage import pocommon, base 
 30  from translate.storage.pocommon import encodingToUse 
 31  import re 
 32  import copy 
 33  import cStringIO 
 34  import poparser 
 35   
 36  lsep = "\n#: " 
 37  """Seperator for #: entries""" 
 38   
 39  # general functions for quoting / unquoting po strings 
 40   
 41  po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'} 
 42  po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()]) 
 43   
def escapeforpo(line):
    """Escapes a line for po format. Assumes no newline occurs in the line.

    Every character that is a key of C{po_escape_map} is replaced by the
    corresponding escape sequence; everything else is copied unchanged.

    @param line: unescaped text
    @return: the escaped text
    """
    # Collect the position of every character that needs escaping.
    hits = []
    for special in po_escape_map:
        hits.extend(quote.find_all(line, special))

    # Walk the distinct positions left to right, copying the plain text
    # between them and substituting the escape for each special character.
    pieces = []
    cursor = 0
    for position in sorted(set(hits)):
        pieces.append(line[cursor:position])
        pieces.append(po_escape_map[line[position:position + 1]])
        cursor = position + 1
    pieces.append(line[cursor:])
    return "".join(pieces)
def unescapehandler(escape):
    """Translate one escape sequence into the character it represents.

    @param escape: an escape sequence such as a backslash followed by C{n}
    @return: the mapped value from C{po_unescape_map}, or C{escape} itself
        when it is not a known escape sequence
    """
    if escape in po_unescape_map:
        return po_unescape_map[escape]
    return escape
def wrapline(line):
    """Wrap text for po files.

    The text is wrapped at 76 columns without touching its whitespace.  A
    space that ends up at the start of a continuation line is moved to the
    end of the line before it.

    @param line: the text to wrap
    @return: list of wrapped lines
    """
    wrapped = textwrap.wrap(line, 76, replace_whitespace=False,
                            expand_tabs=False, drop_whitespace=False)

    # Lines should not start with a space...
    for position in range(1, len(wrapped)):
        current = wrapped[position]
        if current.startswith(' '):
            # Remove the space at the beginning of the line:
            wrapped[position] = current[1:]
            # Append a space to the previous line:
            wrapped[position - 1] += ' '
    return wrapped
def quoteforpo(text):
    """Quotes the given text for a PO file, returning quoted and escaped lines.

    @param text: the unescaped text, or None
    @return: list of double-quoted lines; an empty list when text is None
    """
    if text is None:
        return []

    quoted = []
    parts = text.split("\n")
    final = parts[-1]

    if len(parts) > 1 or len(parts[0]) > 71:
        # Start with an empty leader line, except for a single line that
        # merely ends in a newline.
        single_terminated_line = len(parts) == 2 and not final
        if not single_terminated_line:
            quoted.append('""')

    # Every part except the last was followed by a newline in the text.
    for part in parts[:-1]:
        #TODO: We should only wrap after escaping
        wrapped = wrapline(part)
        if not wrapped:
            quoted.append('"\\n"')
            continue
        for piece in wrapped[:-1]:
            quoted.append('"' + escapeforpo(piece) + '"')
        if wrapped[-1]:
            quoted.append('"' + escapeforpo(wrapped[-1]) + '\\n"')

    if final:
        for piece in wrapline(final):
            quoted.append('"' + escapeforpo(piece) + '"')
    return quoted
def extractpoline(line):
    """Remove quotes and unescape a line from a po file.

    @param line: a quoted line from a po file (msgid or msgstr)
    @return: the first element of the result of
        C{quote.extractwithoutquotes}, called with C{unescapehandler} to
        resolve escapes
    """
    result = quote.extractwithoutquotes(line, '"', '"', '\\',
                                        includeescapes=unescapehandler)
    return result[0]
def unquotefrompo(postr):
    """Unquote a list of po lines and join them into one unicode string.

    @param postr: list of quoted lines (as stored in msgid, msgstr, ...)
    @return: the concatenation of the extracted lines
    """
    pieces = []
    for quotedline in postr:
        pieces.append(extractpoline(quotedline))
    return u"".join(pieces)
def is_null(lst):
    """Tell whether a list of quoted po lines holds no text.

    @param lst: list of quoted lines
    @return: True for an empty list or a list holding only the empty
        quoted string
    """
    if lst == []:
        return True
    return len(lst) == 1 and lst[0] == '""'
def extractstr(string):
    """Return the part of string from its first to its last double quote.

    When the string holds no double quote at all, the result is the last
    character of the string (if any) followed by a double quote.

    @param string: the text to extract from
    """
    first = string.find('"')
    last = string.rfind('"')
    if last < 0:
        # No quote present: first is -1 here as well.
        return string[first:] + '"'
    return string[first:last + 1]
class pounit(pocommon.pounit):
    """A single unit (entry) of a .po file.

    Each part of the entry is kept as a list of raw lines as they are written
    to the file (quoted and escaped).  C{msgstr} is a dict of such lists,
    keyed by plural index, when the target has several plural forms.
    """
    # othercomments = []      #   # this is another comment
    # automaticcomments = []  #   #. comment extracted from the source code
    # sourcecomments = []     #   #: sourcefile.xxx:35
    # prev_msgctxt = []       #   #| The previous values that msgctxt and msgid held
    # prev_msgid = []         #
    # prev_msgid_plural = []  #
    # typecomments = []       #   #, fuzzy
    # msgidcomments = []      #   _: within msgid
    # msgctxt
    # msgid = []
    # msgstr = []

    # Our homegrown way to indicate what must be copied in a shallow
    # fashion
    __shallow__ = ['_store']

    def __init__(self, source=None, encoding="UTF-8"):
        """Create an empty unit and hand source to the base class.

        @param source: passed on to C{pocommon.pounit.__init__}
        @param encoding: name of the encoding, normalised by C{encodingToUse}
        """
        self._encoding = encodingToUse(encoding)
        self.obsolete = False
        self._initallcomments(blankall=True)
        self.prev_msgctxt = []
        self.prev_msgid = []
        self.prev_msgid_plural = []
        self.msgctxt = []
        self.msgid = []
        self.msgid_pluralcomments = []
        self.msgid_plural = []
        self.msgstr = []
        self.obsoletemsgctxt = []
        self.obsoletemsgid = []
        self.obsoletemsgid_pluralcomments = []
        self.obsoletemsgid_plural = []
        self.obsoletemsgstr = []
        pocommon.pounit.__init__(self, source)

    def _initallcomments(self, blankall=False):
        """Initialises allcomments

        @param blankall: when true, every comment list is reset to empty;
            otherwise nothing is changed
        """
        if blankall:
            self.othercomments = []
            self.automaticcomments = []
            self.sourcecomments = []
            self.typecomments = []
            self.msgidcomments = []
            self.obsoletemsgidcomments = []

    def _get_all_comments(self):
        """Return the six comment lists of this unit (the lists themselves)."""
        return [self.othercomments,
                self.automaticcomments,
                self.sourcecomments,
                self.typecomments,
                self.msgidcomments,
                self.obsoletemsgidcomments]

    allcomments = property(_get_all_comments)

    def _get_source_vars(self, msgid, msgid_plural):
        """Build a multistring from quoted msgid / msgid_plural lines.

        The plural form is only added when this unit has a plural
        (see L{hasplural}).
        """
        multi = multistring(unquotefrompo(msgid), self._encoding)
        if self.hasplural():
            pluralform = unquotefrompo(msgid_plural)
            if isinstance(pluralform, str):
                pluralform = pluralform.decode(self._encoding)
            multi.strings.append(pluralform)
        return multi

    def _set_source_vars(self, source):
        """Quote a source value into (msgid, msgid_plural) line lists.

        @param source: a string, a multistring or a list of strings; only the
            first two elements of a list are used
        @return: tuple (msgid, msgid_plural)
        """
        if isinstance(source, str):
            source = source.decode(self._encoding)
        if isinstance(source, multistring):
            source = source.strings
        if isinstance(source, list):
            msgid = quoteforpo(source[0])
            if len(source) > 1:
                msgid_plural = quoteforpo(source[1])
            else:
                msgid_plural = []
        else:
            msgid = quoteforpo(source)
            msgid_plural = []
        return msgid, msgid_plural

    def getsource(self):
        """Returns the unescaped msgid"""
        return self._get_source_vars(self.msgid, self.msgid_plural)

    def setsource(self, source):
        """Sets the msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self.msgid, self.msgid_plural = self._set_source_vars(source)
    source = property(getsource, setsource)

    def _get_prev_source(self):
        """Returns the unescaped previous msgid"""
        return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)

    def _set_prev_source(self, source):
        """Sets the previous msgid to the given (unescaped) value.

        @param source: an unescaped source string.
        """
        self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
    prev_source = property(_get_prev_source, _set_prev_source)

    def gettarget(self):
        """Returns the unescaped msgstr"""
        if isinstance(self.msgstr, dict):
            forms = [unquotefrompo(lines) for lines in self.msgstr.values()]
            multi = multistring(forms, self._encoding)
        else:
            multi = multistring(unquotefrompo(self.msgstr), self._encoding)
        return multi

    def settarget(self, target):
        """Sets the msgstr to the given (unescaped) value

        @param target: a string, multistring, list or dict of strings
        @raise ValueError: if the unit has no plural but target is a list or
            dict holding other than exactly one element
        """
        self._rich_target = None
        if isinstance(target, str):
            target = target.decode(self._encoding)
        if self.hasplural():
            # multistring must be tested before basestring
            if isinstance(target, multistring):
                target = target.strings
            elif isinstance(target, basestring):
                target = [target]
        elif isinstance(target, (dict, list)):
            if len(target) == 1:
                target = target[0]
            else:
                raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
        if isinstance(target, list):
            self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in enumerate(target)])
        elif isinstance(target, dict):
            self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.items()])
        else:
            self.msgstr = quoteforpo(target)
    target = property(gettarget, settarget)

    def getnotes(self, origin=None):
        """Return comments based on origin value (programmer, developer, source code and translator)

        @param origin: None for translator and automatic comments together,
            "translator", or one of "programmer", "developer", "source code"
        @raise ValueError: for any other origin
        """
        if origin is None:
            comments = u"".join([comment[2:] for comment in self.othercomments])
            comments += u"".join([comment[3:] for comment in self.automaticcomments])
        elif origin == "translator":
            comments = u"".join([comment[2:] for comment in self.othercomments])
        elif origin in ["programmer", "developer", "source code"]:
            comments = u"".join([comment[3:] for comment in self.automaticcomments])
        else:
            raise ValueError("Comment type not valid")
        # Let's drop the last newline
        return comments[:-1]

    def addnote(self, text, origin=None, position="append"):
        """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote

        @param text: the note; ignored when empty or whitespace only
        @param origin: "programmer", "developer" or "source code" store the
            note as an automatic ("#. ") comment, anything else as a
            translator ("# ") comment
        @param position: "append" adds after the existing comments, any
            other value puts the note in front of them
        """
        # ignore empty strings and strings without non-space characters
        if not (text and text.strip()):
            return
        text = data.forceunicode(text)
        commentlist = self.othercomments
        linestart = "# "
        autocomments = False
        if origin in ["programmer", "developer", "source code"]:
            autocomments = True
            commentlist = self.automaticcomments
            linestart = "#. "
        text = text.split("\n")
        if position == "append":
            commentlist += [linestart + line + "\n" for line in text]
        else:
            newcomments = [linestart + line + "\n" for line in text]
            newcomments += [line for line in commentlist]
            if autocomments:
                self.automaticcomments = newcomments
            else:
                self.othercomments = newcomments

    def removenotes(self):
        """Remove all the translator's notes (other comments)"""
        self.othercomments = []

    def __deepcopy__(self, memo=None):
        """Copy the unit deeply, except for the members in C{__shallow__}.

        @param memo: the memo dict of the copy protocol; a fresh dict is used
            when omitted (a shared default dict would be mutated below)
        """
        if memo is None:
            memo = {}
        # Make an instance to serve as the copy
        new_unit = self.__class__()
        # We'll be testing membership frequently, so make a set from
        # self.__shallow__
        shallow = set(self.__shallow__)
        # Make deep copies of all members which are not in shallow
        for key, value in self.__dict__.items():
            if key not in shallow:
                setattr(new_unit, key, copy.deepcopy(value))
        # Make shallow copies of all members which are in shallow
        for key in shallow:
            setattr(new_unit, key, getattr(self, key))
        # Mark memo with ourself, so that we won't get deep copied
        # again
        memo[id(self)] = self
        # Return our copied unit
        return new_unit

    def copy(self):
        """Return a deep copy of this unit (see L{__deepcopy__})."""
        return copy.deepcopy(self)

    def _msgidlen(self):
        """Length of the unquoted msgid, plus that of msgid_plural if any."""
        if self.hasplural():
            return len(unquotefrompo(self.msgid)) + len(unquotefrompo(self.msgid_plural))
        else:
            return len(unquotefrompo(self.msgid))

    def _msgstrlen(self):
        """Length of the unquoted msgstr; plural forms are joined by newlines."""
        if isinstance(self.msgstr, dict):
            combinedstr = "\n".join([unquotefrompo(msgstr) for msgstr in self.msgstr.values()])
            return len(combinedstr)
        else:
            return len(unquotefrompo(self.msgstr))

    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True

        @param authoritative: when true, automatic, msgid and source comments
            of otherpo are not merged in
        """

        def mergelists(list1, list2, split=False):
            """Add to list1 (in place) the items of list2 it does not have."""
            #decode where necessary
            if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
                for position, item in enumerate(list1):
                    if isinstance(item, str):
                        list1[position] = item.decode("utf-8")
                for position, item in enumerate(list2):
                    if isinstance(item, str):
                        list2[position] = item.decode("utf-8")

            #Determine the newline style of list1
            lineend = ""
            if list1 and list1[0]:
                # NOTE(review): "\n\r" looks like it was meant to be "\r\n" - confirm
                for candidate in ["\n", "\r", "\n\r"]:
                    if list1[0].endswith(candidate):
                        lineend = candidate
            else:
                lineend = "\n"

            #Split if directed to do so:
            if split:
                splitlist1 = []
                splitlist2 = []
                prefix = "#"
                for item in list1:
                    splitlist1.extend(item.split()[1:])
                    prefix = item.split()[0]
                for item in list2:
                    splitlist2.extend(item.split()[1:])
                    prefix = item.split()[0]
                list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1])
            else:
                #Normal merge, but conform to list1 newline style
                if list1 != list2:
                    for item in list2:
                        if lineend:
                            item = item.rstrip() + lineend
                        # avoid duplicate comment lines (this might cause some problems)
                        if item not in list1 or len(item) < 5:
                            list1.append(item)

        if not isinstance(otherpo, pounit):
            super(pounit, self).merge(otherpo, overwrite, comments)
            return
        if comments:
            mergelists(self.othercomments, otherpo.othercomments)
            mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                # We don't bring across otherpo.automaticcomments as we consider ourself
                # to be the the authority.  Same applies to otherpo.msgidcomments
                mergelists(self.automaticcomments, otherpo.automaticcomments)
                mergelists(self.msgidcomments, otherpo.msgidcomments)
                mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
        if not self.istranslated() or overwrite:
            # Remove kde-style comments from the translation (if any).
            if self._extract_msgidcomments(otherpo.target):
                otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '')
            self.target = otherpo.target
            if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            if self.target != otherpo.target:
                self.markfuzzy()

    def isheader(self):
        """Tell whether this unit is a header: blank msgid, non-blank msgstr,
        no msgid comments and no msgctxt."""
        #return (self._msgidlen() == 0) and (self._msgstrlen() > 0) and (len(self.msgidcomments) == 0)
        #rewritten here for performance:
        return (is_null(self.msgid)
                and not is_null(self.msgstr)
                and self.msgidcomments == []
                and is_null(self.msgctxt)
                )

    def isblank(self):
        """Tell whether msgid, msgstr and msgctxt are all empty (a header or
        a unit with msgid comments is never blank)."""
        if self.isheader() or len(self.msgidcomments):
            return False
        if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)):
            return True
        return False
        # TODO: remove:
        # Before, the equivalent of the following was the final return statement:
        # return len(self.source.strip()) == 0

    def hastypecomment(self, typecomment):
        """Check whether the given type comment is present"""
        # check for word boundaries properly by using a regular expression...
        pattern = "\\b%s\\b" % typecomment
        for tcline in self.typecomments:
            if re.search(pattern, tcline):
                return True
        return False

    def hasmarkedcomment(self, commentmarker):
        """Check whether the given comment marker is present as # (commentmarker) ..."""
        commentmarker = "(%s)" % commentmarker
        for comment in self.othercomments:
            if comment.replace("#", "", 1).strip().startswith(commentmarker):
                return True
        return False

    def settypecomment(self, typecomment, present=True):
        """Alters whether a given typecomment is present"""
        if self.hastypecomment(typecomment) != present:
            if present:
                self.typecomments.append("#, %s\n" % typecomment)
            else:
                # this should handle word boundaries properly ...
                pattern = "\\b%s\\b[ \t,]*" % typecomment
                typecomments = [re.sub(pattern, "", tcline) for tcline in self.typecomments]
                # drop the lines that are left without any flag
                self.typecomments = [tcline for tcline in typecomments if tcline.strip() != "#,"]

    def isfuzzy(self):
        """Tell whether the "fuzzy" type comment is present."""
        return self.hastypecomment("fuzzy")

    def markfuzzy(self, present=True):
        """Add (or, with present false, remove) the "fuzzy" type comment."""
        self.settypecomment("fuzzy", present)

    def isobsolete(self):
        """Return the obsolete flag of this unit."""
        return self.obsolete

    def makeobsolete(self):
        """Makes this unit obsolete"""
        self.obsolete = True
        if self.msgctxt:
            self.obsoletemsgctxt = self.msgctxt
        if self.msgid:
            self.obsoletemsgid = self.msgid
            self.msgid = []
        if self.msgidcomments:
            self.obsoletemsgidcomments = self.msgidcomments
            self.msgidcomments = []
        if self.msgid_plural:
            self.obsoletemsgid_plural = self.msgid_plural
            self.msgid_plural = []
        if self.msgstr:
            self.obsoletemsgstr = self.msgstr
            self.msgstr = []
        self.sourcecomments = []
        self.automaticcomments = []

    def resurrect(self):
        """Makes an obsolete unit normal"""
        self.obsolete = False
        if self.obsoletemsgctxt:
            # the context goes back to msgctxt (not to msgid)
            self.msgctxt = self.obsoletemsgctxt
            self.obsoletemsgctxt = []
        if self.obsoletemsgid:
            self.msgid = self.obsoletemsgid
            self.obsoletemsgid = []
        if self.obsoletemsgidcomments:
            self.msgidcomments = self.obsoletemsgidcomments
            self.obsoletemsgidcomments = []
        if self.obsoletemsgid_plural:
            self.msgid_plural = self.obsoletemsgid_plural
            self.obsoletemsgid_plural = []
        if self.obsoletemsgstr:
            self.msgstr = self.obsoletemsgstr
            self.obsoletemsgstr = []

    def hasplural(self):
        """returns whether this pounit contains plural strings..."""
        return len(self.msgid_plural) > 0

    def parse(self, src):
        """Parse the text src into this unit using C{poparser.parse_unit}."""
        return poparser.parse_unit(poparser.ParseState(cStringIO.StringIO(src), pounit), self)

    def _getmsgpartstr(self, partname, partlines, partcomments=""):
        """Return the text of one part (msgid, msgstr, ...) of the unit.

        @param partname: the keyword to write in front, e.g. "msgid"
        @param partlines: list of quoted lines, or a dict of such lists keyed
            by plural index (written as partname[index], in key order)
        @param partcomments: quoted KDE style comment lines to put in the part
        """
        if isinstance(partlines, dict):
            partkeys = sorted(partlines.keys())
            return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
        partstr = partname + " "
        partstartline = 0
        if len(partlines) > 0 and len(partcomments) == 0:
            partstr += partlines[0]
            partstartline = 1
        elif len(partcomments) > 0:
            if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
                # if there is a blank leader line, it must come before the comment
                partstr += partlines[0] + '\n'
                # but if the whole string is blank, leave it in
                if len(partlines) > 1:
                    partstartline += 1
            else:
                # All partcomments should start on a newline
                partstr += '""\n'
            # combine comments into one if more than one
            if len(partcomments) > 1:
                combinedcomment = []
                for comment in partcomments:
                    comment = unquotefrompo([comment])
                    if comment.startswith("_:"):
                        comment = comment[len("_:"):]
                    if comment.endswith("\\n"):
                        comment = comment[:-len("\\n")]
                    #Before we used to strip. Necessary in some cases?
                    combinedcomment.append(comment)
                partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
            # comments first, no blank leader line needed
            partstr += "\n".join(partcomments)
            partstr = quote.rstripeol(partstr)
        else:
            partstr += '""'
        partstr += '\n'
        # add the rest
        for partline in partlines[partstartline:]:
            partstr += partline + '\n'
        return partstr

    def _encodeifneccessary(self, output):
        """encodes unicode strings and returns other strings unchanged"""
        if isinstance(output, unicode):
            # The unit keeps its encoding in _encoding (set in __init__).
            encoding = encodingToUse(getattr(self, "_encoding", "UTF-8"))
            return output.encode(encoding)
        return output

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        output = self._getoutput()
        return self._encodeifneccessary(output)

    def _getoutput(self):
        """return this po element as a string"""
        def add_prev_msgid_lines(lines, header, var):
            if len(var) > 0:
                lines.append("#| %s %s\n" % (header, var[0]))
                lines.extend(["#| %s\n" % line for line in var[1:]])

        def add_prev_msgid_info(lines):
            add_prev_msgid_lines(lines, 'msgctxt', self.prev_msgctxt)
            add_prev_msgid_lines(lines, 'msgid', self.prev_msgid)
            add_prev_msgid_lines(lines, 'msgid_plural', self.prev_msgid_plural)

        lines = []
        lines.extend(self.othercomments)
        if self.isobsolete():
            lines.extend(self.typecomments)
            obsoletelines = []
            if self.obsoletemsgctxt:
                obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
            obsoletelines.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
            if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
                obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
            obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
            for index, obsoleteline in enumerate(obsoletelines):
                # We need to account for a multiline msgid or msgstr here
                obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "')
            lines.extend(obsoletelines)
            lines = [self._encodeifneccessary(line) for line in lines]
            return "".join(lines)
        # if there's no msgid don't do msgid and string, unless we're the header
        # this will also discard any comments other than plain othercomments...
        if is_null(self.msgid):
            if not (self.isheader() or self.getcontext() or self.sourcecomments):
                return "".join(lines)
        lines.extend(self.automaticcomments)
        lines.extend(self.sourcecomments)
        lines.extend(self.typecomments)
        add_prev_msgid_info(lines)
        if self.msgctxt:
            lines.append(self._getmsgpartstr("msgctxt", self.msgctxt))
        lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments))
        if self.msgid_plural or self.msgid_pluralcomments:
            lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
        lines.append(self._getmsgpartstr("msgstr", self.msgstr))
        lines = [self._encodeifneccessary(line) for line in lines]
        postr = "".join(lines)
        return postr

    def getlocations(self):
        """Get a list of locations from sourcecomments in the PO unit

        rtype: List
        return: A list of the locations with '#: ' stripped

        """
        locations = []
        for sourcecomment in self.sourcecomments:
            locations += quote.rstripeol(sourcecomment)[3:].split()
        return locations

    def addlocation(self, location):
        """Add a location to sourcecomments in the PO unit

        @param location: Text location e.g. 'file.c:23' does not include #:
        @type location: String

        """
        self.sourcecomments.append("#: %s\n" % location)

    def _extract_msgidcomments(self, text=None):
        """Extract KDE style msgid comments from the unit.

        @param text: the text to extract from; when empty or None the unit's
            own msgidcomments are used
        @rtype: String
        @return: Returns the extracted msgidcomments found in this unit's msgid.
        """

        if not text:
            text = unquotefrompo(self.msgidcomments)
        return text.split('\n')[0].replace('_: ', '', 1)

    def setmsgidcomment(self, msgidcomment):
        """Replace the msgid comments by the given one (none if it is empty)."""
        if msgidcomment:
            self.msgidcomments = ['"_: %s\\n"' % msgidcomment]
        else:
            self.msgidcomments = []

    msgidcomment = property(_extract_msgidcomments, setmsgidcomment)

    def getcontext(self):
        """Get the message context."""
        return unquotefrompo(self.msgctxt) + self._extract_msgidcomments()

    def getid(self):
        """Returns a unique identifier for this unit."""
        context = self.getcontext()
        # Gettext does not consider the plural to determine duplicates, only
        # the msgid. For generation of .mo files, we might want to use this
        # code to generate the entry for the hash table, but for now, it is
        # commented out for conformance to gettext.
#        id = '\0'.join(self.source.strings)
        unitid = self.source
        if self.msgidcomments:
            unitid = u"_: %s\n%s" % (context, unitid)
        elif context:
            unitid = u"%s\04%s" % (context, unitid)
        return unitid
681 -class pofile(pocommon.pofile):
682 """A .po file containing various units""" 683 UnitClass = pounit 684
685 - def changeencoding(self, newencoding):
686 """Deprecated: changes the encoding on the file.""" 687 # This should not be here but in poheader. It also shouldn't mangle the 688 # header itself, but use poheader methods. All users are removed, so 689 # we can deprecate after one release. 690 raise DeprecationWarning 691 692 self._encoding = encodingToUse(newencoding) 693 if not self.units: 694 return 695 header = self.header() 696 if not header or header.isblank(): 697 return 698 charsetline = None 699 headerstr = unquotefrompo(header.msgstr) 700 for line in headerstr.split("\n"): 701 if not ":" in line: 702 continue 703 key, value = line.strip().split(":", 1) 704 if key.strip() != "Content-Type": 705 continue 706 charsetline = line 707 if charsetline is None: 708 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding 709 else: 710 charset = re.search("charset=([^ ]*)", charsetline) 711 if charset is None: 712 newcharsetline = charsetline 713 if not newcharsetline.strip().endswith(";"): 714 newcharsetline += ";" 715 newcharsetline += " charset=%s" % self._encoding 716 else: 717 charset = charset.group(1) 718 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1) 719 headerstr = headerstr.replace(charsetline, newcharsetline, 1) 720 header.msgstr = quoteforpo(headerstr)
721
722 - def parse(self, input):
723 """Parses the given file or file source string.""" 724 try: 725 if hasattr(input, 'name'): 726 self.filename = input.name 727 elif not getattr(self, 'filename', ''): 728 self.filename = '' 729 if isinstance(input, str): 730 input = cStringIO.StringIO(input) 731 # clear units to get rid of automatically generated headers before parsing 732 self.units = [] 733 poparser.parse_units(poparser.ParseState(input, pounit), self) 734 except Exception, e: 735 raise base.ParseError(e)
736
737 - def removeduplicates(self, duplicatestyle="merge"):
738 """Make sure each msgid is unique ; merge comments etc from duplicates into original""" 739 # TODO: can we handle consecutive calls to removeduplicates()? What 740 # about files already containing msgctxt? - test 741 id_dict = {} 742 uniqueunits = [] 743 # TODO: this is using a list as the pos aren't hashable, but this is slow. 744 # probably not used frequently enough to worry about it, though. 745 markedpos = [] 746 def addcomment(thepo): 747 thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations())) 748 markedpos.append(thepo)
749 for thepo in self.units: 750 id = thepo.getid() 751 if thepo.isheader() and not thepo.getlocations(): 752 # header msgids shouldn't be merged... 753 uniqueunits.append(thepo) 754 elif id in id_dict: 755 if duplicatestyle == "merge": 756 if id: 757 id_dict[id].merge(thepo) 758 else: 759 addcomment(thepo) 760 uniqueunits.append(thepo) 761 elif duplicatestyle == "msgctxt": 762 origpo = id_dict[id] 763 if origpo not in markedpos: 764 origpo.msgctxt.append('"%s"' % escapeforpo(" ".join(origpo.getlocations()))) 765 markedpos.append(thepo) 766 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations()))) 767 uniqueunits.append(thepo) 768 else: 769 if not id: 770 if duplicatestyle == "merge": 771 addcomment(thepo) 772 else: 773 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations()))) 774 id_dict[id] = thepo 775 uniqueunits.append(thepo) 776 self.units = uniqueunits
777
778 - def __str__(self):
779 """Convert to a string. double check that unicode is handled somehow here""" 780 output = self._getoutput() 781 if isinstance(output, unicode): 782 return output.encode(getattr(self, "encoding", "UTF-8")) 783 return output
784
785 - def _getoutput(self):
786 """convert the units back to lines""" 787 lines = [] 788 for unit in self.units: 789 unitsrc = str(unit) + "\n" 790 lines.append(unitsrc) 791 lines = "".join(self.encode(lines)).rstrip() 792 #After the last pounit we will have \n\n and we only want to end in \n: 793 if lines: 794 lines += "\n" 795 return lines
796
797 - def encode(self, lines):
798 """encode any unicode strings in lines in self._encoding""" 799 newlines = [] 800 encoding = self._encoding 801 if encoding is None or encoding.lower() == "charset": 802 encoding = 'UTF-8' 803 for line in lines: 804 if isinstance(line, unicode): 805 line = line.encode(encoding) 806 newlines.append(line) 807 return newlines
808
809 - def decode(self, lines):
810 """decode any non-unicode strings in lines with self._encoding""" 811 newlines = [] 812 for line in lines: 813 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset": 814 try: 815 line = line.decode(self._encoding) 816 except UnicodeError, e: 817 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line)) 818 newlines.append(line) 819 return newlines
820
821 - def unit_iter(self):
822 for unit in self.units: 823 if not (unit.isheader() or unit.isobsolete()): 824 yield unit
825