1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """classes that hold units of .po files (pounit) or entire files (pofile)
22 gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)"""
23
24 from __future__ import generators
25 from translate.misc.multistring import multistring
26 from translate.misc import quote
27 from translate.misc import textwrap
28 from translate.lang import data
29 from translate.storage import pocommon, base
30 from translate.storage.pocommon import encodingToUse
31 import re
32 import copy
33 import cStringIO
34 import poparser
35
lsep = "\n#: "
"""Separator for #: entries"""

# Maps each two-character escape sequence to the single character it
# stands for.
po_unescape_map = {
    "\\r": "\r",
    "\\t": "\t",
    '\\"': '"',
    '\\n': '\n',
    '\\\\': '\\',
}
# The same table turned around: raw character -> its escape sequence.
po_escape_map = dict((raw, escaped) for (escaped, raw) in po_unescape_map.items())
43
45 """Escapes a line for po format. assumes no \n occurs in the line.
46
47 @param line: unescaped text
48 """
49 special_locations = []
50 for special_key in po_escape_map:
51 special_locations.extend(quote.find_all(line, special_key))
52 special_locations = dict.fromkeys(special_locations).keys()
53 special_locations.sort()
54 escaped_line = ""
55 last_location = 0
56 for location in special_locations:
57 escaped_line += line[last_location:location]
58 escaped_line += po_escape_map[line[location:location+1]]
59 last_location = location+1
60 escaped_line += line[last_location:]
61 return escaped_line
62
66
68 """Wrap text for po files."""
69 wrappedlines = textwrap.wrap(line, 76, replace_whitespace=False, expand_tabs=False, drop_whitespace=False)
70
71
72 if len(wrappedlines) > 1:
73 for index, line in enumerate(wrappedlines[1:]):
74 if line.startswith(' '):
75
76 wrappedlines[index+1] = line[1:]
77
78
79 wrappedlines[index] += ' '
80 return wrappedlines
81
83 """quotes the given text for a PO file, returning quoted and escaped lines"""
84 polines = []
85 if text is None:
86 return polines
87 lines = text.split("\n")
88 if len(lines) > 1 or (len(lines) == 1 and len(lines[0]) > 71):
89 if len(lines) != 2 or lines[1]:
90 polines.extend(['""'])
91 for line in lines[:-1]:
92
93 lns = wrapline(line)
94 if len(lns) > 0:
95 for ln in lns[:-1]:
96 polines.extend(['"' + escapeforpo(ln) + '"'])
97 if lns[-1]:
98 polines.extend(['"' + escapeforpo(lns[-1]) + '\\n"'])
99 else:
100 polines.extend(['"\\n"'])
101 if lines[-1]:
102 polines.extend(['"' + escapeforpo(line) + '"' for line in wrapline(lines[-1])])
103 return polines
104
106 """Remove quote and unescape line from po file.
107
108 @param line: a quoted line from a po file (msgid or msgstr)
109 """
110 extracted = quote.extractwithoutquotes(line, '"', '"', '\\', includeescapes=unescapehandler)[0]
111 return extracted
112
115
117 return lst == [] or len(lst) == 1 and lst[0] == '""'
118
120 left = string.find('"')
121 right = string.rfind('"')
122 if right > -1:
123 return string[left:right+1]
124 else:
125 return string[left:] + '"'
126
127 -class pounit(pocommon.pounit):
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142 __shallow__ = ['_store']
143
144 - def __init__(self, source=None, encoding="UTF-8"):
145 self._encoding = encodingToUse(encoding)
146 self.obsolete = False
147 self._initallcomments(blankall=True)
148 self.prev_msgctxt = []
149 self.prev_msgid = []
150 self.prev_msgid_plural = []
151 self.msgctxt = []
152 self.msgid = []
153 self.msgid_pluralcomments = []
154 self.msgid_plural = []
155 self.msgstr = []
156 self.obsoletemsgctxt = []
157 self.obsoletemsgid = []
158 self.obsoletemsgid_pluralcomments = []
159 self.obsoletemsgid_plural = []
160 self.obsoletemsgstr = []
161 pocommon.pounit.__init__(self, source)
162
172
180
181 allcomments = property(_get_all_comments)
182
191
209
213
215 """Sets the msgid to the given (unescaped) value.
216
217 @param source: an unescaped source string.
218 """
219 self.msgid, self.msgid_plural = self._set_source_vars(source)
220 source = property(getsource, setsource)
221
223 """Returns the unescaped msgid"""
224 return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)
225
227 """Sets the msgid to the given (unescaped) value.
228
229 @param source: an unescaped source string.
230 """
231 self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
232 prev_source = property(_get_prev_source, _set_prev_source)
233
241
243 """Sets the msgstr to the given (unescaped) value"""
244 self._rich_target = None
245 if isinstance(target, str):
246 target = target.decode(self._encoding)
247 if self.hasplural():
248 if isinstance(target, multistring):
249 target = target.strings
250 elif isinstance(target, basestring):
251 target = [target]
252 elif isinstance(target, (dict, list)):
253 if len(target) == 1:
254 target = target[0]
255 else:
256 raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
257 templates = self.msgstr
258 if isinstance(templates, list):
259 templates = {0: templates}
260 if isinstance(target, list):
261 self.msgstr = dict([(i, quoteforpo(target[i])) for i in range(len(target))])
262 elif isinstance(target, dict):
263 self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.iteritems()])
264 else:
265 self.msgstr = quoteforpo(target)
266 target = property(gettarget, settarget)
267
269 """Return comments based on origin value (programmer, developer, source code and translator)"""
270 if origin == None:
271 comments = u"".join([comment[2:] for comment in self.othercomments])
272 comments += u"".join([comment[3:] for comment in self.automaticcomments])
273 elif origin == "translator":
274 comments = u"".join ([comment[2:] for comment in self.othercomments])
275 elif origin in ["programmer", "developer", "source code"]:
276 comments = u"".join([comment[3:] for comment in self.automaticcomments])
277 else:
278 raise ValueError("Comment type not valid")
279
280 return comments[:-1]
281
282 - def addnote(self, text, origin=None, position="append"):
283 """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote"""
284
285 if not (text and text.strip()):
286 return
287 text = data.forceunicode(text)
288 commentlist = self.othercomments
289 linestart = "# "
290 autocomments = False
291 if origin in ["programmer", "developer", "source code"]:
292 autocomments = True
293 commentlist = self.automaticcomments
294 linestart = "#. "
295 text = text.split("\n")
296 if position == "append":
297 commentlist += [linestart + line + "\n" for line in text]
298 else:
299 newcomments = [linestart + line + "\n" for line in text]
300 newcomments += [line for line in commentlist]
301 if autocomments:
302 self.automaticcomments = newcomments
303 else:
304 self.othercomments = newcomments
305
307 """Remove all the translator's notes (other comments)"""
308 self.othercomments = []
309
311
312 new_unit = self.__class__()
313
314
315 shallow = set(self.__shallow__)
316
317 for key, value in self.__dict__.iteritems():
318 if key not in shallow:
319 setattr(new_unit, key, copy.deepcopy(value))
320
321 for key in set(shallow):
322 setattr(new_unit, key, getattr(self, key))
323
324
325 memo[id(self)] = self
326
327 return new_unit
328
330 return copy.deepcopy(self)
331
337
344
    def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
        """Merges the otherpo (with the same msgid) into this one.

        Overwrite non-blank self.msgstr only if overwrite is True
        merge comments only if comments is True

        @param otherpo: the unit to merge from. If it is not a pounit the
            call is handed to the parent class's merge (with otherpo,
            overwrite and comments) and nothing else is done here.
        @param overwrite: take otherpo's target even when this unit is
            already translated.
        @param comments: merge the comment lists of otherpo into this unit.
        @param authoritative: when true, automaticcomments, msgidcomments
            and sourcecomments are left alone; only othercomments and
            typecomments are merged.

        Note that otherpo can be modified as well: byte strings in its
        comment lists may be decoded in place, and its target may have a
        "_: ...\\n" comment removed before it is copied.
        """

        def mergelists(list1, list2, split=False):
            """Add the entries of list2 that list1 lacks to list1, in place."""
            # As soon as either list holds a unicode item, decode every byte
            # string in both lists as UTF-8 so the comparisons below do not
            # mix the two string types.
            if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
                for position, item in enumerate(list1):
                    if isinstance(item, str):
                        list1[position] = item.decode("utf-8")
                for position, item in enumerate(list2):
                    if isinstance(item, str):
                        list2[position] = item.decode("utf-8")

            # Work out the line ending from the first entry of list1; when
            # list1 is empty (or its first entry is), fall back to "\n".
            # NOTE(review): the "\n\r" candidate looks like it may have been
            # meant as "\r\n" - confirm before relying on it.
            lineend = ""
            if list1 and list1[0]:
                for candidate in ["\n", "\r", "\n\r"]:
                    if list1[0].endswith(candidate):
                        lineend = candidate
                if not lineend:
                    lineend = ""
            else:
                lineend = "\n"

            if split:
                # Treat every entry as whitespace-separated tokens: the first
                # token is the prefix, the rest are the individual values.
                # Each value from list2 that list1 does not have yet is
                # appended as its own "<prefix> <value><lineend>" entry.
                splitlist1 = []
                splitlist2 = []
                prefix = "#"
                for item in list1:
                    splitlist1.extend(item.split()[1:])
                    prefix = item.split()[0]
                for item in list2:
                    splitlist2.extend(item.split()[1:])
                    prefix = item.split()[0]
                list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1])
            else:
                # Whole-entry merge; nothing to do when the lists are equal.
                if list1 != list2:
                    for item in list2:
                        if lineend:
                            # Normalise the entry to list1's line ending.
                            item = item.rstrip() + lineend
                        # Entries shorter than 5 characters are appended even
                        # when list1 already contains them.
                        if item not in list1 or len(item) < 5:
                            list1.append(item)
        if not isinstance(otherpo, pounit):
            super(pounit, self).merge(otherpo, overwrite, comments)
            return
        if comments:
            mergelists(self.othercomments, otherpo.othercomments)
            mergelists(self.typecomments, otherpo.typecomments)
            if not authoritative:
                mergelists(self.automaticcomments, otherpo.automaticcomments)
                mergelists(self.msgidcomments, otherpo.msgidcomments)
                mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
        if not self.istranslated() or overwrite:
            # If a "_: " comment is found in otherpo's target, remove it from
            # that target before copying the target over.
            if self._extract_msgidcomments(otherpo.target):
                otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments()+ '\n', '')
            self.target = otherpo.target
            # A differing source or context makes the copied target suspect;
            # otherwise otherpo's own fuzzy state is passed to markfuzzy.
            if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
                self.markfuzzy()
            else:
                self.markfuzzy(otherpo.isfuzzy())
        elif not otherpo.istranslated():
            # Target kept, otherpo has no translation: mark fuzzy only when
            # the sources differ.
            if self.source != otherpo.source:
                self.markfuzzy()
        else:
            # Both units are translated and overwrite is off: mark fuzzy when
            # the two targets differ.
            if self.target != otherpo.target:
                self.markfuzzy()
421
423
424
425 return (is_null(self.msgid)
426 and not is_null(self.msgstr)
427 and self.msgidcomments == []
428 and is_null(self.msgctxt)
429 )
430
432 if self.isheader() or len(self.msgidcomments):
433 return False
434 if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)):
435 return True
436 return False
437
438
439
440
445
453
463
466
469
472
474 """Makes this unit obsolete"""
475 self.obsolete = True
476 if self.msgctxt:
477 self.obsoletemsgctxt = self.msgctxt
478 if self.msgid:
479 self.obsoletemsgid = self.msgid
480 self.msgid = []
481 if self.msgidcomments:
482 self.obsoletemsgidcomments = self.msgidcomments
483 self.msgidcomments = []
484 if self.msgid_plural:
485 self.obsoletemsgid_plural = self.msgid_plural
486 self.msgid_plural = []
487 if self.msgstr:
488 self.obsoletemsgstr = self.msgstr
489 self.msgstr = []
490 self.sourcecomments = []
491 self.automaticcomments = []
492
494 """Makes an obsolete unit normal"""
495 self.obsolete = False
496 if self.obsoletemsgctxt:
497 self.msgid = self.obsoletemsgctxt
498 self.obsoletemsgctxt = []
499 if self.obsoletemsgid:
500 self.msgid = self.obsoletemsgid
501 self.obsoletemsgid = []
502 if self.obsoletemsgidcomments:
503 self.msgidcomments = self.obsoletemsgidcomments
504 self.obsoletemsgidcomments = []
505 if self.obsoletemsgid_plural:
506 self.msgid_plural = self.obsoletemsgid_plural
507 self.obsoletemsgid_plural = []
508 if self.obsoletemsgstr:
509 self.msgstr = self.obsoletemsgstr
510 self.obsoletemgstr = []
511
513 """returns whether this pounit contains plural strings..."""
514 return len(self.msgid_plural) > 0
515
518
520 if isinstance(partlines, dict):
521 partkeys = partlines.keys()
522 partkeys.sort()
523 return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
524 partstr = partname + " "
525 partstartline = 0
526 if len(partlines) > 0 and len(partcomments) == 0:
527 partstr += partlines[0]
528 partstartline = 1
529 elif len(partcomments) > 0:
530 if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
531
532 partstr += partlines[0] + '\n'
533
534 if len(partlines) > 1:
535 partstartline += 1
536 else:
537
538 partstr += '""\n'
539
540 if len(partcomments) > 1:
541 combinedcomment = []
542 for comment in partcomments:
543 comment = unquotefrompo([comment])
544 if comment.startswith("_:"):
545 comment = comment[len("_:"):]
546 if comment.endswith("\\n"):
547 comment = comment[:-len("\\n")]
548
549 combinedcomment.append(comment)
550 partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
551
552 partstr += "\n".join(partcomments)
553 partstr = quote.rstripeol(partstr)
554 else:
555 partstr += '""'
556 partstr += '\n'
557
558 for partline in partlines[partstartline:]:
559 partstr += partline + '\n'
560 return partstr
561
563 """encodes unicode strings and returns other strings unchanged"""
564 if isinstance(output, unicode):
565 encoding = encodingToUse(getattr(self, "encoding", "UTF-8"))
566 return output.encode(encoding)
567 return output
568
570 """convert to a string. double check that unicode is handled somehow here"""
571 output = self._getoutput()
572 return self._encodeifneccessary(output)
573
575 """return this po element as a string"""
576 def add_prev_msgid_lines(lines, header, var):
577 if len(var) > 0:
578 lines.append("#| %s %s\n" % (header, var[0]))
579 lines.extend("#| %s\n" % line for line in var[1:])
580
581 def add_prev_msgid_info(lines):
582 add_prev_msgid_lines(lines, 'msgctxt', self.prev_msgctxt)
583 add_prev_msgid_lines(lines, 'msgid', self.prev_msgid)
584 add_prev_msgid_lines(lines, 'msgid_plural', self.prev_msgid_plural)
585
586 lines = []
587 lines.extend(self.othercomments)
588 if self.isobsolete():
589 lines.extend(self.typecomments)
590 obsoletelines = []
591 if self.obsoletemsgctxt:
592 obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.obsoletemsgctxt))
593 obsoletelines.append(self._getmsgpartstr("#~ msgid", self.obsoletemsgid, self.obsoletemsgidcomments))
594 if self.obsoletemsgid_plural or self.obsoletemsgid_pluralcomments:
595 obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.obsoletemsgid_plural, self.obsoletemsgid_pluralcomments))
596 obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.obsoletemsgstr))
597 for index, obsoleteline in enumerate(obsoletelines):
598
599 obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "')
600 lines.extend(obsoletelines)
601 lines = [self._encodeifneccessary(line) for line in lines]
602 return "".join(lines)
603
604
605 if is_null(self.msgid):
606 if not (self.isheader() or self.getcontext() or self.sourcecomments):
607 return "".join(lines)
608 lines.extend(self.automaticcomments)
609 lines.extend(self.sourcecomments)
610 lines.extend(self.typecomments)
611 add_prev_msgid_info(lines)
612 if self.msgctxt:
613 lines.append(self._getmsgpartstr("msgctxt", self.msgctxt))
614 lines.append(self._getmsgpartstr("msgid", self.msgid, self.msgidcomments))
615 if self.msgid_plural or self.msgid_pluralcomments:
616 lines.append(self._getmsgpartstr("msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
617 lines.append(self._getmsgpartstr("msgstr", self.msgstr))
618 lines = [self._encodeifneccessary(line) for line in lines]
619 postr = "".join(lines)
620 return postr
621
623 """Get a list of locations from sourcecomments in the PO unit
624
625 rtype: List
626 return: A list of the locations with '#: ' stripped
627
628 """
629 locations = []
630 for sourcecomment in self.sourcecomments:
631 locations += quote.rstripeol(sourcecomment)[3:].split()
632 return locations
633
635 """Add a location to sourcecomments in the PO unit
636
637 @param location: Text location e.g. 'file.c:23' does not include #:
638 @type location: String
639
640 """
641 self.sourcecomments.append("#: %s\n" % location)
642
653
659
660 msgidcomment = property(_extract_msgidcomments, setmsgidcomment)
661
662 - def getcontext(self):
663 """Get the message context."""
664 return unquotefrompo(self.msgctxt) + self._extract_msgidcomments()
665
667 """Returns a unique identifier for this unit."""
668 context = self.getcontext()
669
670
671
672
673
674 id = self.source
675 if self.msgidcomments:
676 id = u"_: %s\n%s" % (context, id)
677 elif context:
678 id = u"%s\04%s" % (context, id)
679 return id
680
681 -class pofile(pocommon.pofile):
682 """A .po file containing various units"""
683 UnitClass = pounit
684
686 """Deprecated: changes the encoding on the file."""
687
688
689
690 raise DeprecationWarning
691
692 self._encoding = encodingToUse(newencoding)
693 if not self.units:
694 return
695 header = self.header()
696 if not header or header.isblank():
697 return
698 charsetline = None
699 headerstr = unquotefrompo(header.msgstr)
700 for line in headerstr.split("\n"):
701 if not ":" in line:
702 continue
703 key, value = line.strip().split(":", 1)
704 if key.strip() != "Content-Type":
705 continue
706 charsetline = line
707 if charsetline is None:
708 headerstr += "Content-Type: text/plain; charset=%s" % self._encoding
709 else:
710 charset = re.search("charset=([^ ]*)", charsetline)
711 if charset is None:
712 newcharsetline = charsetline
713 if not newcharsetline.strip().endswith(";"):
714 newcharsetline += ";"
715 newcharsetline += " charset=%s" % self._encoding
716 else:
717 charset = charset.group(1)
718 newcharsetline = charsetline.replace("charset=%s" % charset, "charset=%s" % self._encoding, 1)
719 headerstr = headerstr.replace(charsetline, newcharsetline, 1)
720 header.msgstr = quoteforpo(headerstr)
721
723 """Parses the given file or file source string."""
724 try:
725 if hasattr(input, 'name'):
726 self.filename = input.name
727 elif not getattr(self, 'filename', ''):
728 self.filename = ''
729 if isinstance(input, str):
730 input = cStringIO.StringIO(input)
731
732 self.units = []
733 poparser.parse_units(poparser.ParseState(input, pounit), self)
734 except Exception, e:
735 raise base.ParseError(e)
736
738 """Make sure each msgid is unique ; merge comments etc from duplicates into original"""
739
740
741 id_dict = {}
742 uniqueunits = []
743
744
745 markedpos = []
746 def addcomment(thepo):
747 thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations()))
748 markedpos.append(thepo)
749 for thepo in self.units:
750 id = thepo.getid()
751 if thepo.isheader() and not thepo.getlocations():
752
753 uniqueunits.append(thepo)
754 elif id in id_dict:
755 if duplicatestyle == "merge":
756 if id:
757 id_dict[id].merge(thepo)
758 else:
759 addcomment(thepo)
760 uniqueunits.append(thepo)
761 elif duplicatestyle == "msgctxt":
762 origpo = id_dict[id]
763 if origpo not in markedpos:
764 origpo.msgctxt.append('"%s"' % escapeforpo(" ".join(origpo.getlocations())))
765 markedpos.append(thepo)
766 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
767 uniqueunits.append(thepo)
768 else:
769 if not id:
770 if duplicatestyle == "merge":
771 addcomment(thepo)
772 else:
773 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
774 id_dict[id] = thepo
775 uniqueunits.append(thepo)
776 self.units = uniqueunits
777
779 """Convert to a string. double check that unicode is handled somehow here"""
780 output = self._getoutput()
781 if isinstance(output, unicode):
782 return output.encode(getattr(self, "encoding", "UTF-8"))
783 return output
784
786 """convert the units back to lines"""
787 lines = []
788 for unit in self.units:
789 unitsrc = str(unit) + "\n"
790 lines.append(unitsrc)
791 lines = "".join(self.encode(lines)).rstrip()
792
793 if lines:
794 lines += "\n"
795 return lines
796
798 """encode any unicode strings in lines in self._encoding"""
799 newlines = []
800 encoding = self._encoding
801 if encoding is None or encoding.lower() == "charset":
802 encoding = 'UTF-8'
803 for line in lines:
804 if isinstance(line, unicode):
805 line = line.encode(encoding)
806 newlines.append(line)
807 return newlines
808
810 """decode any non-unicode strings in lines with self._encoding"""
811 newlines = []
812 for line in lines:
813 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
814 try:
815 line = line.decode(self._encoding)
816 except UnicodeError, e:
817 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
818 newlines.append(line)
819 return newlines
820
822 for unit in self.units:
823 if not (unit.isheader() or unit.isobsolete()):
824 yield unit
825