1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """classes that hold units of .po files (pounit) or entire files (pofile)
22 gettext-style .po (or .pot) files are used in translations for KDE et al (see kbabel)"""
23
24 from __future__ import generators
25 import copy
26 import cStringIO
27 import re
28
29 from translate.lang import data
30 from translate.misc.multistring import multistring
31 from translate.misc import quote
32 from translate.misc import textwrap
33 from translate.storage import pocommon, base, poparser
34 from translate.storage.pocommon import encodingToUse
35
36 lsep = "\n#: "
"""Separator for #: entries"""
38
39
40
# Maps each two-character escape sequence that may appear inside a quoted PO
# string to the literal character it stands for.
po_unescape_map = {"\\r": "\r", "\\t": "\t", '\\"': '"', '\\n': '\n', '\\\\': '\\'}
# The inverse of po_unescape_map: literal character -> escape sequence.
po_escape_map = dict([(value, key) for (key, value) in po_unescape_map.items()])
43
44
46 """Escapes a line for po format. assumes no \n occurs in the line.
47
48 @param line: unescaped text
49 """
50 special_locations = []
51 for special_key in po_escape_map:
52 special_locations.extend(quote.find_all(line, special_key))
53 special_locations = dict.fromkeys(special_locations).keys()
54 special_locations.sort()
55 escaped_line = ""
56 last_location = 0
57 for location in special_locations:
58 escaped_line += line[last_location:location]
59 escaped_line += po_escape_map[line[location:location+1]]
60 last_location = location + 1
61 escaped_line += line[last_location:]
62 return escaped_line
63
64
67
68
70 """Wrap text for po files."""
71 wrappedlines = textwrap.wrap(line, 76, replace_whitespace=False, expand_tabs=False, drop_whitespace=False)
72
73
74 if len(wrappedlines) > 1:
75 for index, line in enumerate(wrappedlines[1:]):
76 if line.startswith(' '):
77
78 wrappedlines[index+1] = line[1:]
79
80
81 wrappedlines[index] += ' '
82 return wrappedlines
83
84
86 """quotes the given text for a PO file, returning quoted and escaped lines"""
87 polines = []
88 if text is None:
89 return polines
90 lines = text.split("\n")
91 if len(lines) > 1 or (len(lines) == 1 and len(lines[0]) > 71):
92 if len(lines) != 2 or lines[1]:
93 polines.extend(['""'])
94 for line in lines[:-1]:
95
96 lns = wrapline(line)
97 if len(lns) > 0:
98 for ln in lns[:-1]:
99 polines.extend(['"' + escapeforpo(ln) + '"'])
100 if lns[-1]:
101 polines.extend(['"' + escapeforpo(lns[-1]) + '\\n"'])
102 else:
103 polines.extend(['"\\n"'])
104 if lines[-1]:
105 polines.extend(['"' + escapeforpo(line) + '"' for line in wrapline(lines[-1])])
106 return polines
107
108
110 """Remove quote and unescape line from po file.
111
112 @param line: a quoted line from a po file (msgid or msgstr)
113 """
114 extracted = quote.extractwithoutquotes(line, '"', '"', '\\', includeescapes=unescapehandler)[0]
115 return extracted
116
117
120
121
123 return lst == [] or len(lst) == 1 and lst[0] == '""'
124
125
127 left = string.find('"')
128 right = string.rfind('"')
129 if right > -1:
130 return string[left:right+1]
131 else:
132 return string[left:] + '"'
133
134
135 -class pounit(pocommon.pounit):
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150 __shallow__ = ['_store']
151
152 - def __init__(self, source=None, encoding="UTF-8"):
165
174
182
183 allcomments = property(_get_all_comments)
184
193
211
215
223 source = property(getsource, setsource)
224
226 """Returns the unescaped msgid"""
227 return self._get_source_vars(self.prev_msgid, self.prev_msgid_plural)
228
230 """Sets the msgid to the given (unescaped) value.
231
232 @param source: an unescaped source string.
233 """
234 self.prev_msgid, self.prev_msgid_plural = self._set_source_vars(source)
235 prev_source = property(_get_prev_source, _set_prev_source)
236
244
246 """Sets the msgstr to the given (unescaped) value"""
247 self._rich_target = None
248 if isinstance(target, str):
249 target = target.decode(self._encoding)
250 if self.hasplural():
251 if isinstance(target, multistring):
252 target = target.strings
253 elif isinstance(target, basestring):
254 target = [target]
255 elif isinstance(target, (dict, list)):
256 if len(target) == 1:
257 target = target[0]
258 else:
259 raise ValueError("po msgid element has no plural but msgstr has %d elements (%s)" % (len(target), target))
260 templates = self.msgstr
261 if isinstance(templates, list):
262 templates = {0: templates}
263 if isinstance(target, list):
264 self.msgstr = dict([(i, quoteforpo(target[i])) for i in range(len(target))])
265 elif isinstance(target, dict):
266 self.msgstr = dict([(i, quoteforpo(targetstring)) for i, targetstring in target.iteritems()])
267 else:
268 self.msgstr = quoteforpo(target)
269 target = property(gettarget, settarget)
270
272 """Return a list of alternate units.
273
274 Previous msgid and current msgstr is combined to form a single
275 alternative unit."""
276 prev_source = self.prev_source
277 if prev_source and self.isfuzzy():
278 unit = type(self)(prev_source)
279 unit.target = self.target
280
281
282
283 unit.xmlelement = dict()
284 return [unit]
285 return []
286
288 """Return comments based on origin value (programmer, developer, source code and translator)"""
289 if origin == None:
290 comments = u"".join([comment[2:] for comment in self.othercomments])
291 comments += u"".join([comment[3:] for comment in self.automaticcomments])
292 elif origin == "translator":
293 comments = u"".join([comment[2:] for comment in self.othercomments])
294 elif origin in ["programmer", "developer", "source code"]:
295 comments = u"".join([comment[3:] for comment in self.automaticcomments])
296 else:
297 raise ValueError("Comment type not valid")
298
299 return comments[:-1]
300
301 - def addnote(self, text, origin=None, position="append"):
302 """This is modeled on the XLIFF method. See xliff.py::xliffunit.addnote"""
303
304 if not (text and text.strip()):
305 return
306 text = data.forceunicode(text)
307 commentlist = self.othercomments
308 linestart = "# "
309 autocomments = False
310 if origin in ["programmer", "developer", "source code"]:
311 autocomments = True
312 commentlist = self.automaticcomments
313 linestart = "#. "
314 text = text.split("\n")
315 newcomments = [linestart + line + "\n" for line in text]
316 if position == "append":
317 newcomments = commentlist + newcomments
318 elif position == "prepend":
319 newcomments = newcomments + commentlist
320
321 if autocomments:
322 self.automaticcomments = newcomments
323 else:
324 self.othercomments = newcomments
325
327 """Remove all the translator's notes (other comments)"""
328 self.othercomments = []
329
331
332 new_unit = self.__class__()
333
334
335 shallow = set(self.__shallow__)
336
337 for key, value in self.__dict__.iteritems():
338 if key not in shallow:
339 setattr(new_unit, key, copy.deepcopy(value))
340
341 for key in set(shallow):
342 setattr(new_unit, key, getattr(self, key))
343
344
345 memo[id(self)] = self
346
347 return new_unit
348
350 return copy.deepcopy(self)
351
357
364
365 - def merge(self, otherpo, overwrite=False, comments=True, authoritative=False):
366 """Merges the otherpo (with the same msgid) into this one.
367
368 Overwrite non-blank self.msgstr only if overwrite is True
369 merge comments only if comments is True
370 """
371
372 def mergelists(list1, list2, split=False):
373
374 if unicode in [type(item) for item in list2] + [type(item) for item in list1]:
375 for position, item in enumerate(list1):
376 if isinstance(item, str):
377 list1[position] = item.decode("utf-8")
378 for position, item in enumerate(list2):
379 if isinstance(item, str):
380 list2[position] = item.decode("utf-8")
381
382
383 lineend = ""
384 if list1 and list1[0]:
385 for candidate in ["\n", "\r", "\n\r"]:
386 if list1[0].endswith(candidate):
387 lineend = candidate
388 if not lineend:
389 lineend = ""
390 else:
391 lineend = "\n"
392
393
394 if split:
395 splitlist1 = []
396 splitlist2 = []
397 prefix = "#"
398 for item in list1:
399 splitlist1.extend(item.split()[1:])
400 prefix = item.split()[0]
401 for item in list2:
402 splitlist2.extend(item.split()[1:])
403 prefix = item.split()[0]
404 list1.extend(["%s %s%s" % (prefix, item, lineend) for item in splitlist2 if not item in splitlist1])
405 else:
406
407 if list1 != list2:
408 for item in list2:
409 if lineend:
410 item = item.rstrip() + lineend
411
412 if item not in list1 or len(item) < 5:
413 list1.append(item)
414 if not isinstance(otherpo, pounit):
415 super(pounit, self).merge(otherpo, overwrite, comments)
416 return
417 if comments:
418 mergelists(self.othercomments, otherpo.othercomments)
419 mergelists(self.typecomments, otherpo.typecomments)
420 if not authoritative:
421
422
423 mergelists(self.automaticcomments, otherpo.automaticcomments)
424 mergelists(self.msgidcomments, otherpo.msgidcomments)
425 mergelists(self.sourcecomments, otherpo.sourcecomments, split=True)
426 if not self.istranslated() or overwrite:
427
428 if self._extract_msgidcomments(otherpo.target):
429 otherpo.target = otherpo.target.replace('_: ' + otherpo._extract_msgidcomments() + '\n', '')
430 self.target = otherpo.target
431 if self.source != otherpo.source or self.getcontext() != otherpo.getcontext():
432 self.markfuzzy()
433 else:
434 self.markfuzzy(otherpo.isfuzzy())
435 elif not otherpo.istranslated():
436 if self.source != otherpo.source:
437 self.markfuzzy()
438 else:
439 if self.target != otherpo.target:
440 self.markfuzzy()
441
443
444
445 return (is_null(self.msgid)
446 and not is_null(self.msgstr)
447 and self.msgidcomments == []
448 and is_null(self.msgctxt))
449
451 if self.isheader() or len(self.msgidcomments):
452 return False
453 if (self._msgidlen() == 0) and (self._msgstrlen() == 0) and (is_null(self.msgctxt)):
454 return True
455 return False
456
457
458
459
464
472
486
489
497
500
506
509
511 """Makes this unit obsolete"""
512 super(pounit, self).makeobsolete()
513 self.obsolete = True
514 self.sourcecomments = []
515 self.automaticcomments = []
516
518 """Makes an obsolete unit normal"""
519 super(pounit, self).resurrect()
520 self.obsolete = False
521
523 """returns whether this pounit contains plural strings..."""
524 return len(self.msgid_plural) > 0
525
528
        # Build the text of one message part (keyword plus its quoted lines).
        #   partname     -- the keyword written before the lines
        #   partlines    -- a list of quoted lines, or a dict of such lists
        #   partcomments -- quoted comment lines written ahead of the text
        # A dict is written as one "partname[key]" part per key, in sorted
        # key order.
        if isinstance(partlines, dict):
            partkeys = partlines.keys()
            partkeys.sort()
            return "".join([self._getmsgpartstr("%s[%d]" % (partname, partkey), partlines[partkey], partcomments) for partkey in partkeys])
        partstr = partname + " "
        # Index of the first line of partlines still to be written by the
        # final loop.
        partstartline = 0
        if len(partlines) > 0 and len(partcomments) == 0:
            # No comments: the first line follows the keyword directly.
            partstr += partlines[0]
            partstartline = 1
        elif len(partcomments) > 0:
            if len(partlines) > 0 and len(unquotefrompo(partlines[:1])) == 0:
                # A blank leading line goes before the comments.
                partstr += partlines[0] + '\n'
                # Only skip it later when more lines follow it.
                if len(partlines) > 1:
                    partstartline += 1
            else:
                # Otherwise start with an empty string so that the comments
                # begin on their own line.
                partstr += '""\n'
            # Several comments are combined into a single "_:" comment.
            if len(partcomments) > 1:
                combinedcomment = []
                for comment in partcomments:
                    comment = unquotefrompo([comment])
                    if comment.startswith("_:"):
                        comment = comment[len("_:"):]
                    if comment.endswith("\\n"):
                        comment = comment[:-len("\\n")]
                    # The comment is appended without further stripping.
                    combinedcomment.append(comment)
                partcomments = quoteforpo("_:%s" % "".join(combinedcomment))
            # Write the comments ahead of the remaining text lines.
            partstr += "\n".join(partcomments)
            partstr = quote.rstripeol(partstr)
        else:
            # Neither lines nor comments: write an empty string.
            partstr += '""'
        partstr += '\n'
        # Append the lines not yet written.
        for partline in partlines[partstartline:]:
            partstr += partline + '\n'
        return partstr
571
573 """encodes unicode strings and returns other strings unchanged"""
574 if isinstance(output, unicode):
575 encoding = encodingToUse(getattr(self, "_encoding", "UTF-8"))
576 return output.encode(encoding)
577 return output
578
580 """convert to a string. double check that unicode is handled somehow here"""
581 output = self._getoutput()
582 return self._encodeifneccessary(output)
583
        """return this po element as a string"""

        def add_prev_msgid_lines(lines, prefix, header, var):
            # Append the lines of var to lines, each preceded by prefix; the
            # first line also carries the header keyword.
            if len(var) > 0:
                lines.append("%s %s %s\n" % (prefix, header, var[0]))
                lines.extend("%s %s\n" % (prefix, line) for line in var[1:])

        def add_prev_msgid_info(lines, prefix):
            # Write the previous msgctxt, msgid and msgid_plural, in that
            # order.
            add_prev_msgid_lines(lines, prefix, 'msgctxt', self.prev_msgctxt)
            add_prev_msgid_lines(lines, prefix, 'msgid', self.prev_msgid)
            add_prev_msgid_lines(lines, prefix, 'msgid_plural', self.prev_msgid_plural)

        lines = []
        lines.extend(self.othercomments)
        if self.isobsolete():
            # Obsolete units: every message line is prefixed with "#~".
            lines.extend(self.typecomments)
            obsoletelines = []
            add_prev_msgid_info(obsoletelines, prefix="#~|")
            if self.msgctxt:
                obsoletelines.append(self._getmsgpartstr("#~ msgctxt", self.msgctxt))
            obsoletelines.append(self._getmsgpartstr("#~ msgid", self.msgid, self.msgidcomments))
            if self.msgid_plural or self.msgid_pluralcomments:
                obsoletelines.append(self._getmsgpartstr("#~ msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
            obsoletelines.append(self._getmsgpartstr("#~ msgstr", self.msgstr))
            for index, obsoleteline in enumerate(obsoletelines):
                # Continuation lines of a multi-line part need the "#~"
                # prefix as well.
                obsoletelines[index] = obsoleteline.replace('\n"', '\n#~ "')
            lines.extend(obsoletelines)
            return u"".join(lines)
        # With an empty msgid, write only the other comments, unless the unit
        # is a header or has a context or source comments.
        if is_null(self.msgid):
            if not (self.isheader() or self.getcontext() or self.sourcecomments):
                return u"".join(lines)
        lines.extend(self.automaticcomments)
        lines.extend(self.sourcecomments)
        lines.extend(self.typecomments)
        add_prev_msgid_info(lines, prefix="#|")
        if self.msgctxt:
            lines.append(self._getmsgpartstr(u"msgctxt", self.msgctxt))
        lines.append(self._getmsgpartstr(u"msgid", self.msgid, self.msgidcomments))
        if self.msgid_plural or self.msgid_pluralcomments:
            lines.append(self._getmsgpartstr(u"msgid_plural", self.msgid_plural, self.msgid_pluralcomments))
        lines.append(self._getmsgpartstr(u"msgstr", self.msgstr))
        postr = u"".join(lines)
        return postr
631
633 """Get a list of locations from sourcecomments in the PO unit
634
635 rtype: List
636 return: A list of the locations with '#: ' stripped
637
638 """
639 locations = []
640 for sourcecomment in self.sourcecomments:
641 locations += quote.rstripeol(sourcecomment)[3:].split()
642 for i, loc in enumerate(locations):
643 locations[i] = pocommon.unquote_plus(loc)
644 return locations
645
647 """Add a location to sourcecomments in the PO unit
648
649 @param location: Text location e.g. 'file.c:23' does not include #:
650 @type location: String
651
652 """
653 if location.find(" ") != -1:
654 location = pocommon.quote_plus(location)
655 self.sourcecomments.append("#: %s\n" % location)
656
667
673
674 msgidcomment = property(_extract_msgidcomments, setmsgidcomment)
675
676 - def getcontext(self):
677 """Get the message context."""
678 return unquotefrompo(self.msgctxt) + self._extract_msgidcomments()
679
680 - def setcontext(self, context):
681 context = data.forceunicode(context)
682 self.msgctxt = quoteforpo(context)
683
685 """Returns a unique identifier for this unit."""
686 context = self.getcontext()
687
688
689
690
691
692 id = self.source
693 if self.msgidcomments:
694 id = u"_: %s\n%s" % (context, id)
695 elif context:
696 id = u"%s\04%s" % (context, id)
697 return id
698
699
700 -class pofile(pocommon.pofile):
701 """A .po file containing various units"""
702 UnitClass = pounit
703
705 """Parses the given file or file source string."""
706 if True:
707
708 if hasattr(input, 'name'):
709 self.filename = input.name
710 elif not getattr(self, 'filename', ''):
711 self.filename = ''
712 if isinstance(input, str):
713 input = cStringIO.StringIO(input)
714
715 self.units = []
716 poparser.parse_units(poparser.ParseState(input, pounit), self)
717
718
719
721 """Make sure each msgid is unique ; merge comments etc from duplicates into original"""
722
723
724 id_dict = {}
725 uniqueunits = []
726
727
728 markedpos = []
729
730 def addcomment(thepo):
731 thepo.msgidcomments.append('"_: %s\\n"' % " ".join(thepo.getlocations()))
732 markedpos.append(thepo)
733 for thepo in self.units:
734 id = thepo.getid()
735 if thepo.isheader() and not thepo.getlocations():
736
737 uniqueunits.append(thepo)
738 elif id in id_dict:
739 if duplicatestyle == "merge":
740 if id:
741 id_dict[id].merge(thepo)
742 else:
743 addcomment(thepo)
744 uniqueunits.append(thepo)
745 elif duplicatestyle == "msgctxt":
746 origpo = id_dict[id]
747 if origpo not in markedpos:
748 origpo.msgctxt.append('"%s"' % escapeforpo(" ".join(origpo.getlocations())))
749 markedpos.append(thepo)
750 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
751 uniqueunits.append(thepo)
752 else:
753 if not id:
754 if duplicatestyle == "merge":
755 addcomment(thepo)
756 else:
757 thepo.msgctxt.append('"%s"' % escapeforpo(" ".join(thepo.getlocations())))
758 id_dict[id] = thepo
759 uniqueunits.append(thepo)
760 self.units = uniqueunits
761
763 """Convert to a string. double check that unicode is handled somehow here"""
764 output = self._getoutput()
765 if isinstance(output, unicode):
766 try:
767 return output.encode(getattr(self, "_encoding", "UTF-8"))
768 except UnicodeEncodeError, e:
769 self.updateheader(add=True, Content_Type="text/plain; charset=UTF-8")
770 self._encoding = "UTF-8"
771 for unit in self.units:
772 unit._encoding = "UTF-8"
773 return self._getoutput().encode("UTF-8")
774
775 return output
776
778 """convert the units back to lines"""
779 lines = []
780 for unit in self.units:
781 unitsrc = unit._getoutput() + u"\n"
782 lines.append(unitsrc)
783 lines = u"".join(lines).rstrip()
784
785 if lines:
786 lines += u"\n"
787 return lines
788
800
802 """decode any non-unicode strings in lines with self._encoding"""
803 newlines = []
804 for line in lines:
805 if isinstance(line, str) and self._encoding is not None and self._encoding.lower() != "charset":
806 try:
807 line = line.decode(self._encoding)
808 except UnicodeError, e:
809 raise UnicodeError("Error decoding line with encoding %r: %s. Line is %r" % (self._encoding, e, line))
810 newlines.append(line)
811 return newlines
812
814 for unit in self.units:
815 if not (unit.isheader() or unit.isobsolete()):
816 yield unit
817