1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import re
23
24 """
25 From the GNU gettext manual:
26 WHITE-SPACE
27 # TRANSLATOR-COMMENTS
28 #. AUTOMATIC-COMMENTS
29 #| PREVIOUS MSGID (Gettext 0.16 - check if this is the correct position - not yet implemented)
30 #: REFERENCE...
31 #, FLAG...
32 msgctxt CONTEXT (Gettext 0.15)
33 msgid UNTRANSLATED-STRING
34 msgstr TRANSLATED-STRING
35 """
36
# Module-level aliases for unbound str/list methods, so the parsing code in
# this file can call them as plain functions, e.g. startswith(line, prefix)
# or append(some_list, item), instead of looking the method up on each call.
37 isspace = str.isspace
38 find = str.find
39 rfind = str.rfind
40 startswith = str.startswith
41 append = list.append
# NOTE(review): str.decode only exists on Python 2, where str is the byte
# string type; on Python 3 this line raises AttributeError at import time.
# The file also uses iterator.next(), so it appears to target Python 2 only.
42 decode = str.decode
43
def __init__(self, input_iterator, UnitClass, encoding = None):
    """Store the parsing inputs and prime the one-line look-ahead.

    input_iterator -- object the lines are pulled from; kept as
                      ``self._input_iterator``
    UnitClass      -- kept as ``self.UnitClass``
    encoding       -- kept as ``self.encoding`` (None by default)
    """
    self.encoding = encoding
    self._input_iterator = input_iterator
    # Look-ahead starts out empty and not at end of input.
    self.next_line, self.eof = '', False
    # Load the first line; done before UnitClass is stored, as the
    # statement order is kept unchanged around this call.
    self.read_line()
    self.UnitClass = UnitClass
52
54 if self.encoding is not None:
55 return decode(string, self.encoding)
56 else:
57 return string
58
60 current = self.next_line
61 if self.eof:
62 return current
63 try:
64 self.next_line = self._input_iterator.next()
65 while not self.eof and isspace(self.next_line):
66 self.next_line = self._input_iterator.next()
67 except StopIteration:
68 self.next_line = ''
69 self.eof = True
70 return current
71
74
76 """Read all the lines starting with #|. These lines contain
77 the previous msgid and msgctxt info. We strip away the leading '#| '
78 and read until we stop seeing #|."""
79 prevmsgid_lines = []
80 next_line = parse_state.next_line
81 while startswith(next_line, '#| '):
82 append(prevmsgid_lines, parse_state.read_line()[3:])
83 next_line = parse_state.next_line
84 return prevmsgid_lines
85
87 parse_message(parse_state, 'msgctxt', 7, unit.prev_msgctxt)
88 return len(unit.prev_msgctxt) > 0
89
91 parse_message(parse_state, 'msgid', 5, unit.prev_msgid)
92 return len(unit.prev_msgid) > 0
93
95 parse_message(parse_state, 'msgid_plural', 12, unit.prev_msgid_plural)
96 return len(unit.prev_msgid_plural) > 0
97
129
137
139 """Read all the lines belonging to the current unit if obsolete."""
140 obsolete_lines = []
141 if startswith(parse_state.next_line, '#~ '):
142 append(obsolete_lines, parse_state.read_line()[3:])
143 else:
144 return obsolete_lines
145
146
147 next_line = parse_state.next_line
148 if startswith(next_line, '#~ msgid ') and obsolete_lines[-1].startswith('msgctxt'):
149 append(obsolete_lines, parse_state.read_line()[3:])
150 next_line = parse_state.next_line
151 while startswith(next_line, '#~ ') and not (startswith(next_line, '#~ msgid ') or startswith(next_line, '#~ msgctxt')):
152 append(obsolete_lines, parse_state.read_line()[3:])
153 next_line = parse_state.next_line
154 return obsolete_lines
155
164
166 line = parse_state.next_line
167 left = find(line, '"', start_pos)
168 if left == start_pos or isspace(line[start_pos:left]):
169 right = rfind(line, '"')
170 if left != right:
171 return parse_state.read_line()[left:right+1]
172 else:
173
174
175 return parse_state.read_line()[left:-1] + '"'
176 return None
177
185
187 string = parse_quoted(parse_state, first_start_pos)
188 while string is not None:
189 if not startswith(string, '"_:'):
190 append(msg_list, parse_state.decode(string))
191 string = parse_quoted(parse_state)
192 else:
193 string = parse_msg_comment(parse_state, msg_comment_list, string)
194
def parse_message(parse_state, start_of_string, start_of_string_len, msg_list, msg_comment_list=None):
    """Parse one keyword entry if the look-ahead line begins with that keyword.

    parse_state         -- parser state; its ``next_line`` is inspected
    start_of_string     -- keyword the line must start with (e.g. 'msgid')
    start_of_string_len -- forwarded to parse_multiple_quoted as its last
                           argument
    msg_list            -- list forwarded to parse_multiple_quoted
    msg_comment_list    -- optional list forwarded to parse_multiple_quoted;
                           a fresh empty list is used when omitted

    Returns whatever parse_multiple_quoted returns, or None when the
    look-ahead line does not start with ``start_of_string``.
    """
    # Nothing to do unless the pending line carries the expected keyword.
    if not startswith(parse_state.next_line, start_of_string):
        return None
    comments = [] if msg_comment_list is None else msg_comment_list
    return parse_multiple_quoted(parse_state, msg_list, comments, start_of_string_len)
200
202 parse_message(parse_state, 'msgctxt', 7, unit.msgctxt)
203 return len(unit.msgctxt) > 0
204
206 parse_message(parse_state, 'msgid', 5, unit.msgid, unit.msgidcomments)
207 return len(unit.msgid) > 0 or len(unit.msgidcomments) > 0
208
210 parse_message(parse_state, 'msgstr', 6, unit.msgstr)
211 return len(unit.msgstr) > 0
212
216
# Length of the literal prefix 'msgstr['; the bracketed number starts here.
MSGSTR_ARRAY_ENTRY_LEN = len('msgstr[')


def add_to_dict(msgstr_dict, line, right_bracket_pos, entry):
    """Append ``entry`` to the list stored under the index found in ``line``.

    msgstr_dict       -- dict mapping an int index to a list; updated in place
    line              -- text whose characters between offset
                         MSGSTR_ARRAY_ENTRY_LEN and ``right_bracket_pos``
                         form the integer index
    right_bracket_pos -- offset of the closing ']' in ``line``
    entry             -- iterable of items to add to the list for that index

    Raises ValueError if the text between the brackets is not an integer.
    """
    slot = int(line[MSGSTR_ARRAY_ENTRY_LEN:right_bracket_pos])
    # Create the list on first use, then add the new items to it.
    msgstr_dict.setdefault(slot, []).extend(entry)
224
def get_entry(parse_state, right_bracket_pos):
    """Collect the strings of one 'msgstr[' entry into a new list.

    parse_state       -- parser state handed on to parse_message
    right_bracket_pos -- offset of the ']' on the look-ahead line

    Returns the list that parse_message filled; it stays empty when
    parse_message added nothing.
    """
    collected = []
    # The offset passed on is the position just past the closing bracket.
    after_bracket = right_bracket_pos + 1
    parse_message(parse_state, 'msgstr[', after_bracket, collected)
    return collected
229
def parse_msgstr_array_entry(parse_state, msgstr_dict):
    """Try to parse one 'msgstr[N]' entry into ``msgstr_dict``.

    parse_state -- parser state; its ``next_line`` is examined
    msgstr_dict -- dict updated through add_to_dict on success

    Returns True when a ']' was found at or after offset
    MSGSTR_ARRAY_ENTRY_LEN and get_entry produced a non-empty list;
    returns False otherwise.
    """
    # Keep the line now: get_entry is handed the parse state, and the
    # index is read from this line afterwards.
    pending_line = parse_state.next_line
    bracket_at = find(pending_line, ']', MSGSTR_ARRAY_ENTRY_LEN)
    if bracket_at < 0:
        return False
    strings = get_entry(parse_state, bracket_at)
    if len(strings) == 0:
        return False
    add_to_dict(msgstr_dict, pending_line, bracket_at, strings)
    return True
242
252
259
267
269 unit = unit or parse_state.UnitClass()
270 parsed_comments = parse_comments(parse_state, unit)
271 obsolete_unit = parse_obsolete(parse_state, unit)
272 if obsolete_unit is not None:
273 return obsolete_unit
274 parsed_msg_entries = parse_msg_entries(parse_state, unit)
275 if parsed_comments or parsed_msg_entries:
276 return unit
277 else:
278 return None
279
281 charset = None
282 if isinstance(unit.msgstr, list) and len(unit.msgstr) > 0 and isinstance(unit.msgstr[0], str):
283 charset = re.search("charset=([^\\s\\\\n]+)", "".join(unit.msgstr))
284 if charset:
285 encoding = charset.group(1)
286 if encoding != 'CHARSET':
287 store._encoding = encoding
288 else:
289 store._encoding = 'utf-8'
290 else:
291 store._encoding = 'utf-8'
292 parse_state.encoding = store._encoding
293
295 return [decode(item) for item in lst]
296
298 for attr in ('msgctxt', 'msgid', 'msgid_pluralcomments',
299 'msgid_plural', 'msgstr', 'obsoletemsgctxt',
300 'obsoletemsgid', 'obsoletemsgid_pluralcomments',
301 'obsoletemsgid_plural', 'obsoletemsgstr',
302 'othercomments', 'automaticcomments', 'sourcecomments',
303 'typecomments', 'msgidcomments', 'obsoletemsgidcomments'):
304 element = getattr(unit, attr)
305 if isinstance(element, list):
306 setattr(unit, attr, decode_list(element, decode))
307 else:
308 setattr(unit, attr, dict([(key, decode_list(value, decode)) for key, value in element.items()]))
309
317
319 unit = parse_header(parse_state, store)
320 while unit:
321 store.addunit(unit)
322 unit = parse_unit(parse_state)
323 return parse_state.eof
324