1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """
23 Classes that hold units of .oo files (oounit) or entire files (oofile).
24
25 These are specific .oo files for localisation exported by OpenOffice.org - SDF
26 format (previously known as GSI files). For an overview of the format, see
27 U{http://l10n.openoffice.org/L10N_Framework/Intermediate_file_format.html}
28
29 The behaviour in terms of escaping is explained in detail in the programming
30 comments.
31 """
32
33
34 import os
35 import re
36 from translate.misc import quote
37 from translate.misc import wStringIO
38 import warnings
39
40
41
# Characters that are kept as-is when a key is normalised into a filename.
normalfilenamechars = "/#.0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

# 256-entry translation table indexed by character code: characters listed in
# normalfilenamechars map to themselves, every other character maps to "_".
# Built with a single join instead of repeated string concatenation.
normalizetable = "".join(
    char if char in normalfilenamechars else "_"
    for char in map(chr, range(256))
)
49
52 self.normalchars = {}
53 for char in normalchars:
54 self.normalchars[ord(char)] = char
56 return self.normalchars.get(key, u"_")
57
58 unormalizetable = unormalizechar(normalfilenamechars.decode("ascii"))
59
66
68 """converts an oo key tuple into a unique identifier
69
70 @param ookey: an oo key
71 @type ookey: tuple
72 @param long_keys: Use long keys
73 @type long_keys: Boolean
74 @rtype: str
75 @return: unique ascii identifier
76 """
77 project, sourcefile, resourcetype, groupid, localid, platform = ookey
78 sourcefile = sourcefile.replace('\\','/')
79 if long_keys:
80 sourcebase = os.path.join(project, sourcefile)
81 else:
82 sourceparts = sourcefile.split('/')
83 sourcebase = "".join(sourceparts[-1:])
84 if len(groupid) == 0 or len(localid) == 0:
85 fullid = groupid + localid
86 else:
87 fullid = groupid + "." + localid
88 if resourcetype:
89 fullid = fullid + "." + resourcetype
90 key = "%s#%s" % (sourcebase, fullid)
91 return normalizefilename(key)
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
def escape_text(text):
    """Escapes unit text so it can be stored in the text column of an SDF line.

    Each newline, tab and carriage return character is replaced by a
    backslash followed by the letter n, t or r respectively.  Nothing else
    is changed; in particular existing backslashes are not doubled.

    @param text: the unescaped text
    @return: the text with newline, tab and carriage return escaped
    """
    return text.replace("\n", "\\n").replace("\t", "\\t").replace("\r", "\\r")
116
# One escape sequence as found in the SDF text column.  The doubled backslash
# is listed first so that it is consumed as a pair and its second backslash
# can never be read as the start of a following n/t/r escape.
_SDF_ESCAPE_RE = re.compile(r"\\\\|\\n|\\t|\\r")

# Escape sequence -> replacement.  The doubled backslash is deliberately
# absent: it is matched only to protect it and is emitted unchanged.
_SDF_UNESCAPES = {"\\n": "\n", "\\t": "\t", "\\r": "\r"}


def unescape_text(text):
    """Unescapes SDF text to be suitable for unit consumption.

    A backslash followed by n, t or r becomes a newline, tab or carriage
    return.  A doubled backslash is left exactly as it is, and the letter
    following it is not treated as an escape.

    The text is processed in a single left-to-right pass, so no placeholder
    character is needed and text that already contains a BEL character
    passes through untouched.

    @param text: the text as stored in the SDF file
    @return: the text with newline, tab and carriage return unescaped
    """
    def _replace(match):
        sequence = match.group(0)
        # Fall back to the matched text itself for the doubled backslash.
        return _SDF_UNESCAPES.get(sequence, sequence)

    return _SDF_ESCAPE_RE.sub(_replace, text)
121
# Matches one lowercase tag: "<", an optional "/", a name made of a-z, "_" or
# "-", optionally a single lowercase attribute with a double-quoted value,
# optional spaces, an optional "/" and ">".  Written as a raw string so the
# backslash in the character class reaches the regex engine unchanged.
helptagre = re.compile(r'''<[/]??[a-z_\-]+?(?:| +[a-z]+?=".*?") *[/]??>''')
123
def escape_help_text(text):
    """Escapes the help text as it would be in an SDF file.

    Every backslash is doubled first.  Then each tag found by helptagre is
    checked: if it opens or closes one of the known help tag names, or is
    exactly <br/> or <help-id-missing/>, every occurrence of that tag in the
    text gets its <, > and " characters prefixed with a backslash.  Tags that
    are not recognised are left untouched.

    @param text: the unescaped help text
    @return: the text escaped for the SDF text column
    """
    escaped_tag_names = ("ahelp", "link", "item", "emph", "defaultinline",
                         "switchinline", "caseinline", "variable",
                         "bookmark_value", "image", "embedvar", "alt")
    always_escaped = ("<br/>", "<help-id-missing/>")

    text = text.replace("\\", "\\\\")
    for tag in helptagre.findall(text):
        escapethistag = tag in always_escaped or any(
            tag.startswith("<%s" % name) or tag == "</%s>" % name
            for name in escaped_tag_names
        )
        if escapethistag:
            # tag[1:-1] drops the surrounding angle brackets so they can be
            # re-added in escaped form; quotes inside the tag are escaped too.
            escaped_tag = ("\\<" + tag[1:-1] + "\\>").replace('"', '\\"')
            text = text.replace(tag, escaped_tag)
    return text
143
def unescape_help_text(text):
    """Unescapes help text read from an SDF file for unit consumption.

    Removes the backslash in front of <, > and ", and then turns each
    doubled backslash into a single one.

    @param text: the help text as stored in the SDF file
    @return: the unescaped help text
    """
    return text.replace(r"\<", "<").replace(r"\>", ">").replace(r'\"', '"').replace(r"\\", "\\")
147
149 """Encode a Unicode string as UTF-8; any other value is returned unchanged"""
150 if isinstance(text, unicode):
151 return text.encode('UTF-8')
152 return text
153
154
156 """this represents one line, one translation in an .oo file"""
158 """construct an ooline from its parts"""
159 if parts is None:
160 self.project, self.sourcefile, self.dummy, self.resourcetype, \
161 self.groupid, self.localid, self.helpid, self.platform, \
162 self.width, self.languageid, self.text, self.helptext, \
163 self.quickhelptext, self.title, self.timestamp = [""] * 15
164 else:
165 self.setparts(parts)
166
168 """create a line from its tab-delimited parts"""
169 if len(parts) != 15:
170 warnings.warn("oo line contains %d parts, it should contain 15: %r" % \
171 (len(parts), parts))
172 newparts = list(parts)
173 if len(newparts) < 15:
174 newparts = newparts + [""] * (15-len(newparts))
175 else:
176 newparts = newparts[:15]
177 parts = tuple(newparts)
178 self.project, self.sourcefile, self.dummy, self.resourcetype, \
179 self.groupid, self.localid, self.helpid, self.platform, \
180 self.width, self.languageid, self._text, self.helptext, \
181 self.quickhelptext, self.title, self.timestamp = parts
182
184 """return a list of parts in this line"""
185 return (self.project, self.sourcefile, self.dummy, self.resourcetype,
186 self.groupid, self.localid, self.helpid, self.platform,
187 self.width, self.languageid, self._text, self.helptext,
188 self.quickhelptext, self.title, self.timestamp)
189
191 """Obtains the text column and handle escaping."""
192 if self.sourcefile.endswith(".xhp"):
193 return unescape_help_text(self._text)
194 else:
195 return unescape_text(self._text)
196
def settext(self, text):
    """Sets the text column and handles escaping.

    The escaping depends on self.sourcefile: names ending in ".xhp" are
    escaped with escape_help_text, everything else with escape_text.  The
    escaped result is stored in self._text.

    @param text: the unescaped text to store
    """
    if self.sourcefile.endswith(".xhp"):
        self._text = escape_help_text(text)
    else:
        self._text = escape_text(text)
203 text = property(gettext, settext)
204
208
210 """return a line in tab-delimited form"""
211 parts = self.getparts()
212 return "\t".join(parts)
213
215 """get the key that identifies the resource"""
216 return (self.project, self.sourcefile, self.resourcetype, self.groupid,
217 self.localid, self.platform)
218
220 """this represents a number of translations of a resource"""
222 """construct the oounit"""
223 self.languages = {}
224 self.lines = []
225
227 """add a line to the oounit"""
228 self.languages[line.languageid] = line
229 self.lines.append(line)
230
234
236 """return the lines in tab-delimited form"""
237 return "\r\n".join([str(line) for line in self.lines])
238
240 """this represents an entire .oo file"""
241 UnitClass = oounit
243 """constructs the oofile"""
244 self.oolines = []
245 self.units = []
246 self.ookeys = {}
247 self.filename = ""
248 self.languages = []
249 if input is not None:
250 self.parse(input)
251
253 """adds a parsed line to the file"""
254 key = thisline.getkey()
255 element = self.ookeys.get(key, None)
256 if element is None:
257 element = self.UnitClass()
258 self.units.append(element)
259 self.ookeys[key] = element
260 element.addline(thisline)
261 self.oolines.append(thisline)
262 if thisline.languageid not in self.languages:
263 self.languages.append(thisline.languageid)
264
266 """parses lines and adds them to the file"""
267 if not self.filename:
268 self.filename = getattr(input, 'name', '')
269 if hasattr(input, "read"):
270 src = input.read()
271 input.close()
272 else:
273 src = input
274 for line in src.split("\n"):
275 line = quote.rstripeol(line)
276 if not line:
277 continue
278 parts = line.split("\t")
279 thisline = ooline(parts)
280 self.addline(thisline)
281
285
287 """converts all the lines back to tab-delimited form"""
288 lines = []
289 for oe in self.units:
290 if len(oe.lines) > 2:
291 warnings.warn("contains %d lines (should be 2 at most): languages %r" % (len(oe.lines), oe.languages))
292 oekeys = [line.getkey() for line in oe.lines]
293 warnings.warn("contains %d lines (should be 2 at most): keys %r" % (len(oe.lines), oekeys))
294 oeline = str(oe) + "\r\n"
295 lines.append(oeline)
296 return "".join(lines)
297
299 """this takes a huge GSI file and represents it as multiple smaller files..."""
def __init__(self, filename, mode=None, multifilestyle="single"):
    """initialises oomultifile from a seekable inputfile or writable outputfile

    @param filename: path of the multifile to open
    @param mode: mode passed to open(); when None, 'r' is used if filename
        exists and 'w' otherwise
    @param multifilestyle: how lines are grouped into subfiles; stored on
        the instance and consulted when subfile names are worked out
    """
    self.filename = filename
    if mode is None:
        if os.path.exists(filename):
            mode = 'r'
        else:
            mode = 'w'
    self.mode = mode
    self.multifilestyle = multifilestyle
    # The multifile name is the filename without its extension.
    self.multifilename = os.path.splitext(filename)[0]
    self.multifile = open(filename, mode)
    # Maps each subfile name to the list of line numbers belonging to it.
    self.subfilelines = {}
    if mode == "r":
        try:
            self.createsubfileindex()
        except Exception:
            # Indexing can fail on a malformed line; do not leave the file
            # handle open on a half-constructed object.
            self.multifile.close()
            raise
315
317 """reads in all the lines and works out the subfiles"""
318 linenum = 0
319 for line in self.multifile:
320 subfile = self.getsubfilename(line)
321 if not subfile in self.subfilelines:
322 self.subfilelines[subfile] = []
323 self.subfilelines[subfile].append(linenum)
324 linenum += 1
325
327 """looks up the subfile name for the line"""
328 if line.count("\t") < 2:
329 raise ValueError("invalid tab-delimited line: %r" % line)
330 lineparts = line.split("\t", 2)
331 module, filename = lineparts[0], lineparts[1]
332 if self.multifilestyle == "onefile":
333 ooname = self.multifilename
334 elif self.multifilestyle == "toplevel":
335 ooname = module
336 else:
337 filename = filename.replace("\\", "/")
338 fileparts = [module] + filename.split("/")
339 ooname = os.path.join(*fileparts[:-1])
340 return ooname + os.extsep + "oo"
341
343 """returns a list of subfiles in the file"""
344 return self.subfilelines.keys()
345
347 """iterates through the subfile names"""
348 for subfile in self.listsubfiles():
349 yield subfile
350
352 """checks if this pathname is a valid subfile"""
353 return pathname in self.subfilelines
354
356 """returns the list of lines matching the subfile"""
357 lines = []
358 requiredlines = dict.fromkeys(self.subfilelines[subfile])
359 linenum = 0
360 self.multifile.seek(0)
361 for line in self.multifile:
362 if linenum in requiredlines:
363 lines.append(line)
364 linenum += 1
365 return "".join(lines)
366
373
375 """returns a pseudo-file object for the given subfile"""
376 def onclose(contents):
377 self.multifile.write(contents)
378 self.multifile.flush()
379 outputfile = wStringIO.CatchStringOutput(onclose)
380 outputfile.filename = subfile
381 return outputfile
382
384 """returns an oofile built up from the given subfile's lines"""
385 subfilesrc = self.getsubfilesrc(subfile)
386 oosubfile = oofile()
387 oosubfile.filename = subfile
388 oosubfile.parse(subfilesrc)
389 return oosubfile
390