1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Grep XLIFF, Gettext PO and TMX localization files
23
24 Matches are output to snippet files of the same type which can then be reviewed
25 and later merged using pomerge
26
27 See: http://translate.sourceforge.net/wiki/toolkit/pogrep for examples and
28 usage instructions
29 """
30
31 from translate.storage import factory
32 from translate.misc import optrecurse
33 from translate.misc.multistring import multistring
34 from translate.lang import data
35 import re
36 import locale
37
38
40 """Just a small data structure that represents a search match."""
41
42
def __init__(self, unit, part='target', part_n=0, start=0, end=0):
    """Record where in a unit a search match was found.

    :param unit: the unit that contains the match.
    :param part: name of the part of the unit that matched ('target',
                 'source', 'notes' or 'locations').
    :param part_n: index of the matching string within that part (the
                   plural form, note or location number).
    :param start: start of the match -- presumably an offset into the
                  matching string; confirm against the code creating matches.
    :param end: end of the match -- same caveat as for ``start``.
    """
    # Which unit, and which string inside it, the match belongs to.
    self.unit, self.part, self.part_n = unit, part, part_n
    # Extent of the match.
    self.start, self.end = start, end
49
50
52 if self.part == 'target':
53 if self.unit.hasplural():
54 getter = lambda: self.unit.target.strings[self.part_n]
55 else:
56 getter = lambda: self.unit.target
57 return getter
58 elif self.part == 'source':
59 if self.unit.hasplural():
60 getter = lambda: self.unit.source.strings[self.part_n]
61 else:
62 getter = lambda: self.unit.source
63 return getter
64 elif self.part == 'notes':
65 def getter():
66 return self.unit.getnotes()[self.part_n]
67 return getter
68 elif self.part == 'locations':
69 def getter():
70 return self.unit.getlocations()[self.part_n]
71 return getter
72
74 if self.part == 'target':
75 if self.unit.hasplural():
76 def setter(value):
77 strings = self.unit.target.strings
78 strings[self.part_n] = value
79 self.unit.target = strings
80 else:
81 def setter(value):
82 self.unit.target = value
83 return setter
84
85
94
97
99 """Calculate the real index in the unnormalized string that corresponds to
100 the index nfc_index in the normalized string."""
101 length = nfc_index
102 max_length = len(string)
103 while len(data.normalize(string[:length])) <= nfc_index:
104 if length == max_length:
105 return length
106 length += 1
107 return length - 1
108
109
123
def __init__(self, searchstring, searchparts, ignorecase=False, useregexp=False,
             invertmatch=False, accelchar=None, encoding='utf-8', includeheader=False,
             max_matches=0):
    """Build a grep filter for the given search string.

    :param searchstring: the text (or regular expression) to look for. Byte
                         strings are decoded using ``encoding``; the result
                         is passed through ``data.normalize``.
    :param searchparts: names of the unit parts to search. 'source',
                        'target', 'notes' and 'locations' are recognised,
                        with 'msgid', 'msgstr' and 'comment' accepted as
                        aliases for the first three. When empty or None,
                        source and target are searched.
    :param ignorecase: match without regard to case.
    :param useregexp: treat ``searchstring`` as a regular expression.
    :param invertmatch: report the strings that do *not* match.
    :param accelchar: accelerator marker to strip from strings before they
                      are matched, or None.
    :param encoding: encoding used to decode a byte-string ``searchstring``.
    :param includeheader: stored on the filter; presumably controls whether
                          a header is copied to the output -- confirm
                          against the code that writes the output.
    :param max_matches: upper limit on the number of matches collected
                        (0 means no limit).
    :raises re.error: if ``useregexp`` is set and ``searchstring`` is not a
                      valid regular expression.
    """
    # Only byte strings need decoding. Testing for ``bytes`` rather than
    # ``unicode`` behaves identically on Python 2 (where bytes is str) and
    # does not raise NameError on Python 3.
    if isinstance(searchstring, bytes):
        searchstring = searchstring.decode(encoding)
    self.searchstring = data.normalize(searchstring)

    if searchparts:
        # The older gettext-style names are still accepted as aliases.
        self.search_source = ('source' in searchparts) or ('msgid' in searchparts)
        self.search_target = ('target' in searchparts) or ('msgstr' in searchparts)
        self.search_notes = ('notes' in searchparts) or ('comment' in searchparts)
        self.search_locations = 'locations' in searchparts
    else:
        self.search_source = True
        self.search_target = True
        self.search_notes = False
        self.search_locations = False

    self.ignorecase = ignorecase
    self.useregexp = useregexp
    if self.useregexp:
        # Never lower-case a regular expression: that silently turns
        # escapes such as \S, \W, \D and \B into their opposites. Let the
        # regex engine fold the case instead.
        regex_flags = re.IGNORECASE if self.ignorecase else 0
        self.searchpattern = re.compile(self.searchstring, regex_flags)
    elif self.ignorecase:
        # Plain substring search compares against lower-cased text.
        self.searchstring = self.searchstring.lower()

    self.invertmatch = invertmatch
    self.accelchar = accelchar
    self.includeheader = includeheader
    self.max_matches = max_matches
156
158 if teststr is None:
159 return False
160 teststr = data.normalize(teststr)
161 if self.ignorecase:
162 teststr = teststr.lower()
163 if self.accelchar:
164 teststr = re.sub(self.accelchar + self.accelchar, "#", teststr)
165 teststr = re.sub(self.accelchar, "", teststr)
166 if self.useregexp:
167 found = self.searchpattern.search(teststr)
168 else:
169 found = teststr.find(self.searchstring) != -1
170 if self.invertmatch:
171 found = not found
172 return found
173
175 """runs filters on an element"""
176 if unit.isheader(): return []
177
178 if self.search_source:
179 if isinstance(unit.source, multistring):
180 strings = unit.source.strings
181 else:
182 strings = [unit.source]
183 for string in strings:
184 if self.matches(string):
185 return True
186
187 if self.search_target:
188 if isinstance(unit.target, multistring):
189 strings = unit.target.strings
190 else:
191 strings = [unit.target]
192 for string in strings:
193 if self.matches(string):
194 return True
195
196 if self.search_notes:
197 if self.matches(unit.getnotes()):
198 return True
199 if self.search_locations:
200 if self.matches(u" ".join(unit.getlocations())):
201 return True
202 return False
203
218
220 if not self.searchstring:
221 return [], []
222
223 searchstring = self.searchstring
224 flags = re.LOCALE | re.MULTILINE | re.UNICODE
225
226 if self.ignorecase:
227 flags |= re.IGNORECASE
228 if not self.useregexp:
229 searchstring = re.escape(searchstring)
230 self.re_search = re.compile(u'(%s)' % (searchstring), flags)
231
232 matches = []
233 indexes = []
234
235 for index, unit in enumerate(units):
236 old_length = len(matches)
237
238 if self.search_target:
239 if unit.hasplural():
240 targets = unit.target.strings
241 else:
242 targets = [unit.target]
243 matches.extend(find_matches(unit, 'target', targets, self.re_search))
244 if self.search_source:
245 if unit.hasplural():
246 sources = unit.source.strings
247 else:
248 sources = [unit.source]
249 matches.extend(find_matches(unit, 'source', sources, self.re_search))
250 if self.search_notes:
251 matches.extend(find_matches(unit, 'notes', unit.getnotes(), self.re_search))
252
253 if self.search_locations:
254 matches.extend(find_matches(unit, 'locations', unit.getlocations(), self.re_search))
255
256
257
258
259 if self.max_matches and len(matches) > self.max_matches:
260 raise Exception("Too many matches found")
261
262 if len(matches) > old_length:
263 old_length = len(matches)
264 indexes.append(index)
265
266 return matches, indexes
267
269 """a specialized Option Parser for the grep tool..."""
271 """parses the command line options, handling implicit input/output args"""
272 (options, args) = optrecurse.optparse.OptionParser.parse_args(self, args, values)
273
274 if args:
275 options.searchstring = args[0]
276 args = args[1:]
277 else:
278 self.error("At least one argument must be given for the search string")
279 if args and not options.input:
280 if not options.output:
281 options.input = args[:-1]
282 args = args[-1:]
283 else:
284 options.input = args
285 args = []
286 if args and not options.output:
287 options.output = args[-1]
288 args = args[:-1]
289 if args:
290 self.error("You have used an invalid combination of --input, --output and freestanding args")
291 if isinstance(options.input, list) and len(options.input) == 1:
292 options.input = options.input[0]
293 return (options, args)
294
296 """sets the usage string - if usage not given, uses getusagestring for each option"""
297 if usage is None:
298 self.usage = "%prog searchstring " + " ".join([self.getusagestring(option) for option in self.option_list])
299 else:
300 super(GrepOptionParser, self).set_usage(usage)
301
310
def rungrep(inputfile, outputfile, templatefile, checkfilter):
    """Filter the contents of inputfile and write the result to outputfile.

    The input is loaded with ``factory.getobject`` and handed to
    ``checkfilter.filterfile``. Nothing is written when the filtered result
    is empty.

    :param inputfile: the input to load through ``factory.getobject``.
    :param outputfile: object with a ``write`` method that receives the
                       string form of the filtered result.
    :param templatefile: not used by this function; it is part of the
                         signature only.
    :param checkfilter: object whose ``filterfile`` method performs the
                        filtering.
    :return: True if something was written, False if the filtered result
             was empty.
    """
    source_store = factory.getobject(inputfile)
    filtered_store = checkfilter.filterfile(source_store)
    wrote_output = not filtered_store.isempty()
    if wrote_output:
        outputfile.write(str(filtered_store))
    return wrote_output
319
321 formats = {"po":("po", rungrep), "pot":("pot", rungrep),
322 "mo":("mo", rungrep), "gmo":("gmo", rungrep),
323 "tmx":("tmx", rungrep),
324 "xliff":("xliff", rungrep), "xlf":("xlf", rungrep), "xlff":("xlff", rungrep),
325 None:("po", rungrep)}
326 parser = GrepOptionParser(formats)
327 parser.add_option("", "--search", dest="searchparts",
328 action="append", type="choice", choices=["source", "target", "notes", "locations", "msgid", "msgstr", "comment" ],
329 metavar="SEARCHPARTS", help="searches the given parts (source, target, notes and locations)")
330 parser.add_option("-I", "--ignore-case", dest="ignorecase",
331 action="store_true", default=False, help="ignore case distinctions")
332 parser.add_option("-e", "--regexp", dest="useregexp",
333 action="store_true", default=False, help="use regular expression matching")
334 parser.add_option("-v", "--invert-match", dest="invertmatch",
335 action="store_true", default=False, help="select non-matching lines")
336 parser.add_option("", "--accelerator", dest="accelchar",
337 action="store", type="choice", choices=["&", "_", "~"],
338 metavar="ACCELERATOR", help="ignores the given accelerator when matching")
339 parser.add_option("", "--header", dest="includeheader",
340 action="store_true", default=False,
341 help="include a PO header in the output")
342 parser.set_usage()
343 parser.passthrough.append('checkfilter')
344 parser.description = __doc__
345 return parser
346
348 parser = cmdlineparser()
349 parser.run()
350
351 if __name__ == '__main__':
352 main()
353