1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Grep XLIFF, Gettext PO and TMX localization files
23
24 Matches are output to snippet files of the same type which can then be reviewed
25 and later merged using pomerge
26
27 See: http://translate.sourceforge.net/wiki/toolkit/pogrep for examples and
28 usage instructions
29 """
30
31 from translate.storage import factory
32 from translate.storage.poheader import poheader
33 from translate.misc import optrecurse
34 from translate.misc.multistring import multistring
35 from translate.lang import data
36 import re
37 import locale
38
39
41 """Just a small data structure that represents a search match."""
42
43
def __init__(self, unit, part='target', part_n=0, start=0, end=0):
    """Record where a search hit was found.

    All arguments are stored unchanged on the instance.

    :param unit: the unit the match belongs to
    :param part: name of the matched part of the unit; the accessor code
        in this class distinguishes 'target', 'source', 'notes' and
        'locations'
    :param part_n: index of the matched string within that part (used to
        index plural strings, notes or locations)
    :param start: start of the match -- presumably a character offset
        into the matched string; confirm against the code that creates
        matches
    :param end: end of the match -- same caveat as ``start``
    """
    self.unit = unit
    self.part = part
    self.part_n = part_n
    self.start = start
    self.end = end
50
51
# NOTE(review): the `def` line of this method is missing from this listing
# (the embedded numbering jumps from 51 to 53) and the indentation was lost,
# so the method name cannot be confirmed from here.
#
# Purpose: choose, based on self.part, a zero-argument callable that reads
# the matched string from self.unit, and return that callable.  The callables
# look up self.unit each time they are invoked, not when they are created.
53 if self.part == 'target':
# Plural units keep their variants in `.strings`; part_n selects one of them.
54 if self.unit.hasplural():
55 getter = lambda: self.unit.target.strings[self.part_n]
56 else:
57 getter = lambda: self.unit.target
58 return getter
59 elif self.part == 'source':
# Same plural/singular distinction as for the target above.
60 if self.unit.hasplural():
61 getter = lambda: self.unit.source.strings[self.part_n]
62 else:
63 getter = lambda: self.unit.source
64 return getter
65 elif self.part == 'notes':
# getnotes() is indexed with part_n, so it is treated as a sequence here.
66 def getter():
67 return self.unit.getnotes()[self.part_n]
68 return getter
69 elif self.part == 'locations':
70 def getter():
71 return self.unit.getlocations()[self.part_n]
72 return getter
# No branch handles any other value of self.part; presumably the method then
# falls off the end and returns None -- confirm once indentation is restored.
73
# NOTE(review): the `def` line of this method is missing from this listing
# (the embedded numbering jumps from 73 to 75) and the indentation was lost,
# so the method name cannot be confirmed from here.
#
# Purpose: return a one-argument callable that writes a new value into the
# matched part of self.unit.  Only the 'target' part is handled in the visible
# code; for any other part no setter is built (presumably None is returned --
# confirm once indentation is restored).
75 if self.part == 'target':
76 if self.unit.hasplural():
# Plural target: replace entry part_n in the strings list, then assign the
# whole list back to unit.target.
77 def setter(value):
78 strings = self.unit.target.strings
79 strings[self.part_n] = value
80 self.unit.target = strings
81 else:
# Singular target: plain assignment.
82 def setter(value):
83 self.unit.target = value
84 return setter
85
86
95
98
# NOTE(review): the `def` line of this function is missing from this listing
# (the embedded numbering jumps from 98 to 100) and the indentation was lost.
# `string` and `nfc_index` are never assigned below, so they are presumably
# the function's parameters.
100 """Calculate the real index in the unnormalized string that corresponds to
101 the index nfc_index in the normalized string."""
# Start with a prefix of nfc_index characters and grow it one character at a
# time until its normalized form is longer than nfc_index.
102 length = nfc_index
103 max_length = len(string)
104 while len(data.normalize(string[:length])) <= nfc_index:
# The whole string has been consumed: its length is the answer.
105 if length == max_length:
106 return length
107 length += 1
# The loop ended one character past the wanted position, so step back.
108 return length - 1
109
110
123
def __init__(self, searchstring, searchparts, ignorecase=False, useregexp=False,
             invertmatch=False, accelchar=None, encoding='utf-8',
             max_matches=0):
    """Set up the search criteria used by this filter.

    :param searchstring: text (or regular expression, see ``useregexp``)
        to look for; byte strings are decoded using ``encoding``.  The
        text is passed through ``data.normalize`` before being stored.
    :param searchparts: collection of part names to search.  Recognised
        names are 'source', 'target', 'notes' and 'locations', plus the
        aliases 'msgid' (source), 'msgstr' (target) and 'comment' (notes).
        A false value selects source and target only.
    :param ignorecase: match without regard to case
    :param useregexp: treat ``searchstring`` as a regular expression
    :param invertmatch: report strings that do *not* match
    :param accelchar: accelerator marker that ``matches`` strips from the
        text before comparing
    :param encoding: encoding used to decode a byte-string ``searchstring``
    :param max_matches: limit checked while collecting matches; 0 means no
        limit
    :raises re.error: if ``useregexp`` is set and the pattern is invalid
    """
    # Only byte strings need decoding (on Python 2, ``bytes`` is ``str``).
    if isinstance(searchstring, bytes):
        searchstring = searchstring.decode(encoding)
    self.searchstring = data.normalize(searchstring)

    if searchparts:
        # 'msgid', 'msgstr' and 'comment' are accepted as alternative
        # names for 'source', 'target' and 'notes'.
        self.search_source = 'source' in searchparts or 'msgid' in searchparts
        self.search_target = 'target' in searchparts or 'msgstr' in searchparts
        self.search_notes = 'notes' in searchparts or 'comment' in searchparts
        self.search_locations = 'locations' in searchparts
    else:
        self.search_source = True
        self.search_target = True
        self.search_notes = False
        self.search_locations = False

    self.ignorecase = ignorecase
    self.useregexp = useregexp
    if self.useregexp:
        # A pattern must not be lower-cased: that would turn classes such
        # as \S, \W or \D into their opposites.  Let the regex engine do
        # the case folding instead.
        if self.ignorecase:
            flags = re.IGNORECASE | re.UNICODE
        else:
            flags = 0
        self.searchpattern = re.compile(self.searchstring, flags)
    elif self.ignorecase:
        # Plain substring search compares against lower-cased text.
        self.searchstring = self.searchstring.lower()

    self.invertmatch = invertmatch
    self.accelchar = accelchar
    self.max_matches = max_matches
155
def matches(self, teststr):
    """Tell whether ``teststr`` satisfies the search criteria.

    The text is normalized with ``data.normalize``, lower-cased when
    ``self.ignorecase`` is set and stripped of accelerator markers when
    ``self.accelchar`` is set, then compared against the compiled pattern
    (``self.useregexp``) or the plain search string.

    :param teststr: text to test; ``None`` never matches (not even when
        ``self.invertmatch`` is set)
    :return: ``True`` or ``False``; the result is negated when
        ``self.invertmatch`` is set
    """
    if teststr is None:
        return False
    teststr = data.normalize(teststr)
    if self.ignorecase:
        teststr = teststr.lower()
    if self.accelchar:
        # Literal replacement: the accelerator is a marker character, not
        # a regular expression.  A doubled marker stands for one literal
        # character and is replaced by a placeholder; single markers are
        # dropped.
        teststr = teststr.replace(self.accelchar * 2, "#")
        teststr = teststr.replace(self.accelchar, "")
    if self.useregexp:
        found = self.searchpattern.search(teststr) is not None
    else:
        found = self.searchstring in teststr
    if self.invertmatch:
        found = not found
    return found
172
# NOTE(review): the `def` line of this method is missing from this listing
# (the embedded numbering jumps from 172 to 174) and the indentation was lost.
# `unit` is never assigned below, so it is presumably the method's parameter.
174 """runs filters on an element"""
# Header units never match.  Note that this path returns an empty list while
# every other path returns True or False.
175 if unit.isheader(): return []
176
# Each enabled part is tested in turn; the first hit ends the search.
177 if self.search_source:
# A multistring carries several variants in `.strings`; test each of them.
178 if isinstance(unit.source, multistring):
179 strings = unit.source.strings
180 else:
181 strings = [unit.source]
182 for string in strings:
183 if self.matches(string):
184 return True
185
186 if self.search_target:
187 if isinstance(unit.target, multistring):
188 strings = unit.target.strings
189 else:
190 strings = [unit.target]
191 for string in strings:
192 if self.matches(string):
193 return True
194
195 if self.search_notes:
# The return value of getnotes() is handed to matches() as a single value.
196 if self.matches(unit.getnotes()):
197 return True
198 if self.search_locations:
# All locations are joined with spaces and tested as one string.
199 if self.matches(u" ".join(unit.getlocations())):
200 return True
201 return False
202
215
# NOTE(review): the `def` line of this method is missing from this listing
# (the embedded numbering jumps from 215 to 217) and the indentation was lost.
# `units` is never assigned below, so it is presumably the method's parameter.
#
# Purpose: run the search over every unit and return a pair
# (matches, indexes): `matches` accumulates whatever find_matches() yields,
# `indexes` lists the positions in `units` that contributed at least one match.
# find_matches() is not defined in the visible part of this listing.
217 if not self.searchstring:
218 return [], []
219
220 searchstring = self.searchstring
# NOTE(review): Python 3's `re` rejects re.LOCALE together with a str
# pattern / re.UNICODE -- confirm the targeted Python version.
221 flags = re.LOCALE | re.MULTILINE | re.UNICODE
222
223 if self.ignorecase:
224 flags |= re.IGNORECASE
# A plain-text search is turned into a pattern by escaping it.
225 if not self.useregexp:
226 searchstring = re.escape(searchstring)
# The pattern is wrapped in a capturing group and kept on the instance.
227 self.re_search = re.compile(u'(%s)' % (searchstring), flags)
228
229 matches = []
230 indexes = []
231
232 for index, unit in enumerate(units):
# Remember how many matches existed before this unit was examined.
233 old_length = len(matches)
234
235 if self.search_target:
# Plural units expose their variants through `.strings`.
236 if unit.hasplural():
237 targets = unit.target.strings
238 else:
239 targets = [unit.target]
240 matches.extend(find_matches(unit, 'target', targets, self.re_search))
241 if self.search_source:
242 if unit.hasplural():
243 sources = unit.source.strings
244 else:
245 sources = [unit.source]
246 matches.extend(find_matches(unit, 'source', sources, self.re_search))
247 if self.search_notes:
248 matches.extend(find_matches(unit, 'notes', unit.getnotes(), self.re_search))
249
250 if self.search_locations:
251 matches.extend(find_matches(unit, 'locations', unit.getlocations(), self.re_search))
252
253
254
255
# A max_matches of 0 disables the limit; otherwise exceeding it aborts the
# whole search with a generic Exception.
256 if self.max_matches and len(matches) > self.max_matches:
257 raise Exception("Too many matches found")
258
# This unit produced at least one new match: record its position.
259 if len(matches) > old_length:
# NOTE(review): this assignment looks redundant -- old_length is recomputed at
# the top of each iteration.
260 old_length = len(matches)
261 indexes.append(index)
262
263 return matches, indexes
264
266 """a specialized Option Parser for the grep tool..."""
def parse_args(self, args=None, values=None):
    """Parse the command line, handling the implicit positional arguments.

    After the regular option parsing, the first free-standing argument is
    taken as the search string.  The remaining free-standing arguments
    fill in ``options.input`` and ``options.output`` when those were not
    given as options.

    :param args: argument list, passed on to ``OptionParser.parse_args``
    :param values: values object, passed on to ``OptionParser.parse_args``
    :return: ``(options, args)``; ``args`` is empty on success
    """
    options, args = optrecurse.optparse.OptionParser.parse_args(self, args, values)

    # The search string is mandatory and always comes first.
    if args:
        options.searchstring = args[0]
        args = args[1:]
    else:
        self.error("At least one argument must be given for the search string")

    if args and not options.input:
        if not options.output:
            # Keep the last argument back: it becomes the output below.
            options.input = args[:-1]
            args = args[-1:]
        else:
            options.input = args
            args = []
    if args and not options.output:
        options.output = args[-1]
        args = args[:-1]
    if args:
        self.error("You have used an invalid combination of --input, --output and freestanding args")

    # A single input is passed on as a plain value, not a one-item list.
    if isinstance(options.input, list) and len(options.input) == 1:
        options.input = options.input[0]
    return (options, args)
291
def set_usage(self, usage=None):
    """Set the usage string.

    :param usage: usage text to install through the parent class.  When
        ``None``, a usage line is generated: ``%prog searchstring``
        followed by ``self.getusagestring(option)`` for every option in
        ``self.option_list``.
    """
    if usage is None:
        option_usage = " ".join(
            self.getusagestring(option) for option in self.option_list)
        self.usage = "%prog searchstring " + option_usage
    else:
        super(GrepOptionParser, self).set_usage(usage)
298
307
def rungrep(inputfile, outputfile, templatefile, checkfilter):
    """Filter ``inputfile`` with ``checkfilter`` and write the result.

    :param inputfile: input handed to ``factory.getobject`` to load the store
    :param outputfile: object with a ``write`` method that receives the
        serialized result
    :param templatefile: not used by this function
    :param checkfilter: object whose ``filterfile(store)`` returns the
        filtered store
    :return: ``False`` when the filtered store is empty (nothing is
        written), ``True`` otherwise
    """
    fromfile = factory.getobject(inputfile)
    tofile = checkfilter.filterfile(fromfile)
    if tofile.isempty():
        return False
    outputfile.write(str(tofile))
    return True
316
def cmdlineparser():
    """Build and return the command line parser for the grep tool.

    Registers the supported file formats (all handled by ``rungrep``) and
    the search options, then sets the generated usage string and uses the
    module docstring as the parser description.
    """
    # Maps an input extension to (output extension, processing function);
    # the ``None`` key is the entry for inputs without a known extension.
    formats = {
        "po": ("po", rungrep), "pot": ("pot", rungrep),
        "mo": ("mo", rungrep), "gmo": ("gmo", rungrep),
        "tmx": ("tmx", rungrep),
        "xliff": ("xliff", rungrep), "xlf": ("xlf", rungrep), "xlff": ("xlff", rungrep),
        None: ("po", rungrep),
    }
    parser = GrepOptionParser(formats)
    parser.add_option("", "--search", dest="searchparts",
        action="append", type="choice",
        choices=["source", "target", "notes", "locations", "msgid", "msgstr", "comment"],
        metavar="SEARCHPARTS", help="searches the given parts (source, target, notes and locations)")
    parser.add_option("-I", "--ignore-case", dest="ignorecase",
        action="store_true", default=False, help="ignore case distinctions")
    parser.add_option("-e", "--regexp", dest="useregexp",
        action="store_true", default=False, help="use regular expression matching")
    parser.add_option("-v", "--invert-match", dest="invertmatch",
        action="store_true", default=False, help="select non-matching lines")
    parser.add_option("", "--accelerator", dest="accelchar",
        action="store", type="choice", choices=["&", "_", "~"],
        metavar="ACCELERATOR", help="ignores the given accelerator when matching")
    parser.set_usage()
    parser.passthrough.append('checkfilter')
    parser.description = __doc__
    return parser
340
def main():
    """Command line entry point: build the option parser and run it."""
    parser = cmdlineparser()
    parser.run()


if __name__ == '__main__':
    main()
347