Package translate :: Package filters :: Module checks
[hide private]
[frames] | no frames]

Source Code for Module translate.filters.checks

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  #  
   4  # Copyright 2004-2008 Zuza Software Foundation 
   5  #  
   6  # This file is part of translate. 
   7  # 
   8  # translate is free software; you can redistribute it and/or modify 
   9  # it under the terms of the GNU General Public License as published by 
  10  # the Free Software Foundation; either version 2 of the License, or 
  11  # (at your option) any later version. 
  12  #  
  13  # translate is distributed in the hope that it will be useful, 
  14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16  # GNU General Public License for more details. 
  17  # 
  18  # You should have received a copy of the GNU General Public License 
  19  # along with translate; if not, write to the Free Software 
  20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
  21   
  22  """This is a set of validation checks that can be performed on translation  
  23  units. 
  24   
  25  Derivatives of UnitChecker (like StandardUnitChecker) check translation units, 
  26  and derivatives of TranslationChecker (like StandardChecker) check  
  27  (source, target) translation pairs. 
  28   
  29  When adding a new test here, please document and explain the behaviour on the  
  30  U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}. 
  31  """ 
  32   
  33  from translate.filters import helpers 
  34  from translate.filters import decoration 
  35  from translate.filters import prefilters 
  36  from translate.filters import spelling 
  37  from translate.lang import factory 
  38  from translate.lang import data 
  39  # The import of xliff could fail if the user doesn't have lxml installed. For 
  40  # now we try to continue gracefully to help users who aren't interested in  
  41  # support for XLIFF or other XML formats. 
  42  try: 
  43      from translate.storage import xliff 
  44  except ImportError, e: 
  45      xliff = None 
  46  # The import of xliff fail silently in the absence of lxml if another module 
  47  # already tried to import it unsuccessfully, so let's make 100% sure: 
  48  if not hasattr(xliff, "xliffunit"): 
  49      xliff = None 
  50  import re 
  51   
  52  # These are some regular expressions that are compiled for use in some tests 
  53   
  54  # printf syntax based on http://en.wikipedia.org/wiki/Printf which doens't cover everything we leave \w instead of specifying the exact letters as 
  55  # this should capture printf types defined in other platforms. 
  56  # extended to support Python named format specifiers 
  57  printf_pat = re.compile('%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?(?P<fullvar>[+#-]*(?:\d+)?(?:\.\d+)?(hh\|h\|l\|ll)?(?P<type>[\w%])))') 
  58   
  59  # The name of the XML tag 
  60  tagname_re = re.compile("<[\s]*([\w\/]*)") 
  61   
  62  # We allow escaped quotes, probably for old escaping style of OOo helpcontent 
  63  #TODO: remove escaped strings once usage is audited 
  64  property_re = re.compile(" (\w*)=((\\\\?\".*?\\\\?\")|(\\\\?'.*?\\\\?'))") 
  65   
  66  # The whole tag 
  67  tag_re = re.compile("<[^>]+>") 
  68   
  69  gconf_attribute_re = re.compile('"[a-z_]+?"') 
  70   
def tagname(string):
    """Returns the name of the XML/HTML tag in string

    @param string: the tag markup; tagname_re is matched from the very start
    of the string
    @return: the text captured by the first group of tagname_re
    """
    # A failed match gives None, so a string tagname_re cannot match at its
    # start raises AttributeError here.
    opening = tagname_re.match(string)
    return opening.group(1)
74
def intuplelist(pair, list):
    """Looks up pair == (a,b,c) in list, treating None entries in list as
    wildcards (only honoured in positions "a" and "c").

    @param pair: the (a, b, c) tuple to look for
    @param list: the (x, y, z) patterns to search, in order
    @return: the first pattern that matches, or pair itself when nothing
    matches or when both "b" and "c" of pair are None
    """
    tag, attribute, value = pair
    if (attribute, value) == (None, None):
        #This is a tagname
        return pair
    for candidate in list:
        cand_tag, cand_attribute, cand_value = candidate
        tag_matches = cand_tag in (tag, None)
        value_matches = cand_value in (None, value)
        if tag_matches and cand_attribute == attribute and value_matches:
            return candidate
    return pair
89
def tagproperties(strings, ignore):
    """Returns all the properties in the XML/HTML tag strings as
    (tagname, propertyname, propertyvalue), but ignore those combinations
    specified in ignore.

    @param strings: the tag strings to inspect
    @param ignore: (tag, property, value) tuples that must not be reported;
    None entries are handled by intuplelist
    @return: a list that holds, per tag, a (tagname, None, None) entry
    followed by the reported properties of that tag
    """
    found = []
    for markup in strings:
        name = tagname(markup)
        found.append((name, None, None))
        #Now we isolate the attribute pairs.
        for attribute, quoted, _double, _single in property_re.findall(markup):
            #Strip the quotes:
            candidate = (name, attribute, quoted[1:-1])
            if candidate in ignore or intuplelist(candidate, ignore) != candidate:
                # NOTE(review): an ignorable property ends the scan of this
                # tag, so properties after it are not reported either --
                # confirm that this is intended rather than skipping only the
                # one property.
                break
            found.append(candidate)
    return found
112 113
class FilterFailure(Exception):
    """This exception signals that a Filter didn't pass, and gives an
    explanation or a comment.

    The explanation ends up as the single entry of C{args}.
    """

    def __init__(self, messages):
        """
        @type messages: unicode, or a list/tuple of unicode
        @param messages: one explanation, or several that are joined with
        ", " into a single text
        """
        # type(u"") is ``unicode`` on Python 2.
        text_type = type(u"")
        if not isinstance(messages, (list, tuple)):
            messages = [messages]
        # Only the first entry is inspected (assumption: all of same type).
        # An empty sequence has nothing to inspect and joins to u"".
        if messages:
            assert isinstance(messages[0], text_type)
        joined = u", ".join(messages)
        Exception.__init__(self, joined)
        # Python 2.3 doesn't have .args
        if not hasattr(self, "args"):
            self.args = joined
126
class SeriousFilterFailure(FilterFailure):
    """Raised instead of a plain FilterFailure when a Filter didn't pass and
    the bad translation might break an application (so the string will be
    marked fuzzy)"""
# Each entry is a (tag, attribute, value) tuple. It specifies a certain
# attribute which can be changed/ignored if it exists inside tag. The third
# element, when given, indicates a property value that can be ignored if
# present (like defaults, for example).
# None in a position means the entry is relevant for all values of that
# position. A non-None "value" indicates that the value of the attribute must
# be taken into account.
common_ignoretags = [
    (None, "xml-lang", None),
]
# Actually the title tag is allowed on many tags in HTML (but probably not all)
common_canchangetags = [
    ("img", "alt", None),
    (None, "title", None),
]
class CheckerConfig(object):
    """object representing the configuration of a checker"""

    def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
                 notranslatewords=None, musttranslatewords=None, validchars=None,
                 punctuation=None, endpunctuation=None, ignoretags=None,
                 canchangetags=None, criticaltests=None, credit_sources=None):
        """Stores the given settings; every setting left as None gets a
        default (an empty list, the language data or the module defaults)."""
        # List settings: None becomes a new empty list
        self.accelmarkers = self._init_list(accelmarkers)
        self.varmatches = self._init_list(varmatches)
        self.criticaltests = self._init_list(criticaltests)
        self.credit_sources = self._init_list(credit_sources)
        # Lang data; self.lang has to exist before the defaults below are read
        self.targetlanguage = targetlanguage
        self.updatetargetlanguage(targetlanguage)
        self.sourcelang = factory.getlanguage('en')
        # Settings that fall back on the target language or module defaults
        normalized_punctuation = data.normalized_unicode(punctuation)
        self.punctuation = self._init_default(normalized_punctuation, self.lang.punctuation)
        normalized_endpunctuation = data.normalized_unicode(endpunctuation)
        self.endpunctuation = self._init_default(normalized_endpunctuation, self.lang.sentenceend)
        self.ignoretags = self._init_default(ignoretags, common_ignoretags)
        self.canchangetags = self._init_default(canchangetags, common_canchangetags)
        # Other data
        # TODO: allow user configuration of untranslatable words
        notranslate = [data.normalized_unicode(word) for word in self._init_list(notranslatewords)]
        self.notranslatewords = dict.fromkeys(notranslate)
        musttranslate = [data.normalized_unicode(word) for word in self._init_list(musttranslatewords)]
        self.musttranslatewords = dict.fromkeys(musttranslate)
        normalized_validchars = data.normalized_unicode(validchars)
        self.validcharsmap = {}
        self.updatevalidchars(normalized_validchars)

    def _init_list(self, list):
        """initialise configuration parameters that are lists

        @type list: List
        @param list: None (we'll initialise a blank list) or a list parameter
        @rtype: List
        @return: the list that was passed in, or a new empty list for None
        """
        if list is not None:
            return list
        return []

    def _init_default(self, param, default):
        """initialise parameters that can have default options

        @param param: the user supplied parameter value
        @param default: default values when param is not specified
        @return: the parameter as specified by the user, or the default
        settings when param is None
        """
        if param is not None:
            return param
        return default

    def update(self, otherconfig):
        """combines the info in otherconfig into this config object"""
        self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
        self.updatetargetlanguage(self.targetlanguage)
        # The new markers are selected before any of them is added.
        newmarkers = [marker for marker in otherconfig.accelmarkers if not marker in self.accelmarkers]
        self.accelmarkers.extend(newmarkers)
        self.varmatches.extend(otherconfig.varmatches)
        self.notranslatewords.update(otherconfig.notranslatewords)
        self.musttranslatewords.update(otherconfig.musttranslatewords)
        self.validcharsmap.update(otherconfig.validcharsmap)
        self.punctuation += otherconfig.punctuation
        self.endpunctuation += otherconfig.endpunctuation
        #TODO: consider also updating in the following cases:
        # (these are taken over from otherconfig, not merged)
        self.ignoretags = otherconfig.ignoretags
        self.canchangetags = otherconfig.canchangetags
        self.criticaltests.extend(otherconfig.criticaltests)
        self.credit_sources = otherconfig.credit_sources

    def updatevalidchars(self, validchars):
        """updates the map that eliminates valid characters

        @param validchars: the characters to add to the map, or None
        @return: True when validchars is None (nothing is changed then),
        otherwise None
        """
        if validchars is None:
            return True
        # The map is keyed on code points, each mapped to None.
        codepoints = [ord(validchar) for validchar in data.normalized_unicode(validchars)]
        self.validcharsmap.update(dict.fromkeys(codepoints))

    def updatetargetlanguage(self, langcode):
        """Updates the target language in the config to the given target language"""
        self.lang = factory.getlanguage(langcode)
220
def cache_results(f):
    """Wraps the one-argument method f so that its results are remembered in
    the results_cache dictionary of the instance it is called on.

    @param f: a method taking (self, param1)
    @return: the caching replacement for f
    """
    def cached_f(self, param1):
        cache = self.results_cache
        # Entries are keyed on the name of the wrapped function and the argument.
        key = (f.__name__, param1)
        if key not in cache:
            cache[key] = f(self, param1)
        return cache[key]
    return cached_f
233 -class UnitChecker(object):
234 """Parent Checker class which does the checking based on functions available 235 in derived classes.""" 236 preconditions = {} 237
238 - def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
239 self.errorhandler = errorhandler 240 if checkerconfig is None: 241 self.setconfig(CheckerConfig()) 242 else: 243 self.setconfig(checkerconfig) 244 # exclude functions defined in UnitChecker from being treated as tests... 245 self.helperfunctions = {} 246 for functionname in dir(UnitChecker): 247 function = getattr(self, functionname) 248 if callable(function): 249 self.helperfunctions[functionname] = function 250 self.defaultfilters = self.getfilters(excludefilters, limitfilters) 251 252 self.results_cache = {}
253
254 - def getfilters(self, excludefilters=None, limitfilters=None):
255 """returns dictionary of available filters, including/excluding those in 256 the given lists""" 257 filters = {} 258 if limitfilters is None: 259 # use everything available unless instructed 260 limitfilters = dir(self) 261 if excludefilters is None: 262 excludefilters = {} 263 for functionname in limitfilters: 264 if functionname in excludefilters: continue 265 if functionname in self.helperfunctions: continue 266 if functionname == "errorhandler": continue 267 filterfunction = getattr(self, functionname, None) 268 if not callable(filterfunction): continue 269 filters[functionname] = filterfunction 270 return filters
271
272 - def setconfig(self, config):
273 """sets the accelerator list""" 274 self.config = config 275 self.accfilters = [prefilters.filteraccelerators(accelmarker) for accelmarker in self.config.accelmarkers] 276 self.varfilters = [prefilters.filtervariables(startmatch, endmatch, prefilters.varname) 277 for startmatch, endmatch in self.config.varmatches] 278 self.removevarfilter = [prefilters.filtervariables(startmatch, endmatch, prefilters.varnone) 279 for startmatch, endmatch in self.config.varmatches]
280
281 - def setsuggestionstore(self, store):
282 """Sets the filename that a checker should use for evaluating suggestions.""" 283 self.suggestion_store = store 284 if self.suggestion_store: 285 self.suggestion_store.require_index()
286
287 - def filtervariables(self, str1):
288 """filter out variables from str1""" 289 return helpers.multifilter(str1, self.varfilters)
290 filtervariables = cache_results(filtervariables) 291
292 - def removevariables(self, str1):
293 """remove variables from str1""" 294 return helpers.multifilter(str1, self.removevarfilter)
295 removevariables = cache_results(removevariables) 296
297 - def filteraccelerators(self, str1):
298 """filter out accelerators from str1""" 299 return helpers.multifilter(str1, self.accfilters, None)
300 filteraccelerators = cache_results(filteraccelerators) 301
302 - def filteraccelerators_by_list(self, str1, acceptlist=None):
303 """filter out accelerators from str1""" 304 return helpers.multifilter(str1, self.accfilters, acceptlist)
305
306 - def filterwordswithpunctuation(self, str1):
307 """replaces words with punctuation with their unpunctuated equivalents""" 308 return prefilters.filterwordswithpunctuation(str1)
309 filterwordswithpunctuation = cache_results(filterwordswithpunctuation) 310
311 - def filterxml(self, str1):
312 """filter out XML from the string so only text remains""" 313 return tag_re.sub("", str1)
314 filterxml = cache_results(filterxml) 315
316 - def run_test(self, test, unit):
317 """Runs the given test on the given unit. 318 319 Note that this can raise a FilterFailure as part of normal operation""" 320 return test(unit)
321
322 - def run_filters(self, unit):
323 """run all the tests in this suite, return failures as testname, message_or_exception""" 324 self.results_cache = {} 325 failures = {} 326 ignores = self.config.lang.ignoretests[:] 327 functionnames = self.defaultfilters.keys() 328 priorityfunctionnames = self.preconditions.keys() 329 otherfunctionnames = filter(lambda functionname: functionname not in self.preconditions, functionnames) 330 for functionname in priorityfunctionnames + otherfunctionnames: 331 if functionname in ignores: 332 continue 333 filterfunction = getattr(self, functionname, None) 334 # this filterfunction may only be defined on another checker if using TeeChecker 335 if filterfunction is None: 336 continue 337 filtermessage = filterfunction.__doc__ 338 try: 339 filterresult = self.run_test(filterfunction, unit) 340 except FilterFailure, e: 341 filterresult = False 342 filtermessage = e.args[0] 343 except Exception, e: 344 if self.errorhandler is None: 345 raise ValueError("error in filter %s: %r, %r, %s" % \ 346 (functionname, unit.source, unit.target, e)) 347 else: 348 filterresult = self.errorhandler(functionname, unit.source, unit.target, e) 349 if not filterresult: 350 # we test some preconditions that aren't actually a cause for failure 351 if functionname in self.defaultfilters: 352 failures[functionname] = filtermessage 353 if functionname in self.preconditions: 354 for ignoredfunctionname in self.preconditions[functionname]: 355 ignores.append(ignoredfunctionname) 356 self.results_cache = {} 357 return failures
358
359 -class TranslationChecker(UnitChecker):
360 """A checker that passes source and target strings to the checks, not the 361 whole unit. 362 363 This provides some speedup and simplifies testing."""
364 - def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
365 super(TranslationChecker, self).__init__(checkerconfig, excludefilters, limitfilters, errorhandler)
366
367 - def run_test(self, test, unit):
368 """Runs the given test on the given unit. 369 370 Note that this can raise a FilterFailure as part of normal operation.""" 371 if self.hasplural: 372 filtermessages = [] 373 filterresult = True 374 for pluralform in unit.target.strings: 375 try: 376 if not test(self.str1, unicode(pluralform)): 377 filterresult = False 378 except FilterFailure, e: 379 filterresult = False 380 filtermessages.append( unicode(e.args) ) 381 if not filterresult and filtermessages: 382 raise FilterFailure(filtermessages) 383 else: 384 return filterresult 385 else: 386 return test(self.str1, self.str2)
387
388 - def run_filters(self, unit):
389 """Do some optimisation by caching some data of the unit for the benefit 390 of run_test().""" 391 self.str1 = data.normalized_unicode(unit.source) or u"" 392 self.str2 = data.normalized_unicode(unit.target) or u"" 393 self.hasplural = unit.hasplural() 394 self.locations = unit.getlocations() 395 return super(TranslationChecker, self).run_filters(unit)
396
class TeeChecker:
    """A Checker that controls multiple checkers."""

    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None,
                 checkerclasses=None, errorhandler=None, languagecode=None):
        """construct a TeeChecker from the given checkers

        @param checkerclasses: the checker classes to instantiate; only
        StandardChecker is used for None
        @param languagecode: when given, the target language of every checker
        is set to it and the checker of that language, if any, is added
        """
        self.limitfilters = limitfilters
        if checkerclasses is None:
            checkerclasses = [StandardChecker]
        self.checkers = []
        for checkerclass in checkerclasses:
            self.checkers.append(checkerclass(checkerconfig=checkerconfig,
                                              excludefilters=excludefilters,
                                              limitfilters=limitfilters,
                                              errorhandler=errorhandler))
        if languagecode:
            for checker in self.checkers:
                checker.config.updatetargetlanguage(languagecode)
            # Let's hook up the language specific checker
            lang_checker = self.checkers[0].config.lang.checker
            if lang_checker:
                self.checkers.append(lang_checker)

        self.combinedfilters = self.getfilters(excludefilters, limitfilters)
        self.config = checkerconfig or self.checkers[0].config

    def getfilters(self, excludefilters=None, limitfilters=None):
        """returns dictionary of available filters, including/excluding those in
        the given lists"""
        if excludefilters is None:
            excludefilters = {}
        # Collect from every checker first, then merge: later checkers win.
        filterslist = [checker.getfilters(excludefilters, limitfilters) for checker in self.checkers]
        combined = {}
        self.combinedfilters = combined
        for filters in filterslist:
            combined.update(filters)
        # TODO: move this somewhere more sensible (a checkfilters method?)
        if limitfilters is not None:
            for filtername in limitfilters:
                if filtername in combined:
                    continue
                import sys
                print >> sys.stderr, "warning: could not find filter %s" % filtername
        return combined

    def run_filters(self, unit):
        """run all the tests in the checker's suites"""
        failures = {}
        for checker in self.checkers:
            # a later checker overrides the message of an equally named test
            failures.update(checker.run_filters(unit))
        return failures

    def setsuggestionstore(self, store):
        """Passes the store to use for evaluating suggestions on to every checker."""
        for checker in self.checkers:
            checker.setsuggestionstore(store)
445 446
447 -class StandardChecker(TranslationChecker):
448 """The basic test suite for source -> target translations."""
def untranslated(self, str1, str2):
    """checks whether a string has been translated at all"""
    # The docstring doubles as the failure message (run_filters reports the
    # test's __doc__), so it is kept as is.
    target = prefilters.removekdecomments(str2)
    source_has_text = len(str1.strip()) > 0
    # Only the source is stripped: a target of spaces is not "untranslated".
    return not (source_has_text and len(target) == 0)
453
def unchanged(self, str1, str2):
    """checks whether a translation is basically identical to the original string"""
    source = self.filteraccelerators(self.removevariables(str1)).strip()
    target = self.filteraccelerators(self.removevariables(str2)).strip()
    if len(source) < 2:
        return True
    # If the whole string is uppercase, or nothing in the string can go
    # towards uppercase, let's assume there is nothing translatable
    # TODO: reconsider
    nothing_to_uppercase = source.isupper() or source.upper() == source
    if nothing_to_uppercase and source == target:
        return True
    notranslate = self.config.notranslatewords
    if notranslate:
        words = source.split()
        #why do we only test for one notranslate word?
        if len(words) == 1 and words[0] in notranslate:
            return True
    # we could also check for things like source.isnumeric(), but the test
    # above (source.upper() == source) makes this unnecessary
    if source.lower() == target.lower():
        raise FilterFailure(u"please translate")
    return True
477
def blank(self, str1, str2):
    """checks whether a translation only contains spaces"""
    if not str1.strip():
        # nothing to translate, so nothing to complain about
        return True
    # Fails only for a target that is not empty, yet strips down to nothing.
    return len(str2) == 0 or len(str2.strip()) != 0
483
def short(self, str1, str2):
    """checks whether a translation is much shorter than the original string"""
    sourcelen = len(str1.strip())
    targetlen = len(str2.strip())
    # less than a tenth of the source (an empty target is left to other tests)
    far_shorter = sourcelen > 0 and 0 < targetlen < sourcelen * 0.1
    # a single character for a source of more than one character
    single_char = sourcelen > 1 and targetlen == 1
    return not (far_shorter or single_char)
489
def long(self, str1, str2):
    """checks whether a translation is much longer than the original string"""
    sourcelen = len(str1.strip())
    targetlen = len(str2.strip())
    # the source is less than a tenth of the target
    far_longer = sourcelen > 0 and 0 < sourcelen < targetlen * 0.1
    # more than one character for a source of a single character
    from_single_char = sourcelen == 1 and targetlen > 1
    return not (far_longer or from_single_char)
495
def escapes(self, str1, str2):
    """checks whether escaping is consistent between the two strings"""
    if helpers.countsmatch(str1, str2, (u"\\", u"\\\\")):
        return True

    def escapedwords(text):
        # quote every whitespace separated word that holds a backslash
        return u", ".join([u"'%s'" % word for word in text.split() if u"\\" in word])

    escapes1 = escapedwords(str1)
    escapes2 = escapedwords(str2)
    raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % (escapes1, escapes2))
504
def newlines(self, str1, str2):
    """checks whether newlines are consistent between the two strings"""
    # "\n" and "\r" are counted separately
    if helpers.countsmatch(str1, str2, (u"\n", u"\r")):
        return True
    raise FilterFailure(u"line endings in original don't match line endings in translation")
511
def tabs(self, str1, str2):
    """checks whether tabs are consistent between the two strings"""
    if helpers.countmatch(str1, str2, "\t"):
        return True
    # a tab mismatch is reported as a serious failure
    raise SeriousFilterFailure(u"tabs in original don't match tabs in translation")
518
def singlequoting(self, str1, str2):
    """checks whether singlequoting is consistent between the two strings"""
    source = self.filtervariables(str1)
    source = self.filteraccelerators(source)
    source = self.filterwordswithpunctuation(source)
    # only the source gets its punctuation translated for the target language
    source = self.config.lang.punctranslate(source)
    target = self.filtervariables(str2)
    target = self.filteraccelerators(target)
    target = self.filterwordswithpunctuation(target)
    return helpers.countsmatch(source, target, (u"'", u"''", u"\\'"))
525
def doublequoting(self, str1, str2):
    """checks whether doublequoting is consistent between the two strings"""
    source = self.filterxml(self.filteraccelerators(self.filtervariables(str1)))
    # only the source gets its punctuation translated for the target language
    source = self.config.lang.punctranslate(source)
    target = self.filterxml(self.filteraccelerators(self.filtervariables(str2)))
    quotes = (u'"', u'""', u'\\"', u"«", u"»", u"“", u"”")
    return helpers.countsmatch(source, target, quotes)
534
def doublespacing(self, str1, str2):
    """checks for bad double-spaces by comparing to original"""
    # The docstring doubles as the failure message (run_filters reports the
    # test's __doc__), so it is kept as is.
    str1 = self.filteraccelerators(str1)
    str2 = self.filteraccelerators(str2)
    # Compare occurrences of TWO consecutive spaces. Counting single spaces
    # would fail nearly every translation whose word count differs from the
    # original's.
    return helpers.countmatch(str1, str2, u"  ")
540
def puncspacing(self, str1, str2):
    """checks for bad spacing after punctuation"""
    # Convert all nbsp to space, and just check spaces. Useful intermediate step to stricter nbsp checking?
    source = self.filteraccelerators(self.filtervariables(str1))
    source = self.config.lang.punctranslate(source)
    source = source.replace(u"\u00a0", u" ")
    if source.find(u" ") == -1:
        # no space in the source at all, so there is nothing to compare
        return True
    target = self.filteraccelerators(self.filtervariables(str2))
    target = target.replace(u"\u00a0", u" ")
    for puncchar in self.config.punctuation:
        occurrences = source.count(puncchar)
        # only punctuation that occurs equally often in both is compared
        if not occurrences or occurrences != target.count(puncchar):
            continue
        spaced1 = source.count(puncchar + u" ")
        spaced2 = target.count(puncchar + u" ")
        if spaced1 == spaced2:
            continue
        # handle extra spaces that are because of transposed punctuation
        if source.endswith(puncchar) != target.endswith(puncchar) and abs(spaced1 - spaced2) == 1:
            continue
        return False
    return True
564
def printf(self, str1, str2):
    """checks whether printf format strings match"""
    # count1/count2 end up as the number of specifiers seen in the original
    # and in the translation; None means that none was seen.
    count1 = count2 = plural = None
    # self.hasplural only set by run_filters, not always available
    if 'hasplural' in self.__dict__:
        plural = self.hasplural
    sourcespecs = list(printf_pat.finditer(str1))
    for index2, spec2 in enumerate(printf_pat.finditer(str2)):
        count2 = index2 + 1
        fullvar2 = spec2.group('fullvar')
        key2 = spec2.group('key')
        # '%.0s' "placeholder" in plural will match anything
        wildcard = plural and fullvar2 == '.0s'
        if spec2.group('ord'):
            # positional (%1$s): compare with the original's specifier at
            # that position
            position = int(spec2.group('ord'))
            for index1, spec1 in enumerate(sourcespecs):
                count1 = index1 + 1
                if position == count1 and fullvar2 != spec1.group('fullvar'):
                    return 0
        elif key2:
            # named (%(key)s): the key has to occur in the original
            keyfound = False
            for index1, spec1 in enumerate(sourcespecs):
                count1 = index1 + 1
                if spec1.group('key') != key2:
                    continue
                keyfound = True
                if wildcard:
                    continue
                if spec1.group('fullvar') != fullvar2:
                    return 0
            if not keyfound:
                return 0
        else:
            # plain: compare with the specifier at the same index
            for index1, spec1 in enumerate(sourcespecs):
                count1 = index1 + 1
                if wildcard:
                    continue
                if index1 == index2 and spec1.group('fullvar') != fullvar2:
                    return 0

    # nothing in the translation, but something in the original
    if count2 is None and sourcespecs:
        return 0

    if (count1 or count2) and (count1 != count2):
        return 0
    return 1
609
def accelerators(self, str1, str2):
    """checks whether accelerators are consistent between the two strings"""
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    messages = []
    for accelmarker in self.config.accelmarkers:
        counter1 = decoration.countaccelerators(accelmarker, self.config.sourcelang.validaccel)
        counter2 = decoration.countaccelerators(accelmarker, self.config.lang.validaccel)
        count1, countbad1 = counter1(source)
        count2, countbad2 = counter2(target)
        getaccel = decoration.getaccelerators(accelmarker, self.config.lang.validaccel)
        accel2, bad2 = getaccel(target)
        if count1 == count2:
            continue
        # Pick the one message that describes this marker's mismatch best.
        if count1 == 1 and count2 == 0:
            if countbad2 == 1:
                message = u"accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (accelmarker, bad2[0])
            else:
                message = u"accelerator %s is missing from translation" % accelmarker
        elif count1 == 0:
            message = u"accelerator %s does not occur in original and should not be in translation" % accelmarker
        elif count1 == 1 and count2 > count1:
            message = u"accelerator %s is repeated in translation" % accelmarker
        else:
            message = u"accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (accelmarker, count1, count2)
        messages.append(message)
    if not messages:
        return True
    # The configuration decides whether an accelerator problem is serious.
    if "accelerators" in self.config.criticaltests:
        raise SeriousFilterFailure(messages)
    raise FilterFailure(messages)
641 642 # def acceleratedvariables(self, str1, str2): 643 # """checks that no variables are accelerated""" 644 # messages = [] 645 # for accelerator in self.config.accelmarkers: 646 # for variablestart, variableend in self.config.varmatches: 647 # error = accelerator + variablestart 648 # if str1.find(error) >= 0: 649 # messages.append(u"original has an accelerated variable") 650 # if str2.find(error) >= 0: 651 # messages.append(u"translation has an accelerated variable") 652 # if messages: 653 # raise FilterFailure(messages) 654 # return True 655
def variables(self, str1, str2):
    """checks whether variables of various forms are consistent between the two strings"""
    mismatch1, mismatch2 = [], []
    varnames1, varnames2 = [], []
    for startmarker, endmarker in self.config.varmatches:
        varchecker = decoration.getvariables(startmarker, endmarker)
        # redecorate puts the markers back around a variable name for the
        # message; an integer endmarker is not a marker text to append
        if startmarker and endmarker and not isinstance(endmarker, int):
            def redecorate(var):
                return startmarker + var + endmarker
        elif startmarker:
            def redecorate(var):
                return startmarker + var
        else:
            def redecorate(var):
                return var
        vars1 = varchecker(str1)
        vars2 = varchecker(str2)
        if vars1 == vars2:
            continue
        # we use counts to compare so we can handle multiple variables
        surplus1 = [var for var in vars1 if vars1.count(var) > vars2.count(var)]
        surplus2 = [var for var in vars2 if vars1.count(var) < vars2.count(var)]
        # filter variable names we've already seen, so they aren't matched by more than one filter...
        surplus1 = [var for var in surplus1 if var not in varnames1]
        surplus2 = [var for var in surplus2 if var not in varnames2]
        varnames1.extend(surplus1)
        varnames2.extend(surplus2)
        mismatch1.extend([redecorate(var) for var in surplus1])
        mismatch2.extend([redecorate(var) for var in surplus2])
    # Variables lost from the original are serious and are the only thing
    # reported then; extra variables are only reported when nothing is lost.
    if mismatch1:
        raise SeriousFilterFailure([u"do not translate: %s" % u", ".join(mismatch1)])
    if mismatch2:
        raise FilterFailure([u"translation contains variables not in original: %s" % u", ".join(mismatch2)])
    return True
694
def functions(self, str1, str2):
    """checks that function names are not translated"""
    # the configured punctuation is passed on to decoration.getfunctions
    punctuation = self.config.punctuation
    return helpers.funcmatch(str1, str2, decoration.getfunctions, punctuation)
698
def emails(self, str1, str2):
    """checks that emails are not translated"""
    # both strings are run through the same extractor and then compared
    extractor = decoration.getemails
    return helpers.funcmatch(str1, str2, extractor)
702
def urls(self, str1, str2):
    """checks that URLs are not translated"""
    # both strings are run through the same extractor and then compared
    extractor = decoration.geturls
    return helpers.funcmatch(str1, str2, extractor)
706
def numbers(self, str1, str2):
    """checks whether numbers of various forms are consistent between the two strings"""
    # Only the numbers found in the original are counted in both strings.
    sourcenumbers = decoration.getnumbers(str1)
    return helpers.countsmatch(str1, str2, sourcenumbers)
710
def startwhitespace(self, str1, str2):
    """checks whether whitespace at the beginning of the strings matches"""
    # both strings are run through decoration.spacestart and then compared
    extractor = decoration.spacestart
    return helpers.funcmatch(str1, str2, extractor)
714
def endwhitespace(self, str1, str2):
    """checks whether whitespace at the end of the strings matches"""
    # only the source gets its punctuation translated for the target language
    source = self.config.lang.punctranslate(str1)
    return helpers.funcmatch(source, str2, decoration.spaceend)
719
def startpunc(self, str1, str2):
    """checks whether punctuation at the beginning of the strings match"""
    source = self.filterwordswithpunctuation(str1)
    source = self.filterxml(self.filteraccelerators(self.filtervariables(source)))
    # only the source gets its punctuation translated for the target language
    source = self.config.lang.punctranslate(source)
    target = self.filterwordswithpunctuation(str2)
    target = self.filterxml(self.filteraccelerators(self.filtervariables(target)))
    return helpers.funcmatch(source, target, decoration.puncstart, self.config.punctuation)
726
def endpunc(self, str1, str2):
    """checks whether punctuation at the end of the strings match"""
    # only the source gets its punctuation translated for the target language
    source = self.config.lang.punctranslate(self.filtervariables(str1)).rstrip()
    target = self.filtervariables(str2).rstrip()
    # a colon counts as end punctuation here, next to the configured characters
    endings = self.config.endpunctuation + u":"
    return helpers.funcmatch(source, target, decoration.puncend, endings)
735
def purepunc(self, str1, str2):
    """checks that strings that are purely punctuation are not changed"""
    # this test is a subset of startandend
    if decoration.ispurepunctuation(str1):
        return str1 == str2
    # a source with real text must not become pure punctuation
    return not decoration.ispurepunctuation(str2)
743
def brackets(self, str1, str2):
    """checks that the number of brackets in both strings match"""
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    missing = []
    extra = []
    # every kind of bracket is counted on its own, openers and closers alike
    for bracket in u"[]{}()":
        difference = target.count(bracket) - source.count(bracket)
        if difference < 0:
            missing.append(u"'%s'" % bracket)
        elif difference > 0:
            extra.append(u"'%s'" % bracket)
    messages = []
    if missing:
        messages.append(u"translation is missing %s" % u", ".join(missing))
    if extra:
        messages.append(u"translation has extra %s" % u", ".join(extra))
    if not messages:
        return True
    raise FilterFailure(messages)
765
def sentencecount(self, str1, str2):
    """checks that the number of sentences in both strings match

    Accelerators are filtered out, then the source is split with the
    source language's C{sentences} and the target with the target
    language's C{sentences}.

    @return: True when both strings have the same number of sentences
    @raise FilterFailure: reporting both counts when they differ
    """
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    source_total = len(self.config.sourcelang.sentences(source))
    target_total = len(self.config.lang.sentences(target))
    if source_total != target_total:
        raise FilterFailure(u"The number of sentences differ: %d versus %d" % (source_total, target_total))
    return True
775
def options(self, str1, str2):
    """checks that options are not translated

    A word of the (variable-filtered) source counts as an option when it
    starts with C{--}, is not just C{--}, and ends in an alphanumeric
    character.  The part before C{=} must occur in the translation; the
    part after it must not.

    @return: True when every option passes
    @raise FilterFailure: for the first option that is missing, or whose
    parameter was left untranslated
    """
    source = self.filtervariables(str1)
    for word in source.split():
        looks_like_option = (word.startswith(u"--") and word != u"--"
                             and word[-1].isalnum())
        if not looks_like_option:
            continue
        parts = word.split(u"=")
        option = parts[0]
        if option not in str2:
            raise FilterFailure(u"The option %s does not occur or is translated in the translation." % option)
        if len(parts) > 1 and parts[1] in str2:
            raise FilterFailure(u"The parameter %(param)s in option %(option)s is not translated." % {"param": parts[1], "option": option})
    return True
787
def startcaps(self, str1, str2):
    """checks that the message starts with the correct capitalisation

    Accelerators are filtered out first.  When both strings are longer than
    one character the C{capsstart} result of the source language is compared
    with that of the target language.  Otherwise the check only fails when
    exactly one of the strings is empty.
    """
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    if len(source) > 1 and len(target) > 1:
        return self.config.sourcelang.capsstart(source) == self.config.lang.capsstart(target)
    # Too short to judge capitalisation: both empty or both non-empty passes.
    return bool(source) == bool(target)
799
def simplecaps(self, str1, str2):
    """checks the capitalisation of two strings isn't wildly different

    Variables are removed from both strings, then the number of uppercase
    characters in each string is compared using a cascade of heuristics
    based on the number of alphabetic characters and the string lengths.
    """
    str1 = self.removevariables(str1)
    str2 = self.removevariables(str2)
    # TODO: review this. The 'I' is specific to English, so it probably serves
    # no purpose to get sourcelang.sentenceend
    # NOTE(review): the match includes the character before " I ", so the
    # substitution drops that character as well -- confirm this is intended.
    str1 = re.sub(u"[^%s]( I )" % self.config.sourcelang.sentenceend, u" i ", str1)
    capitals1 = helpers.filtercount(str1, unicode.isupper)
    capitals2 = helpers.filtercount(str2, unicode.isupper)
    alpha1 = helpers.filtercount(str1, unicode.isalpha)
    alpha2 = helpers.filtercount(str2, unicode.isalpha)
    # Capture the all caps case
    if capitals1 == alpha1:
        return capitals2 == alpha2
    # some heuristic tests to try and see that the style of capitals is vaguely the same
    # NOTE(review): the divisions below operate on integers; presumably they
    # rely on Python 2 floor division -- confirm before porting.
    if capitals1 == 0 or capitals1 == 1:
        return capitals2 == capitals1
    elif capitals1 < len(str1) / 10:
        return capitals2 <= len(str2) / 8
    elif len(str1) < 10:
        return abs(capitals1 - capitals2) < 3
    elif capitals1 > len(str1) * 6 / 10:
        return capitals2 > len(str2) * 6 / 10
    else:
        return abs(capitals1 - capitals2) < (len(str1) + len(str2)) / 6
825
def acronyms(self, str1, str2):
    """checks that acronyms that appear are unchanged

    A source word is treated as an acronym when it is all uppercase and
    longer than one character.  Variables found in the source and the keys
    of C{self.config.musttranslatewords} are exempt.

    @return: True when every acronym also occurs in the translation
    @raise FilterFailure: listing the acronyms missing from the translation
    """
    exempt = []
    for startmatch, endmatch in self.config.varmatches:
        exempt += decoration.getvariables(startmatch, endmatch)(str1)
    exempt += self.config.musttranslatewords.keys()
    source = self.filteraccelerators(self.filtervariables(str1))
    source_words = self.config.lang.word_iter(source)
    target = self.filteraccelerators(self.filtervariables(str2))
    #TODO: strip XML? - should provide better error messages
    # see mail/chrome/messanger/smime.properties.po
    #TODO: consider limiting the word length for recognising acronyms to
    #something like 5/6 characters
    changed = [word for word in source_words
               if word.isupper() and len(word) > 1
               and word not in exempt and word not in target]
    if changed:
        raise FilterFailure(u"acronyms should not be translated: " + u", ".join(changed))
    return True
847
def doublewords(self, str1, str2):
    """checks for repeated words in the translation

    Variables and accelerators are removed from the translation, full stops
    are dropped and the text is lowercased before it is split on
    whitespace.  Two identical adjacent words fail the check unless the
    word is listed in C{self.config.lang.validdoublewords}.

    @return: True when no word is repeated
    @raise FilterFailure: naming the first repeated word
    """
    # The former '"\n".join(str2.split("\n"))' step was a no-op and has been
    # dropped; split() below already treats newlines as separators.
    cleaned = self.filteraccelerators(self.removevariables(str2))
    words = cleaned.replace(u".", u"").lower().split()
    for previous, word in zip(words, words[1:]):
        if word == previous and word not in self.config.lang.validdoublewords:
            raise FilterFailure(u"The word '%s' is repeated" % word)
    return True
858
def notranslatewords(self, str1, str2):
    """checks that words configured as untranslatable appear in the translation too

    Passes immediately when no such words are configured.  Otherwise
    variables are filtered out, every configured punctuation character is
    replaced by a space, accelerators are filtered out and both strings are
    split into words.

    @return: True when every configured word found in the source is also a
    word of the translation
    @raise FilterFailure: listing the words that were translated
    """
    protected = self.config.notranslatewords
    if not protected:
        return True

    def blank_punctuation(text):
        # config.punctuation contains assorted quote characters; a single
        # apostrophe is perhaps problematic in words like "doesn't"
        for separator in self.config.punctuation:
            text = text.replace(separator, u" ")
        return text

    source = blank_punctuation(self.filtervariables(str1))
    target = blank_punctuation(self.filtervariables(str2))
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    stopwords = []
    for word in source_words:
        if word in protected and word not in target_words:
            stopwords.append(word)
    if stopwords:
        raise FilterFailure(u"do not translate: %s" % (u", ".join(stopwords)))
    return True
876
def musttranslatewords(self, str1, str2):
    """checks that words configured as definitely translatable don't appear in
    the translation

    Passes immediately when no such words are configured.  Otherwise
    variables are removed, every configured punctuation character is
    replaced by a space, accelerators are filtered out and both strings are
    split into words.

    @return: True when none of the configured source words is also a word
    of the translation
    @raise FilterFailure: listing the words that still need translating
    """
    required = self.config.musttranslatewords
    if not required:
        return True

    def blank_punctuation(text):
        # config.punctuation contains assorted quote characters; a single
        # apostrophe is perhaps problematic in words like "doesn't"
        for separator in self.config.punctuation:
            text = text.replace(separator, u" ")
        return text

    source = blank_punctuation(self.removevariables(str1))
    target = blank_punctuation(self.removevariables(str2))
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    stopwords = []
    for word in source_words:
        if word in required and word in target_words:
            stopwords.append(word)
    if stopwords:
        raise FilterFailure(u"please translate: %s" % (u", ".join(stopwords)))
    return True
895
def validchars(self, str1, str2):
    """checks that only characters specified as valid appear in the translation

    Passes immediately when C{self.config.validcharsmap} is empty.
    Otherwise both strings are run through C{translate} with that map; any
    character left in the translation that is not also left in the source
    is reported.

    @return: True when no such character remains
    @raise FilterFailure: listing each offending character and its code point
    """
    charmap = self.config.validcharsmap
    if not charmap:
        return True
    source_leftover = str1.translate(charmap)
    target_leftover = str2.translate(charmap)
    invalidchars = []
    for char in target_leftover:
        if char in source_leftover:
            continue
        invalidchars.append(u"'%s' (\\u%04x)" % (char, ord(char)))
    if invalidchars:
        raise FilterFailure(u"invalid chars: %s" % (u", ".join(invalidchars)))
    return True
906
def filepaths(self, str1, str2):
    """checks that file paths have not been translated

    Every word of the accelerator-filtered source that starts with C{/} is
    passed to C{helpers.countsmatch} together with the unfiltered source
    and translation.  The check fails on the first word for which that
    returns a false value.
    """
    candidates = [word for word in self.filteraccelerators(str1).split()
                  if word.startswith(u"/")]
    return all(helpers.countsmatch(str1, str2, (path,)) for path in candidates)
914
def xmltags(self, str1, str2):
    """checks that XML/HTML tags have not been translated

    With no tags in the source, the check passes only if the translation
    has no tags either.  A source consisting of a single tag without any
    C{=} passes outright.  Otherwise the tag properties of both strings
    (ignoring C{self.config.ignoretags}) are filtered through
    C{self.config.canchangetags} and must compare equal.
    """
    source_tags = tag_re.findall(str1)
    if not source_tags:
        # No tags in str1, let's just check that none were added in str2. This
        # might be useful for fuzzy strings wrongly unfuzzied, for example.
        return not tag_re.findall(str2)

    first_tag = source_tags[0]
    if len(first_tag) == len(str1) and u"=" not in first_tag:
        return True

    target_tags = tag_re.findall(str2)
    source_properties = tagproperties(source_tags, self.config.ignoretags)
    target_properties = tagproperties(target_tags, self.config.ignoretags)
    changeable = self.config.canchangetags
    source_filtered = [intuplelist(prop, changeable) for prop in source_properties]
    target_filtered = [intuplelist(prop, changeable) for prop in target_properties]

    #TODO: consider the consequences of different ordering of attributes/tags
    return source_filtered == target_filtered
941
def kdecomments(self, str1, str2):
    """checks to ensure that no KDE style comments appear in the translation

    Fails when the translation starts with C{_:} or contains C{_:} directly
    after a newline.
    """
    has_comment = str2.startswith(u"_:") or u"\n_:" in str2
    return not has_comment
945
def compendiumconflicts(self, str1, str2):
    """checks for Gettext compendium conflicts (#-#-#-#-#)

    Fails when the marker C{#-#-#-#-#} occurs anywhere in the translation.
    """
    return u"#-#-#-#-#" not in str2
949
def simpleplurals(self, str1, str2):
    """checks for English style plural(s) for you to review

    Counts occurrences of C{(s)} in source and translation.  When the
    target language has exactly one plural form the translation must not
    contain C{(s)} at all; otherwise the two counts must be equal.
    """
    def numberofpatterns(string, patterns):
        """Return the total number of matches of all patterns in string."""
        return sum(len(re.findall(pattern, string)) for pattern in patterns)

    # Raw strings: "\(" is not a valid string escape, so the non-raw form
    # only worked by accident and triggers warnings on newer interpreters.
    sourcepatterns = [r"\(s\)"]
    targetpatterns = [r"\(s\)"]
    sourcecount = numberofpatterns(str1, sourcepatterns)
    targetcount = numberofpatterns(str2, targetpatterns)
    if self.config.lang.nplurals == 1:
        return not targetcount
    return sourcecount == targetcount
965
def spellcheck(self, str1, str2):
    """checks words that don't pass a spell check

    Passes immediately when no target language is configured or the
    spelling module is not available.  Words the checker flags in the
    source (checked as C{"en"}), words in C{self.config.notranslatewords}
    and words that appear among their own suggestions are not reported.

    @return: True when no word of the translation is reported
    @raise FilterFailure: with one message per reported word, offering up
    to five suggestions
    """
    if not self.config.targetlanguage or not spelling.available:
        return True
    # TODO: filterxml?
    source = self.filteraccelerators_by_list(self.filtervariables(str1), self.config.sourcelang.validaccel)
    target = self.filteraccelerators_by_list(self.filtervariables(str2), self.config.lang.validaccel)
    source_flagged = [word for word, index, suggestions in spelling.check(source, lang="en")]
    messages = []
    for word, index, suggestions in spelling.check(target, lang=self.config.targetlanguage):
        # the "word in suggestions" test is a hack to ignore hyphenisation rules
        if (word in self.config.notranslatewords or word in source_flagged
                or word in suggestions):
            continue
        messages.append(u"check spelling of %s (could be %s)" % (word, u" / ".join(suggestions[:5])))
    if messages:
        raise FilterFailure(messages)
    return True
991
def credits(self, str1, str2):
    """checks for messages containing translation credits instead of normal translations.

    Fails when the source string is one of C{self.config.credit_sources}.
    """
    return str1 not in self.config.credit_sources
# If the precondition filter is run and fails then the other tests listed are ignored
# NOTE(review): indentation was lost in this listing; presumably this table is
# an attribute of the enclosing checker class -- confirm against the original.
preconditions = {"untranslated": ("simplecaps", "variables", "startcaps",
                                  "accelerators", "brackets", "endpunc",
                                  "acronyms", "xmltags", "startpunc",
                                  "endwhitespace", "startwhitespace",
                                  "escapes", "doublequoting", "singlequoting",
                                  "filepaths", "purepunc", "doublespacing",
                                  "sentencecount", "numbers", "isfuzzy",
                                  "isreview", "notranslatewords", "musttranslatewords",
                                  "emails", "simpleplurals", "urls", "printf",
                                  "tabs", "newlines", "functions", "options",
                                  "blank", "nplurals", "gconf"),
                 "blank": ("simplecaps", "variables", "startcaps",
                           "accelerators", "brackets", "endpunc",
                           "acronyms", "xmltags", "startpunc",
                           "endwhitespace", "startwhitespace",
                           "escapes", "doublequoting", "singlequoting",
                           "filepaths", "purepunc", "doublespacing",
                           "sentencecount", "numbers", "isfuzzy",
                           "isreview", "notranslatewords", "musttranslatewords",
                           "emails", "simpleplurals", "urls", "printf",
                           "tabs", "newlines", "functions", "options",
                           "gconf"),
                 "credits": ("simplecaps", "variables", "startcaps",
                             "accelerators", "brackets", "endpunc",
                             "acronyms", "xmltags", "startpunc",
                             "escapes", "doublequoting", "singlequoting",
                             "filepaths", "doublespacing",
                             "sentencecount", "numbers",
                             "emails", "simpleplurals", "urls", "printf",
                             "tabs", "newlines", "functions", "options"),
                 "purepunc": ("startcaps", "options"),
                 # The "startcaps" entry below is disabled because it has been
                 # causing problems since Python 2.6: startcaps is now seen as
                 # an important test to always execute, and it could now run
                 # before it is blocked by a failing "untranslated" or "blank"
                 # test. This is probably happening due to a slightly different
                 # implementation of the internal dict handling since Python
                 # 2.6. We should never have relied on this ordering anyway.
                 #"startcaps": ("simplecaps",),
                 "endwhitespace": ("endpunc",),
                 "startwhitespace":("startpunc",),
                 "unchanged": ("doublewords",),
                 "compendiumconflicts": ("accelerators", "brackets", "escapes",
                                         "numbers", "startpunc", "long", "variables",
                                         "startcaps", "sentencecount", "simplecaps",
                                         "doublespacing", "endpunc", "xmltags",
                                         "startwhitespace", "endwhitespace",
                                         "singlequoting", "doublequoting",
                                         "filepaths", "purepunc", "doublewords", "printf") }

# code to actually run the tests (use unittest?)

# Configuration merged into the checker config by OpenOfficeChecker.
openofficeconfig = CheckerConfig(
    accelmarkers = ["~"],
    varmatches = [("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"), ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0), ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None)],
    ignoretags = [("alt", "xml-lang", None), ("ahelp", "visibility", "visible"), ("img", "width", None), ("img", "height", None)],
    canchangetags = [("link", "name", None)]
    )
class OpenOfficeChecker(StandardChecker):
    """StandardChecker that merges C{openofficeconfig} into its configuration."""

    def __init__(self, **kwargs):
        """Set up the checker with C{openofficeconfig} applied.

        A C{checkerconfig} keyword argument is reused and updated in place
        when given; otherwise a new CheckerConfig is created and passed on
        to StandardChecker.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)
# Configuration merged into the checker config by MozillaChecker.
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", 1), ("$", "$"), ("$", None),
        ("#", 1), ("${", "}"), ("$(^", ")"),
    ],
    criticaltests=["accelerators"],
)


class MozillaChecker(StandardChecker):
    """StandardChecker that merges C{mozillaconfig} into its configuration."""

    def __init__(self, **kwargs):
        """Set up the checker with C{mozillaconfig} applied.

        A C{checkerconfig} keyword argument is reused and updated in place
        when given; otherwise a new CheckerConfig is created and passed on
        to StandardChecker.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)

    def credits(self, str1, str2):
        """checks for messages containing translation credits instead of normal translations.

        Fails when any of C{self.locations} is one of the two known credit
        locations.
        """
        credit_locations = ('MOZ_LANGPACK_CONTRIBUTORS', 'credit.translation')
        return not any(location in credit_locations
                       for location in self.locations)
# Configuration merged into the checker config by DrupalChecker.
drupalconfig = CheckerConfig(
    varmatches=[("%", None), ("@", None), ("!", None)],
)


class DrupalChecker(StandardChecker):
    """StandardChecker that merges C{drupalconfig} into its configuration."""

    def __init__(self, **kwargs):
        """Set up the checker with C{drupalconfig} applied.

        A C{checkerconfig} keyword argument is reused and updated in place
        when given; otherwise a new CheckerConfig is created and passed on
        to StandardChecker.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(drupalconfig)
        StandardChecker.__init__(self, **kwargs)
# Configuration merged into the checker config by GnomeChecker.
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    varmatches=[("%", 1), ("$(", ")")],
    credit_sources=[u"translator-credits"],
)


class GnomeChecker(StandardChecker):
    """StandardChecker that merges C{gnomeconfig} into its configuration."""

    def __init__(self, **kwargs):
        """Set up the checker with C{gnomeconfig} applied.

        A C{checkerconfig} keyword argument is reused and updated in place
        when given; otherwise a new CheckerConfig is created and passed on
        to StandardChecker.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)

    def gconf(self, str1, str2):
        """Checks if we have any gconf config settings translated.

        Only applies when one of C{self.locations} contains C{schemas.in}.
        Each match of C{gconf_attribute_re} in the source, with its first
        and last character stripped, must occur in the translation.

        @return: True when nothing was translated
        @raise FilterFailure: listing the attributes that were translated
        """
        for location in self.locations:
            if 'schemas.in' not in location:
                continue
            attributes = gconf_attribute_re.findall(str1)
            translated = [attribute for attribute in attributes
                          if attribute[1:-1] not in str2]
            if translated:
                raise FilterFailure(u"do not translate gconf attribute: %s" % (u", ".join(translated)))
        return True
# Configuration merged into the checker config by KdeChecker.
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("%", 1)],
    credit_sources=[u"Your names", u"Your emails", u"ROLES_OF_TRANSLATORS"],
)


class KdeChecker(StandardChecker):
    """StandardChecker that merges C{kdeconfig} into its configuration."""

    def __init__(self, **kwargs):
        """Set up the checker with C{kdeconfig} applied.

        A C{checkerconfig} keyword argument is reused and updated in place
        when given; otherwise a new CheckerConfig is created and passed on
        to StandardChecker.
        """
        # TODO allow setup of KDE plural and translator comments so that they do
        # not create false positives
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)
# Configuration merged into the checker config by CCLicenseChecker.
cclicenseconfig = CheckerConfig(varmatches=[("@", "@")])


class CCLicenseChecker(StandardChecker):
    """StandardChecker that merges C{cclicenseconfig} into its configuration."""

    def __init__(self, **kwargs):
        """Set up the checker with C{cclicenseconfig} applied.

        A C{checkerconfig} keyword argument is reused and updated in place
        when given; otherwise a new CheckerConfig is created and passed on
        to StandardChecker.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)
# Maps a project name to the checker class used for it.
# Note that "wx" shares KdeChecker with "kde".
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
    "drupal": DrupalChecker,
    }
class StandardUnitChecker(UnitChecker):
    """The standard checks for common checks on translation units."""

    def isfuzzy(self, unit):
        """Check if the unit has been marked fuzzy.

        Fails when C{unit.isfuzzy()} is true.
        """
        fuzzy = unit.isfuzzy()
        return not fuzzy

    def isreview(self, unit):
        """Check if the unit has been marked review.

        Fails when C{unit.isreview()} is true.
        """
        needs_review = unit.isreview()
        return not needs_review

    def nplurals(self, unit):
        """Checks for the correct number of noun forms for plural translations.

        Units without plurals always pass, as do all units when the target
        language's C{nplurals} is not greater than zero.
        """
        if not unit.hasplural():
            return True
        expected = self.config.lang.nplurals
        # if we don't have a valid nplurals value, don't run the test
        if not expected > 0:
            return True
        return len(unit.target.strings) == expected

    def hassuggestion(self, unit):
        """Checks if there is at least one suggested translation for this unit.

        Suggestions come from C{self.suggestion_store} when it is set, or
        from the unit's alternative translations when it is an XLIFF unit.
        The check fails when any suggestion is found.
        """
        # Make sure the attribute exists even if it was never assigned.
        self.suggestion_store = getattr(self, 'suggestion_store', None)
        if self.suggestion_store:
            suggestions = self.suggestion_store.findunits(unit.source)
        elif xliff and isinstance(unit, xliff.xliffunit):
            # TODO: we probably want to filter them somehow
            suggestions = unit.getalttrans()
        else:
            suggestions = []
        return not suggestions
1193 1194
def runtests(str1, str2, ignorelist=()):
    """verifies that the tests pass for a pair of strings

    Both strings are normalised to unicode, wrapped in a TranslationUnit
    and run through a StandardChecker that excludes the filters named in
    C{ignorelist}.  Every failure is printed.

    @return: the failures as returned by C{run_filters}
    """
    from translate.storage import base
    source = data.normalized_unicode(str1)
    target = data.normalized_unicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    failures = StandardChecker(excludefilters=ignorelist).run_filters(unit)
    for test in failures:
        report = "failure: %s: %s\n %r\n %r" % (test, failures[test], source, target)
        print(report)
    return failures
1207
def batchruntests(pairs):
    """runs test on a batch of string pairs

    Each (source, target) pair is passed to L{runtests}.  Afterwards a
    blank line and a summary of how many pairs passed are printed.

    @param pairs: a sequence of (source, target) string pairs
    """
    passed, numpairs = 0, len(pairs)
    for str1, str2 in pairs:
        # runtests returns the failures, so a pair passes only when that
        # result is empty (the previous code counted failing pairs).
        if not runtests(str1, str2):
            passed += 1
    # Parenthesised single-argument prints give the same output whether
    # print is a statement or a function.
    print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))
if __name__ == '__main__':
    # Ad-hoc sample (source, translation) pairs that are run through
    # batchruntests when this module is executed as a script.
    testset = [(r"simple", r"somple"),
               (r"\this equals \that", r"does \this equal \that?"),
               (r"this \'equals\' that", r"this 'equals' that"),
               (r" start and end! they must match.", r"start and end! they must match."),
               (r"check for matching %variables marked like %this", r"%this %variable is marked"),
               (r"check for mismatching %variables marked like %this", r"%that %variable is marked"),
               (r"check for mismatching %variables% too", r"how many %variable% are marked"),
               (r"%% %%", r"%%"),
               (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"),
               (r"simple lowercase", r"it is all lowercase"),
               (r"simple lowercase", r"It Is All Lowercase"),
               (r"Simple First Letter Capitals", r"First Letters"),
               (r"SIMPLE CAPITALS", r"First Letters"),
               (r"SIMPLE CAPITALS", r"ALL CAPITALS"),
               (r"forgot to translate", r" ")
              ]
    batchruntests(testset)