Package translate :: Package filters :: Module checks
[hide private]
[frames] | no frames]

Source Code for Module translate.filters.checks

   1  #!/usr/bin/env python 
   2  # -*- coding: utf-8 -*- 
   3  #  
   4  # Copyright 2004-2008 Zuza Software Foundation 
   5  #  
   6  # This file is part of translate. 
   7  # 
   8  # translate is free software; you can redistribute it and/or modify 
   9  # it under the terms of the GNU General Public License as published by 
  10  # the Free Software Foundation; either version 2 of the License, or 
  11  # (at your option) any later version. 
  12  #  
  13  # translate is distributed in the hope that it will be useful, 
  14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  16  # GNU General Public License for more details. 
  17  # 
  18  # You should have received a copy of the GNU General Public License 
  19  # along with translate; if not, write to the Free Software 
  20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
  21   
  22  """This is a set of validation checks that can be performed on translation  
  23  units. 
  24   
  25  Derivatives of UnitChecker (like StandardUnitChecker) check translation units, 
  26  and derivatives of TranslationChecker (like StandardChecker) check  
  27  (source, target) translation pairs. 
  28   
  29  When adding a new test here, please document and explain the behaviour on the  
  30  U{wiki <http://translate.sourceforge.net/wiki/toolkit/pofilter_tests>}. 
  31  """ 
  32   
  33  from translate.filters import helpers 
  34  from translate.filters import decoration 
  35  from translate.filters import prefilters 
  36  from translate.filters import spelling 
  37  from translate.lang import factory 
  38  from translate.lang import data 
  39  # The import of xliff could fail if the user doesn't have lxml installed. For 
  40  # now we try to continue gracefully to help users who aren't interested in  
  41  # support for XLIFF or other XML formats. 
  42  try: 
  43      from translate.storage import xliff 
  44  except ImportError, e: 
  45      xliff = None 
   46  # The import of xliff can fail silently in the absence of lxml if another
   47  # module already tried to import it unsuccessfully, so let's make 100% sure:
  48  if not hasattr(xliff, "xliffunit"): 
  49      xliff = None 
  50  import re 
  51   
  52  # These are some regular expressions that are compiled for use in some tests 
  53   
  54  # printf syntax based on http://en.wikipedia.org/wiki/Printf which doens't cover everything we leave \w instead of specifying the exact letters as 
  55  # this should capture printf types defined in other platforms. 
  56  # extended to support Python named format specifiers 
  57  printf_pat = re.compile('%((?:(?P<ord>\d+)\$|\((?P<key>\w+)\))?(?P<fullvar>[+#-]*(?:\d+)?(?:\.\d+)?(hh\|h\|l\|ll)?(?P<type>[\w%])))') 
  58   
  59  # The name of the XML tag 
  60  tagname_re = re.compile("<[\s]*([\w\/]*)") 
  61   
  62  # We allow escaped quotes, probably for old escaping style of OOo helpcontent 
  63  #TODO: remove escaped strings once usage is audited 
  64  property_re = re.compile(" (\w*)=((\\\\?\".*?\\\\?\")|(\\\\?'.*?\\\\?'))") 
  65   
  66  # The whole tag 
  67  tag_re = re.compile("<[^>]+>") 
  68   
  69  gconf_attribute_re = re.compile('"[a-z_]+?"') 
  70   
def tagname(string):
    """Return the name of the XML/HTML tag that string starts with.

    tagname_re is applied with match(), so it is anchored at the start of
    string.  If string does not match, an AttributeError is raised because
    there is no match object to read the group from.
    """
    found = tagname_re.match(string)
    return found.group(1)
74
def intuplelist(pair, list):
    """Look up the (tag, attribute, value) triple pair in list.

    Entries of list are triples too; None in the tag or value position of an
    entry acts as a wildcard.  The attribute position must always match, and
    the value is only looked at once tag and attribute have matched.

    @param pair: the (tag, attribute, value) triple to look for
    @param list: the (tag, attribute, value) patterns to search
    @return: the first matching pattern from list, or pair itself when nothing
    matches or when pair only names a tag (attribute and value are both None)
    """
    tag, attribute, value = pair
    if (attribute, value) == (None, None):
        # A bare tag name: nothing to look up.
        return pair
    for candidate in list:
        cand_tag, cand_attribute, cand_value = candidate
        if cand_attribute != attribute:
            continue
        if cand_tag != tag and cand_tag != None:
            continue
        if cand_value == None or cand_value == value:
            return candidate
    return pair
89
def tagproperties(strings, ignore):
    """Return the properties of the XML/HTML tags in strings.

    For every tag string the result contains (tagname, None, None) followed by
    one (tagname, propertyname, propertyvalue) triple per attribute, except
    for the combinations specified in ignore.

    @param strings: tag strings, each starting with "<"
    @param ignore: (tag, attribute, value) triples to leave out; None entries
    are wildcards as understood by intuplelist()
    @return: list of (tagname, propertyname, propertyvalue) triples
    """
    properties = []
    for string in strings:
        tag = tagname(string)
        properties.append((tag, None, None))
        # Now we isolate the attribute pairs.  The last two groups are the
        # double- and single-quoted alternatives of the value and are not
        # needed separately.
        for name, value, _double_quoted, _single_quoted in property_re.findall(string):
            # Strip the quotes:
            value = value[1:-1]
            candidate = (tag, name, value)
            if candidate in ignore or intuplelist(candidate, ignore) != candidate:
                # Skip only this attribute.  Breaking out of the loop here (as
                # this function used to) also dropped every attribute that
                # follows an ignorable one in the same tag.
                continue
            properties.append(candidate)
    return properties
112 113
class FilterFailure(Exception):
    """This exception signals that a Filter didn't pass, and gives an
    explanation or a comment.

    The explanation is available as args[0]: a single unicode string made of
    all the messages joined with ", ".
    """

    def __init__(self, messages):
        """
        @param messages: one unicode message, or a list of unicode messages
        """
        if not isinstance(messages, list):
            messages = [messages]
        # Assumption: all messages are of the same type, so only the first one
        # is checked.  An empty list has nothing to check.
        if messages:
            assert isinstance(messages[0], unicode)
        joined = u", ".join(messages)
        Exception.__init__(self, joined)
        # Python 2.3 doesn't have .args
        if not hasattr(self, "args"):
            # Keep the usual tuple shape so that e.args[0] is the whole
            # message rather than its first character.
            self.args = (joined,)
126
class SeriousFilterFailure(FilterFailure):
    """Signals that a Filter didn't pass and that the bad translation might
    break an application (so the string will be marked fuzzy).

    Adds nothing to FilterFailure; the distinct type is what carries the
    severity.
    """
# Each entry is a (tag, attribute, value) triple naming an attribute that may
# be changed or ignored when it occurs inside tag.  When the third element is
# given, it names a property value that can be ignored if present (defaults,
# for example).
# None in any position means that the entry is relevant for all values of
# that tag/property.  A non-None "value" means that the value of the
# attribute must be taken into account.
common_ignoretags = [(None, "xml-lang", None)]
common_canchangetags = [("img", "alt", None)]
class CheckerConfig(object):
    """Holds the settings that a checker consults while running its tests."""

    def __init__(self, targetlanguage=None, accelmarkers=None, varmatches=None,
                 notranslatewords=None, musttranslatewords=None, validchars=None,
                 punctuation=None, endpunctuation=None, ignoretags=None,
                 canchangetags=None, criticaltests=None, credit_sources=None):
        """Store the given settings, filling in defaults for those left as None."""
        normalize = data.normalized_unicode

        # List-valued settings: None becomes a fresh empty list.
        self.accelmarkers = self._init_list(accelmarkers)
        self.varmatches = self._init_list(varmatches)
        self.criticaltests = self._init_list(criticaltests)
        self.credit_sources = self._init_list(credit_sources)

        # Language objects; self.lang is set by updatetargetlanguage() and is
        # needed for the defaults below.
        self.targetlanguage = targetlanguage
        self.updatetargetlanguage(targetlanguage)
        self.sourcelang = factory.getlanguage('en')

        # Settings that fall back to a default when not supplied.
        self.punctuation = self._init_default(normalize(punctuation), self.lang.punctuation)
        self.endpunctuation = self._init_default(normalize(endpunctuation), self.lang.sentenceend)
        self.ignoretags = self._init_default(ignoretags, common_ignoretags)
        self.canchangetags = self._init_default(canchangetags, common_canchangetags)

        # Word lists are kept as dictionaries keyed on the normalized word.
        # TODO: allow user configuration of untranslatable words
        self.notranslatewords = dict.fromkeys(
            [normalize(word) for word in self._init_list(notranslatewords)])
        self.musttranslatewords = dict.fromkeys(
            [normalize(word) for word in self._init_list(musttranslatewords)])

        validchars = normalize(validchars)
        self.validcharsmap = {}
        self.updatevalidchars(validchars)

    def _init_list(self, list):
        """Initialise a configuration parameter that is a list.

        @type list: List
        @param list: None (a new empty list is used) or a list parameter
        @rtype: List
        @return: the list that was passed in (not a copy), or a new empty list
        """
        if list is not None:
            return list
        return []

    def _init_default(self, param, default):
        """Initialise a parameter that has a default value.

        @param param: the user supplied parameter value
        @param default: the value to use when param is None
        @return: param as specified by the user, or default
        """
        if param is not None:
            return param
        return default

    def update(self, otherconfig):
        """Combine the info in otherconfig into this config object."""
        self.targetlanguage = otherconfig.targetlanguage or self.targetlanguage
        self.updatetargetlanguage(self.targetlanguage)

        # Only accelerator markers that are not known yet are added; the other
        # lists are extended without such a check.
        new_markers = [marker for marker in otherconfig.accelmarkers
                       if marker not in self.accelmarkers]
        self.accelmarkers.extend(new_markers)
        self.varmatches.extend(otherconfig.varmatches)

        self.notranslatewords.update(otherconfig.notranslatewords)
        self.musttranslatewords.update(otherconfig.musttranslatewords)
        self.validcharsmap.update(otherconfig.validcharsmap)
        self.punctuation += otherconfig.punctuation
        self.endpunctuation += otherconfig.endpunctuation

        #TODO: consider also updating in the following cases:
        # (these are taken over from otherconfig as they are)
        self.ignoretags = otherconfig.ignoretags
        self.canchangetags = otherconfig.canchangetags
        self.criticaltests.extend(otherconfig.criticaltests)
        self.credit_sources = otherconfig.credit_sources

    def updatevalidchars(self, validchars):
        """Add validchars to the map that eliminates valid characters.

        Every character of validchars is entered into self.validcharsmap as
        ordinal -> None.

        @return: True when validchars is None (nothing is changed), otherwise
        None
        """
        if validchars is None:
            return True
        ordinals = [ord(validchar) for validchar in data.normalized_unicode(validchars)]
        self.validcharsmap.update(dict.fromkeys(ordinals))

    def updatetargetlanguage(self, langcode):
        """Set self.lang to the language object for langcode.

        self.targetlanguage is not touched here.
        """
        self.lang = factory.getlanguage(langcode)
219
def cache_results(f):
    """Wrap the one-argument method f so that its results are cached.

    Results are stored in the instance's results_cache dictionary under the
    key (name of f, argument), so the argument has to be hashable and the
    instance has to provide results_cache.

    @param f: a method taking self and one further parameter
    @return: the caching wrapper, carrying the docstring (and, where the
    interpreter allows it, the name) of f
    """
    def cached_f(self, param1):
        key = (f.__name__, param1)
        res_cache = self.results_cache
        if key not in res_cache:
            res_cache[key] = f(self, param1)
        return res_cache[key]

    # Keep the wrapped method recognisable in help() and generated API
    # documentation instead of showing every cached method as "cached_f".
    cached_f.__doc__ = f.__doc__
    try:
        cached_f.__name__ = f.__name__
    except TypeError:
        # Function names are read-only on very old interpreters.
        pass
    return cached_f
class UnitChecker(object):
    """Parent Checker class which does the checking based on the test
    functions available in derived classes."""
    # Maps the name of a test to the names of the tests that are skipped once
    # that test has failed (see run_filters).
    preconditions = {}

    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
        """
        @param checkerconfig: the configuration to use; a default
        CheckerConfig is created when None
        @param excludefilters: names of tests to leave out
        @param limitfilters: when given, only these names are considered
        @param errorhandler: called by run_filters with (functionname, source,
        target, exception) when a test raises something other than a
        FilterFailure; its return value is used as the test result
        """
        self.errorhandler = errorhandler
        if checkerconfig is None:
            checkerconfig = CheckerConfig()
        self.setconfig(checkerconfig)
        # Everything callable that UnitChecker itself defines is a helper and
        # must not be treated as a test.
        self.helperfunctions = {}
        for name in dir(UnitChecker):
            attribute = getattr(self, name)
            if callable(attribute):
                self.helperfunctions[name] = attribute
        self.defaultfilters = self.getfilters(excludefilters, limitfilters)

        self.results_cache = {}

    def getfilters(self, excludefilters=None, limitfilters=None):
        """Return a dictionary (name -> bound method) of the available
        filters, including/excluding those in the given lists."""
        if limitfilters is None:
            # use everything available unless instructed
            limitfilters = dir(self)
        if excludefilters is None:
            excludefilters = {}
        filters = {}
        for name in limitfilters:
            if name in excludefilters or name in self.helperfunctions or name == "errorhandler":
                continue
            candidate = getattr(self, name, None)
            if callable(candidate):
                filters[name] = candidate
        return filters

    def setconfig(self, config):
        """Store config and build the accelerator and variable prefilters
        from its accelmarkers and varmatches."""
        self.config = config
        self.accfilters = [prefilters.filteraccelerators(marker)
                           for marker in config.accelmarkers]
        self.varfilters = [prefilters.filtervariables(start, end, prefilters.varname)
                           for start, end in config.varmatches]
        self.removevarfilter = [prefilters.filtervariables(start, end, prefilters.varnone)
                                for start, end in config.varmatches]

    def setsuggestionstore(self, store):
        """Set the store that a checker should use for evaluating
        suggestions; require_index() is called on it when it is not empty."""
        self.suggestion_store = store
        if self.suggestion_store:
            self.suggestion_store.require_index()

    def filtervariables(self, str1):
        """Pass str1 through the variable filters (self.varfilters)."""
        return helpers.multifilter(str1, self.varfilters)
    filtervariables = cache_results(filtervariables)

    def removevariables(self, str1):
        """Pass str1 through the variable-removing filters (self.removevarfilter)."""
        return helpers.multifilter(str1, self.removevarfilter)
    removevariables = cache_results(removevariables)

    def filteraccelerators(self, str1):
        """Pass str1 through the accelerator filters, without an accept list."""
        return helpers.multifilter(str1, self.accfilters, None)
    filteraccelerators = cache_results(filteraccelerators)

    def filteraccelerators_by_list(self, str1, acceptlist=None):
        """Pass str1 through the accelerator filters, handing acceptlist on
        to them.  Not cached."""
        return helpers.multifilter(str1, self.accfilters, acceptlist)

    def filterwordswithpunctuation(self, str1):
        """Replace words with punctuation with their unpunctuated equivalents."""
        return prefilters.filterwordswithpunctuation(str1)
    filterwordswithpunctuation = cache_results(filterwordswithpunctuation)

    def filterxml(self, str1):
        """Remove everything matched by tag_re from str1 so only text remains."""
        return tag_re.sub("", str1)
    filterxml = cache_results(filterxml)

    def run_test(self, test, unit):
        """Run the given test on the given unit and return its result.

        Note that this can raise a FilterFailure as part of normal operation.
        """
        return test(unit)

    def run_filters(self, unit):
        """Run all the tests in this suite on unit.

        @return: a dictionary mapping the name of each failed test to its
        message: the text of the FilterFailure, or else the test's docstring
        """
        self.results_cache = {}
        failures = {}
        ignores = self.config.lang.ignoretests[:]
        # Preconditions go first, so that a failing one can switch off the
        # tests that depend on it before they get their turn.
        precondition_names = list(self.preconditions.keys())
        remaining_names = [name for name in self.defaultfilters.keys()
                           if name not in self.preconditions]
        for functionname in precondition_names + remaining_names:
            if functionname in ignores:
                continue
            filterfunction = getattr(self, functionname, None)
            # this filterfunction may only be defined on another checker if using TeeChecker
            if filterfunction is None:
                continue
            filtermessage = filterfunction.__doc__
            try:
                filterresult = self.run_test(filterfunction, unit)
            except FilterFailure as e:
                filterresult = False
                filtermessage = e.args[0]
            except Exception as e:
                if self.errorhandler is None:
                    raise ValueError("error in filter %s: %r, %r, %s" %
                                     (functionname, unit.source, unit.target, e))
                filterresult = self.errorhandler(functionname, unit.source, unit.target, e)
            if filterresult:
                continue
            # we test some preconditions that aren't actually a cause for failure
            if functionname in self.defaultfilters:
                failures[functionname] = filtermessage
            if functionname in self.preconditions:
                ignores.extend(self.preconditions[functionname])
        self.results_cache = {}
        return failures
357
class TranslationChecker(UnitChecker):
    """A checker that passes source and target strings to the checks, not the
    whole unit.

    This provides some speedup and simplifies testing."""

    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None, errorhandler=None):
        """Takes the same parameters as UnitChecker and passes them on unchanged."""
        super(TranslationChecker, self).__init__(checkerconfig, excludefilters, limitfilters, errorhandler)

    def run_test(self, test, unit):
        """Run the given test on the strings cached by run_filters().

        Without plurals the test is called once with (source, target).  With
        plurals it is called for every string in unit.target.strings, and the
        result is only True if every plural form passes.

        Note that this can raise a FilterFailure as part of normal operation.

        @raise FilterFailure: when at least one plural form failed with a
        message; it carries the messages of all failed plural forms
        """
        if not self.hasplural:
            return test(self.str1, self.str2)
        filtermessages = []
        filterresult = True
        for pluralform in unit.target.strings:
            try:
                if not test(self.str1, unicode(pluralform)):
                    filterresult = False
            except FilterFailure as e:
                filterresult = False
                # args[0] is the message text.  Converting the whole args
                # tuple instead put its repr, "(u'...',)", into the message.
                filtermessages.append(e.args[0])
        if not filterresult and filtermessages:
            raise FilterFailure(filtermessages)
        return filterresult

    def run_filters(self, unit):
        """Do some optimisation by caching some data of the unit for the
        benefit of run_test(), then run the tests as UnitChecker does.

        Cached are the normalized source and target (self.str1, self.str2),
        whether the unit has plurals (self.hasplural) and its locations
        (self.locations).
        """
        self.str1 = data.normalized_unicode(unit.source)
        self.str2 = data.normalized_unicode(unit.target)
        self.hasplural = unit.hasplural()
        self.locations = unit.getlocations()
        return super(TranslationChecker, self).run_filters(unit)
395
class TeeChecker:
    """A Checker that controls multiple checkers."""

    def __init__(self, checkerconfig=None, excludefilters=None, limitfilters=None,
                 checkerclasses=None, errorhandler=None, languagecode=None):
        """Construct a TeeChecker from the given checker classes.

        One checker is created per class in checkerclasses (StandardChecker
        only, when None), each with the same remaining arguments.
        """
        self.limitfilters = limitfilters
        if checkerclasses is None:
            checkerclasses = [StandardChecker]
        checkers = []
        for checkerclass in checkerclasses:
            checkers.append(checkerclass(checkerconfig=checkerconfig,
                                         excludefilters=excludefilters,
                                         limitfilters=limitfilters,
                                         errorhandler=errorhandler))
        self.checkers = checkers
        if languagecode:
            for checker in self.checkers:
                checker.config.updatetargetlanguage(languagecode)
            # Let's hook up the language specific checker
            # NOTE(review): this hook is assumed to sit inside the
            # "if languagecode" branch - confirm against the original layout.
            lang_checker = self.checkers[0].config.lang.checker
            if lang_checker:
                self.checkers.append(lang_checker)

        self.combinedfilters = self.getfilters(excludefilters, limitfilters)
        self.config = checkerconfig or self.checkers[0].config

    def getfilters(self, excludefilters=None, limitfilters=None):
        """Return the dictionary of filters available on all the checkers,
        including/excluding those in the given lists.

        The result is also stored as self.combinedfilters.  A warning is
        written to stderr for every name in limitfilters that no checker
        provides.
        """
        if excludefilters is None:
            excludefilters = {}
        per_checker = [checker.getfilters(excludefilters, limitfilters)
                       for checker in self.checkers]
        combined = {}
        for filters in per_checker:
            combined.update(filters)
        self.combinedfilters = combined
        # TODO: move this somewhere more sensible (a checkfilters method?)
        if limitfilters is not None:
            import sys
            for filtername in limitfilters:
                if filtername not in combined:
                    sys.stderr.write("warning: could not find filter %s\n" % filtername)
        return combined

    def run_filters(self, unit):
        """Run all the tests in the checkers' suites and merge their failures
        into one dictionary."""
        failures = {}
        for checker in self.checkers:
            failures.update(checker.run_filters(unit))
        return failures

    def setsuggestionstore(self, store):
        """Pass store on to every checker as the store to use for evaluating
        suggestions."""
        for checker in self.checkers:
            checker.setsuggestionstore(store)
444 445
446 -class StandardChecker(TranslationChecker):
447 """The basic test suite for source -> target translations."""
def untranslated(self, str1, str2):
    """checks whether a string has been translated at all"""
    # The docstring doubles as the default failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    # The target is measured after prefilters.removekdecomments(); the source
    # is measured without surrounding whitespace.
    target = prefilters.removekdecomments(str2)
    source_is_empty = len(str1.strip()) == 0
    return source_is_empty or len(target) != 0
452
def unchanged(self, str1, str2):
    """checks whether a translation is basically identical to the original string"""
    # Compare without variables, accelerators and surrounding whitespace.
    source = self.filteraccelerators(self.removevariables(str1)).strip()
    target = self.filteraccelerators(self.removevariables(str2)).strip()
    if len(source) < 2:
        return True
    # If the whole string is uppercase, or nothing in the string can go
    # towards uppercase, let's assume there is nothing translatable
    # TODO: reconsider
    cannot_go_upper = source.isupper() or source.upper() == source
    if cannot_go_upper and source == target:
        return True
    notranslatewords = self.config.notranslatewords
    if notranslatewords:
        words = source.split()
        # why do we only test for one notranslate word?
        if len(words) == 1 and words[0] in notranslatewords:
            return True
    # we could also check for things like source.isnumeric(), but the test
    # above (source.upper() == source) makes this unnecessary
    if source.lower() == target.lower():
        raise FilterFailure(u"please translate")
    return True
476
def blank(self, str1, str2):
    """checks whether a translation only contains spaces"""
    # Fails only when the source has real content and the target is non-empty
    # but consists of nothing except whitespace.
    source_has_text = len(str1.strip()) > 0
    target_is_whitespace_only = len(str2) != 0 and len(str2.strip()) == 0
    return not (source_has_text and target_is_whitespace_only)
482
def short(self, str1, str2):
    """checks whether a translation is much shorter than the original string"""
    source_len = len(str1.strip())
    target_len = len(str2.strip())
    # A non-empty translation of less than a tenth of the source length.
    if source_len > 0 and 0 < target_len < source_len * 0.1:
        return False
    # A single character standing in for a longer source.
    if source_len > 1 and target_len == 1:
        return False
    return True
488
def long(self, str1, str2):
    """checks whether a translation is much longer than the original string"""
    source_len = len(str1.strip())
    target_len = len(str2.strip())
    # A non-empty source of less than a tenth of the translation length.
    if source_len > 0 and 0 < source_len < target_len * 0.1:
        return False
    # A single-character source that grew into something longer.
    if source_len == 1 and target_len > 1:
        return False
    return True
494
def escapes(self, str1, str2):
    """checks whether escaping is consistent between the two strings"""
    if helpers.countsmatch(str1, str2, (u"\\", u"\\\\")):
        return True
    # Name the whitespace-separated words that contain a backslash on either
    # side, so the message shows what to look at.
    quoted = []
    for text in (str1, str2):
        words = [u"'%s'" % word for word in text.split() if u"\\" in word]
        quoted.append(u", ".join(words))
    escapes1, escapes2 = quoted
    raise SeriousFilterFailure(u"escapes in original (%s) don't match escapes in translation (%s)" % (escapes1, escapes2))
503
def newlines(self, str1, str2):
    """checks whether newlines are consistent between the two strings"""
    # Both "\n" and "\r" are handed to helpers.countsmatch for comparison.
    line_endings = (u"\n", u"\r")
    if helpers.countsmatch(str1, str2, line_endings):
        return True
    raise FilterFailure(u"line endings in original don't match line endings in translation")
510
def tabs(self, str1, str2):
    """checks whether tabs are consistent between the two strings"""
    # A mismatch is reported as a serious failure.
    if helpers.countmatch(str1, str2, "\t"):
        return True
    raise SeriousFilterFailure(u"tabs in original don't match tabs in translation")
517
def singlequoting(self, str1, str2):
    """checks whether singlequoting is consistent between the two strings"""
    # Filter variables, then accelerators, then words with punctuation,
    # before comparing the quotes.
    source = self.filtervariables(str1)
    source = self.filteraccelerators(source)
    source = self.filterwordswithpunctuation(source)
    target = self.filtervariables(str2)
    target = self.filteraccelerators(target)
    target = self.filterwordswithpunctuation(target)
    quotes = (u"'", u"''", u"\\'")
    return helpers.countsmatch(source, target, quotes)
523
def doublequoting(self, str1, str2):
    """checks whether doublequoting is consistent between the two strings"""
    # Both strings lose variables, accelerators and XML tags; only the source
    # is additionally passed through the target language's punctranslate().
    source = self.filtervariables(str1)
    source = self.filteraccelerators(source)
    source = self.filterxml(source)
    source = self.config.lang.punctranslate(source)
    target = self.filtervariables(str2)
    target = self.filteraccelerators(target)
    target = self.filterxml(target)
    quotes = (u'"', u'""', u'\\"', u"«", u"»", u"“", u"”")
    return helpers.countsmatch(source, target, quotes)
532
def doublespacing(self, str1, str2):
    """checks for bad double-spaces by comparing to original"""
    # The docstring doubles as the default failure message (run_filters reads
    # __doc__), so it is kept verbatim.
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    # Count runs of TWO spaces.  Counting single spaces would compare the
    # number of word gaps instead, which legitimately differs between
    # languages and is not what this check is about.
    double_space = u" " * 2
    return helpers.countmatch(source, target, double_space)
538
def puncspacing(self, str1, str2):
    """checks for bad spacing after punctuation"""
    # A source without any space has no punctuation spacing to compare.
    if u" " not in str1:
        return True
    source = self.filteraccelerators(self.filtervariables(str1))
    source = self.config.lang.punctranslate(source)
    target = self.filteraccelerators(self.filtervariables(str2))
    for puncchar in self.config.punctuation:
        plaincount = source.count(puncchar)
        # Only punctuation that occurs equally often on both sides is examined.
        if not plaincount or plaincount != target.count(puncchar):
            continue
        spacecount1 = source.count(puncchar + u" ")
        spacecount2 = target.count(puncchar + u" ")
        if spacecount1 == spacecount2:
            continue
        # handle extra spaces that are because of transposed punctuation
        moved_to_or_from_end = source.endswith(puncchar) != target.endswith(puncchar)
        if moved_to_or_from_end and abs(spacecount1 - spacecount2) == 1:
            continue
        return False
    return True
559
def printf(self, str1, str2):
    """checks whether printf format strings match"""
    # Returns 1 when the format specifiers agree and 0 when they do not.
    count1 = count2 = plural = None
    # self.hasplural only set by run_filters, not always available
    if 'hasplural' in self.__dict__:
        plural = self.hasplural
    source_matches = list(printf_pat.finditer(str1))
    for index2, match2 in enumerate(printf_pat.finditer(str2)):
        count2 = index2 + 1
        if source_matches:
            count1 = len(source_matches)
        fullvar2 = match2.group('fullvar')
        ordinal2 = match2.group('ord')
        key2 = match2.group('key')
        if ordinal2:
            # %N$... : compare with the Nth specifier of the source.
            for index1, match1 in enumerate(source_matches):
                if int(ordinal2) == index1 + 1 and fullvar2 != match1.group('fullvar'):
                    return 0
        elif key2:
            # %(key)... : the source must use the same key with the same format.
            key_found = False
            for match1 in source_matches:
                if match1.group('key') != key2:
                    continue
                key_found = True
                # '%.0s' "placeholder" in plural will match anything
                if plural and fullvar2 == '.0s':
                    continue
                if match1.group('fullvar') != fullvar2:
                    return 0
            if not key_found:
                return 0
        else:
            # Plain specifier: compare with the source specifier in the same
            # position.
            for index1, match1 in enumerate(source_matches):
                # '%.0s' "placeholder" in plural will match anything
                if plural and fullvar2 == '.0s':
                    continue
                if index1 == index2 and match1.group('fullvar') != fullvar2:
                    return 0

    # No specifiers in the translation, but some in the source.
    if count2 is None and source_matches:
        return 0

    # Different numbers of specifiers.
    if (count1 or count2) and (count1 != count2):
        return 0
    return 1
604
def accelerators(self, str1, str2):
    """checks whether accelerators are consistent between the two strings"""
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    sourceaccel = self.config.sourcelang.validaccel
    targetaccel = self.config.lang.validaccel
    messages = []
    for accelmarker in self.config.accelmarkers:
        counter1 = decoration.countaccelerators(accelmarker, sourceaccel)
        counter2 = decoration.countaccelerators(accelmarker, targetaccel)
        count1, countbad1 = counter1(source)
        count2, countbad2 = counter2(target)
        getaccel = decoration.getaccelerators(accelmarker, targetaccel)
        accel2, bad2 = getaccel(target)
        if count1 == count2:
            continue
        # Choose the message that describes the mismatch best.
        if count1 == 1 and count2 == 0:
            if countbad2 == 1:
                message = u"accelerator %s appears before an invalid accelerator character '%s' (eg. space)" % (accelmarker, bad2[0])
            else:
                message = u"accelerator %s is missing from translation" % accelmarker
        elif count1 == 0:
            message = u"accelerator %s does not occur in original and should not be in translation" % accelmarker
        elif count1 == 1 and count2 > count1:
            message = u"accelerator %s is repeated in translation" % accelmarker
        else:
            message = u"accelerator %s occurs %d time(s) in original and %d time(s) in translation" % (accelmarker, count1, count2)
        messages.append(message)
    if not messages:
        return True
    # The failure is serious when this test is listed as critical in the config.
    if "accelerators" in self.config.criticaltests:
        raise SeriousFilterFailure(messages)
    raise FilterFailure(messages)
636 637 # def acceleratedvariables(self, str1, str2): 638 # """checks that no variables are accelerated""" 639 # messages = [] 640 # for accelerator in self.config.accelmarkers: 641 # for variablestart, variableend in self.config.varmatches: 642 # error = accelerator + variablestart 643 # if str1.find(error) >= 0: 644 # messages.append(u"original has an accelerated variable") 645 # if str2.find(error) >= 0: 646 # messages.append(u"translation has an accelerated variable") 647 # if messages: 648 # raise FilterFailure(messages) 649 # return True 650
def variables(self, str1, str2):
    """checks whether variables of various forms are consistent between the two strings"""
    mismatch1, mismatch2 = [], []
    seen1, seen2 = [], []
    for startmarker, endmarker in self.config.varmatches:
        varchecker = decoration.getvariables(startmarker, endmarker)

        def redecorate(var, startmarker=startmarker, endmarker=endmarker):
            # Put the markers back around a bare variable name for display.
            # An integer endmarker is never appended.
            if not startmarker:
                return var
            if endmarker and not isinstance(endmarker, int):
                return startmarker + var + endmarker
            return startmarker + var

        vars1 = varchecker(str1)
        vars2 = varchecker(str2)
        if vars1 == vars2:
            continue
        # we use counts to compare so we can handle multiple variables, and
        # filter variable names we've already seen, so they aren't matched
        # by more than one filter...
        surplus1 = [var for var in vars1
                    if vars1.count(var) > vars2.count(var) and var not in seen1]
        surplus2 = [var for var in vars2
                    if vars1.count(var) < vars2.count(var) and var not in seen2]
        seen1.extend(surplus1)
        seen2.extend(surplus2)
        mismatch1.extend([redecorate(var) for var in surplus1])
        mismatch2.extend([redecorate(var) for var in surplus2])
    # Variables missing from the translation are serious and take precedence:
    # extra variables are only reported when nothing is missing.
    if mismatch1:
        raise SeriousFilterFailure([u"do not translate: %s" % u", ".join(mismatch1)])
    if mismatch2:
        raise FilterFailure([u"translation contains variables not in original: %s" % u", ".join(mismatch2)])
    return True
689
def functions(self, str1, str2):
    """checks that function names are not translated"""
    # decoration.getfunctions is applied through helpers.funcmatch, with the
    # configured punctuation as its extra argument.
    punctuation = self.config.punctuation
    return helpers.funcmatch(str1, str2, decoration.getfunctions, punctuation)
693
def emails(self, str1, str2):
    """checks that emails are not translated"""
    # decoration.getemails is applied through helpers.funcmatch.
    extractor = decoration.getemails
    return helpers.funcmatch(str1, str2, extractor)
697
def urls(self, str1, str2):
    """checks that URLs are not translated"""
    # decoration.geturls is applied through helpers.funcmatch.
    extractor = decoration.geturls
    return helpers.funcmatch(str1, str2, extractor)
701
def numbers(self, str1, str2):
    """checks whether numbers of various forms are consistent between the two strings"""
    # The numbers to count are taken from the source string only.
    source_numbers = decoration.getnumbers(str1)
    return helpers.countsmatch(str1, str2, source_numbers)
705
def startwhitespace(self, str1, str2):
    """checks whether whitespace at the beginning of the strings matches"""
    # decoration.spacestart is applied through helpers.funcmatch.
    extractor = decoration.spacestart
    return helpers.funcmatch(str1, str2, extractor)
709
def endwhitespace(self, str1, str2):
    """checks whether whitespace at the end of the strings matches"""
    # Only the source goes through the target language's punctranslate()
    # before the comparison.
    source = self.config.lang.punctranslate(str1)
    return helpers.funcmatch(source, str2, decoration.spaceend)
714
def startpunc(self, str1, str2):
    """checks whether punctuation at the beginning of the strings match"""
    # Filter words with punctuation, then variables, then accelerators; only
    # the source is additionally passed through punctranslate().
    source = self.filterwordswithpunctuation(str1)
    source = self.filtervariables(source)
    source = self.filteraccelerators(source)
    source = self.config.lang.punctranslate(source)
    target = self.filterwordswithpunctuation(str2)
    target = self.filtervariables(target)
    target = self.filteraccelerators(target)
    return helpers.funcmatch(source, target, decoration.puncstart, self.config.punctuation)
721
def endpunc(self, str1, str2):
    """checks whether punctuation at the end of the strings match"""
    # Same filtering as for the start of the string; in addition trailing
    # whitespace is dropped from both sides.
    source = self.filterwordswithpunctuation(str1)
    source = self.filtervariables(source)
    source = self.filteraccelerators(source)
    source = self.config.lang.punctranslate(source)
    target = self.filterwordswithpunctuation(str2)
    target = self.filtervariables(target)
    target = self.filteraccelerators(target)
    source = source.rstrip()
    target = target.rstrip()
    # A colon is considered in addition to the configured end punctuation.
    endings = self.config.endpunctuation + u":"
    return helpers.funcmatch(source, target, decoration.puncend, endings)
730
def purepunc(self, str1, str2):
    """checks that strings that are purely punctuation are not changed"""
    # this test is a subset of startandend
    if not decoration.ispurepunctuation(str1):
        # A source with real content must not become pure punctuation.
        return not decoration.ispurepunctuation(str2)
    return str1 == str2
738
def brackets(self, str1, str2):
    """checks that the number of brackets in both strings match"""
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    # Every kind of bracket is counted on its own.
    all_brackets = (u"[", u"]", u"{", u"}", u"(", u")")
    missing = [u"'%s'" % bracket for bracket in all_brackets
               if target.count(bracket) < source.count(bracket)]
    extra = [u"'%s'" % bracket for bracket in all_brackets
             if target.count(bracket) > source.count(bracket)]
    messages = []
    if missing:
        messages.append(u"translation is missing %s" % u", ".join(missing))
    if extra:
        messages.append(u"translation has extra %s" % u", ".join(extra))
    if messages:
        raise FilterFailure(messages)
    return True
760
def sentencecount(self, str1, str2):
    """checks that the number of sentences in both strings match

    Accelerators are filtered out first.  The source is split with the
    source language's C{sentences} and the translation with the target
    language's.

    @return: C{True} when both strings have the same number of sentences.
    @raise FilterFailure: when the counts differ.
    """
    source_total = len(self.config.sourcelang.sentences(self.filteraccelerators(str1)))
    target_total = len(self.config.lang.sentences(self.filteraccelerators(str2)))
    if source_total != target_total:
        raise FilterFailure(u"The number of sentences differ: %d versus %d" % (source_total, target_total))
    return True
770
def options(self, str1, str2):
    """checks that options are not translated

    A source word counts as an option when it starts with "--", is not
    just "--", and ends in an alphanumeric character.  The option name
    (the part before the first "=") must occur in the translation, while
    its parameter (the part after it) must not.

    @return: C{True} when every option passes.
    @raise FilterFailure: on the first option that is missing, or whose
        parameter was left untranslated.
    """
    source = self.filtervariables(str1)
    for candidate in source.split():
        if candidate == u"--" or not candidate.startswith(u"--"):
            continue
        if not candidate[-1].isalnum():
            continue
        parts = candidate.split(u"=")
        option = parts[0]
        if option not in str2:
            raise FilterFailure(u"The option %s does not occur or is translated in the translation." % option)
        if len(parts) > 1 and parts[1] in str2:
            raise FilterFailure(u"The parameter %(param)s in option %(option)s is not translated." % {"param": parts[1], "option": option})
    return True
782
def startcaps(self, str1, str2):
    """checks that the message starts with the correct capitalisation

    Accelerators are filtered out first.  When both strings are longer
    than one character, the C{capsstart} result of the source language is
    compared with that of the target language.  Shorter strings only fail
    when exactly one of the two is empty.
    """
    source = self.filteraccelerators(str1)
    target = self.filteraccelerators(str2)
    if len(source) > 1 and len(target) > 1:
        return self.config.sourcelang.capsstart(source) == self.config.lang.capsstart(target)
    # At least one string is too short to judge: accept the pair unless
    # one side is empty and the other is not.
    return (len(source) == 0) == (len(target) == 0)
794
def simplecaps(self, str1, str2):
    """checks the capitalisation of two strings isn't wildly different

    Variables are removed from both strings, then the number of capital
    letters is compared with a set of heuristics that depend on how
    heavily the source is capitalised.

    A mid-sentence " I " in the source is lower-cased before counting so
    that the English pronoun is not treated as a capital.  A lookbehind is
    used so that only the pronoun is changed: the character in front of it
    must not be sentence-ending punctuation, but is no longer consumed and
    dropped from the string (which skewed the capital, alpha and length
    counts).

    @return: C{True} when the capitalisation styles look alike.
    """
    str1 = self.removevariables(str1)
    str2 = self.removevariables(str2)
    # TODO: review this. The 'I' is specific to English, so it probably serves
    # no purpose to get sourcelang.sentenceend
    str1 = re.sub(u"(?<=[^%s]) I " % self.config.sourcelang.sentenceend, u" i ", str1)
    capitals1 = helpers.filtercount(str1, unicode.isupper)
    capitals2 = helpers.filtercount(str2, unicode.isupper)
    alpha1 = helpers.filtercount(str1, unicode.isalpha)
    alpha2 = helpers.filtercount(str2, unicode.isalpha)
    # Capture the all caps case
    if capitals1 == alpha1:
        return capitals2 == alpha2
    # some heuristic tests to try and see that the style of capitals is
    # vaguely the same; "//" keeps the integer (floor) division explicit
    if capitals1 == 0 or capitals1 == 1:
        return capitals2 == capitals1
    elif capitals1 < len(str1) // 10:
        return capitals2 <= len(str2) // 8
    elif len(str1) < 10:
        return abs(capitals1 - capitals2) < 3
    elif capitals1 > len(str1) * 6 // 10:
        return capitals2 > len(str2) * 6 // 10
    else:
        return abs(capitals1 - capitals2) < (len(str1) + len(str2)) // 6
820
def acronyms(self, str1, str2):
    """checks that acronyms that appear are unchanged

    A source word is treated as an acronym when it is entirely upper case
    and longer than one character.  Variables found in the unfiltered
    source and the configured C{musttranslatewords} are exempt.  Both
    strings have variables and accelerators filtered out before the words
    are examined.

    @return: C{True} when every acronym also occurs in the translation.
    @raise FilterFailure: listing the acronyms missing from the
        translation.
    """
    allowed = []
    for startmatch, endmatch in self.config.varmatches:
        allowed.extend(decoration.getvariables(startmatch, endmatch)(str1))
    allowed.extend(self.config.musttranslatewords.keys())

    source = self.filteraccelerators(self.filtervariables(str1))
    source_words = self.config.lang.word_iter(source)
    target = self.filteraccelerators(self.filtervariables(str2))
    #TODO: strip XML? - should provide better error messages
    # see mail/chrome/messenger/smime.properties.po
    #TODO: consider limiting the word length for recognising acronyms to
    #something like 5/6 characters
    changed = [word for word in source_words
               if word.isupper() and len(word) > 1
               and word not in allowed and word not in target]
    if changed:
        raise FilterFailure(u"acronyms should not be translated: " + u", ".join(changed))
    return True
842
def doublewords(self, str1, str2):
    """checks for repeated words in the translation

    Variables are removed and accelerators filtered out of the
    translation, full stops are dropped and the text is lower-cased before
    it is split on whitespace.  Adjacent identical words fail unless the
    target language lists them in C{validdoublewords}.

    @return: C{True} when no invalid repetition is found.
    @raise FilterFailure: naming the first repeated word.
    """
    # Splitting and re-joining on the newline leaves the text as it was.
    rejoined = "\n".join(str2.split("\n"))
    cleaned = self.filteraccelerators(self.removevariables(rejoined))
    words = cleaned.replace(u".", u"").lower().split()
    for previous, current in zip(words, words[1:]):
        if current == previous and current not in self.config.lang.validdoublewords:
            raise FilterFailure(u"The word '%s' is repeated" % current)
    return True
853
def notranslatewords(self, str1, str2):
    """checks that words configured as untranslatable appear in the translation too

    Nothing is checked when no such words are configured.  Otherwise
    variables are filtered out of both strings, every configured
    punctuation character is replaced by a space, accelerators are
    filtered out and the strings are split into words.

    @return: C{True} when every configured word found in the source is
        also a word of the translation.
    @raise FilterFailure: listing the words that were translated.
    """
    protected = self.config.notranslatewords
    if not protected:
        return True
    source = self.filtervariables(str1)
    target = self.filtervariables(str2)
    # The punctuation list holds various quote characters; treating the
    # single apostrophe as a separator is perhaps problematic in words
    # like "doesn't".
    for separator in self.config.punctuation:
        source = source.replace(separator, u" ")
        target = target.replace(separator, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    translated = [word for word in source_words
                  if word in protected and word not in target_words]
    if translated:
        raise FilterFailure(u"do not translate: %s" % (u", ".join(translated)))
    return True
871
def musttranslatewords(self, str1, str2):
    """checks that words configured as definitely translatable don't appear in
    the translation

    Nothing is checked when no such words are configured.  Otherwise
    variables are removed from both strings, every configured punctuation
    character is replaced by a space, accelerators are filtered out and
    the strings are split into words.

    @return: C{True} when no configured word of the source is also a word
        of the translation.
    @raise FilterFailure: listing the words that still need translating.
    """
    required = self.config.musttranslatewords
    if not required:
        return True
    source = self.removevariables(str1)
    target = self.removevariables(str2)
    # The punctuation list holds various quote characters; treating the
    # single apostrophe as a separator is perhaps problematic in words
    # like "doesn't".
    for separator in self.config.punctuation:
        source = source.replace(separator, u" ")
        target = target.replace(separator, u" ")
    source_words = self.filteraccelerators(source).split()
    target_words = self.filteraccelerators(target).split()
    untranslated = [word for word in source_words
                    if word in required and word in target_words]
    if untranslated:
        raise FilterFailure(u"please translate: %s" % (u", ".join(untranslated)))
    return True
890
def validchars(self, str1, str2):
    """checks that only characters specified as valid appear in the translation

    Nothing is checked when no C{validcharsmap} is configured.  Otherwise
    both strings are run through that translation map; whatever is left
    of the translation and does not also remain in the source is
    reported.

    @return: C{True} when the translation introduces no invalid character.
    @raise FilterFailure: listing each offending character with its code
        point.
    """
    charmap = self.config.validcharsmap
    if not charmap:
        return True
    source_leftover = str1.translate(charmap)
    target_leftover = str2.translate(charmap)
    offenders = []
    for char in target_leftover:
        if char not in source_leftover:
            offenders.append(u"'%s' (\\u%04x)" % (char, ord(char)))
    if offenders:
        raise FilterFailure(u"invalid chars: %s" % (u", ".join(offenders)))
    return True
901
def filepaths(self, str1, str2):
    """checks that file paths have not been translated

    Every whitespace-separated word of the accelerator-filtered source
    that starts with "/" is treated as a path.  C{helpers.countsmatch} is
    then asked to compare that word between the original source and the
    translation.

    @return: C{False} as soon as one path does not match, else C{True}.
    """
    for candidate in self.filteraccelerators(str1).split():
        if not candidate.startswith(u"/"):
            continue
        if not helpers.countsmatch(str1, str2, (candidate,)):
            return False
    return True
909
def xmltags(self, str1, str2):
    """checks that XML/HTML tags have not been translated

    When the source has no tags, the translation may not have any either.
    When the whole source is a single tag without "=", the check passes.
    Otherwise the tag properties of both strings are extracted (honouring
    the configured C{ignoretags}), filtered through C{canchangetags} and
    compared.
    """
    source_tags = tag_re.findall(str1)
    if len(source_tags) == 0:
        # No tags in str1, let's just check that none were added in str2.
        # This might be useful for fuzzy strings wrongly unfuzzied, for
        # example.
        if len(tag_re.findall(str2)) > 0:
            return False
        return True

    first_tag = source_tags[0]
    if len(first_tag) == len(str1) and u"=" not in first_tag:
        return True

    target_tags = tag_re.findall(str2)
    source_properties = tagproperties(source_tags, self.config.ignoretags)
    target_properties = tagproperties(target_tags, self.config.ignoretags)
    changeable = self.config.canchangetags
    source_filtered = [intuplelist(prop, changeable) for prop in source_properties]
    target_filtered = [intuplelist(prop, changeable) for prop in target_properties]

    #TODO: consider the consequences of different ordering of attributes/tags
    if source_filtered != target_filtered:
        return False
    return True
936
def kdecomments(self, str1, str2):
    """checks to ensure that no KDE style comments appear in the translation

    The translation fails when it starts with "_:" or when any later line
    begins with it.
    """
    if str2.startswith(u"_:"):
        return False
    return u"\n_:" not in str2
940
def compendiumconflicts(self, str1, str2):
    """checks for Gettext compendium conflicts (#-#-#-#-#)

    The translation fails when it contains the conflict marker anywhere.
    """
    marker = u"#-#-#-#-#"
    return marker not in str2
944
def simpleplurals(self, str1, str2):
    """checks for English style plural(s) for you to review

    Occurrences of "(s)" are counted in both strings.  A target language
    with a single plural form may not contain any; otherwise the counts
    must be equal.

    @return: C{True} when the translation's use of "(s)" is acceptable.
    """
    def numberofpatterns(string, patterns):
        """Return the total number of matches of all patterns in string."""
        return sum(len(re.findall(pattern, string)) for pattern in patterns)

    # Raw strings: "\(" is not a valid string escape and only worked by
    # accident in a plain literal.
    sourcepatterns = [r"\(s\)"]
    targetpatterns = [r"\(s\)"]
    sourcecount = numberofpatterns(str1, sourcepatterns)
    targetcount = numberofpatterns(str2, targetpatterns)
    if self.config.lang.nplurals == 1:
        return not targetcount
    return sourcecount == targetcount
960
def spellcheck(self, str1, str2):
    """checks words that don't pass a spell check

    The check is skipped when no target language is configured or when
    the spelling module reports itself unavailable.  Variables and each
    language's valid accelerators are filtered out first.  Words flagged
    in the source (checked as "en") are ignored in the translation, as
    are configured C{notranslatewords} and words that appear among their
    own suggestions.

    @return: C{True} when nothing is left to report.
    @raise FilterFailure: with one message per suspicious word, each
        offering at most five suggestions.
    """
    if not self.config.targetlanguage or not spelling.available:
        return True
    # TODO: filterxml?
    source = self.filteraccelerators_by_list(self.filtervariables(str1), self.config.sourcelang.validaccel)
    target = self.filteraccelerators_by_list(self.filtervariables(str2), self.config.lang.validaccel)
    source_flagged = [word for word, index, suggestions in spelling.check(source, lang="en")]
    messages = []
    for word, index, suggestions in spelling.check(target, lang=self.config.targetlanguage):
        if word in self.config.notranslatewords or word in source_flagged:
            continue
        # hack to ignore hyphenisation rules
        if word in suggestions:
            continue
        messages.append(u"check spelling of %s (could be %s)" % (word, u" / ".join(suggestions[:5])))
    if messages:
        raise FilterFailure(messages)
    return True
986
def credits(self, str1, str2):
    """checks for messages containing translation credits instead of normal translations.

    The check fails when the source string is one of the configured
    C{credit_sources}.
    """
    return str1 not in self.config.credit_sources
# If the precondition filter is run and fails then the other tests listed are
# ignored.
# NOTE(review): this mapping presumably belongs in the body of the checker
# class whose methods precede it -- indent to match when applying.
preconditions = {
    "untranslated": (
        "simplecaps", "variables", "startcaps",
        "accelerators", "brackets", "endpunc",
        "acronyms", "xmltags", "startpunc",
        "endwhitespace", "startwhitespace",
        "escapes", "doublequoting", "singlequoting",
        "filepaths", "purepunc", "doublespacing",
        "sentencecount", "numbers", "isfuzzy",
        "isreview", "notranslatewords", "musttranslatewords",
        "emails", "simpleplurals", "urls", "printf",
        "tabs", "newlines", "functions", "options",
        "blank", "nplurals", "gconf",
    ),
    "blank": (
        "simplecaps", "variables", "startcaps",
        "accelerators", "brackets", "endpunc",
        "acronyms", "xmltags", "startpunc",
        "endwhitespace", "startwhitespace",
        "escapes", "doublequoting", "singlequoting",
        "filepaths", "purepunc", "doublespacing",
        "sentencecount", "numbers", "isfuzzy",
        "isreview", "notranslatewords", "musttranslatewords",
        "emails", "simpleplurals", "urls", "printf",
        "tabs", "newlines", "functions", "options",
        "gconf",
    ),
    "credits": (
        "simplecaps", "variables", "startcaps",
        "accelerators", "brackets", "endpunc",
        "acronyms", "xmltags", "startpunc",
        "escapes", "doublequoting", "singlequoting",
        "filepaths", "doublespacing",
        "sentencecount", "numbers",
        "emails", "simpleplurals", "urls", "printf",
        "tabs", "newlines", "functions", "options",
    ),
    "purepunc": ("startcaps", "options"),
    # This is causing some problems since Python 2.6, as
    # startcaps is now seen as an important one to always execute
    # and could now be done before it is blocked by a failing
    # "untranslated" or "blank" test. This is probably happening
    # due to slightly different implementation of the internal
    # dict handling since Python 2.6. We should never have relied
    # on this ordering anyway.
    #"startcaps": ("simplecaps",),
    "endwhitespace": ("endpunc",),
    "startwhitespace": ("startpunc",),
    "unchanged": ("doublewords",),
    "compendiumconflicts": (
        "accelerators", "brackets", "escapes",
        "numbers", "startpunc", "long", "variables",
        "startcaps", "sentencecount", "simplecaps",
        "doublespacing", "endpunc", "xmltags",
        "startwhitespace", "endwhitespace",
        "singlequoting", "doublequoting",
        "filepaths", "purepunc", "doublewords", "printf",
    ),
}

# code to actually run the tests (use unittest?)

# Checker settings for OpenOffice.org projects: accelerator marker, variable
# delimiters, and the tag properties to ignore or allow to change.
openofficeconfig = CheckerConfig(
    accelmarkers=["~"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", None), ("%", 0), ("$(", ")"),
        ("$", "$"), ("${", "}"), ("#", "#"), ("#", 1), ("#", 0),
        ("($", ")"), ("$[", "]"), ("[", "]"), ("$", None),
    ],
    ignoretags=[
        ("alt", "xml-lang", None),
        ("ahelp", "visibility", "visible"),
        ("img", "width", None),
        ("img", "height", None),
    ],
    canchangetags=[("link", "name", None)],
)
class OpenOfficeChecker(StandardChecker):
    """StandardChecker preloaded with the OpenOffice.org settings."""

    def __init__(self, **kwargs):
        """Merge C{openofficeconfig} into the checker configuration.

        A C{checkerconfig} keyword argument is updated in place; when it
        is missing or C{None} a fresh C{CheckerConfig} is created and
        passed on to C{StandardChecker}.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(openofficeconfig)
        StandardChecker.__init__(self, **kwargs)
# Checker settings for Mozilla projects; the accelerators test is marked as
# critical.
mozillaconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[
        ("&", ";"), ("%", "%"), ("%", 1), ("$", "$"), ("$", None),
        ("#", 1), ("${", "}"), ("$(^", ")"),
    ],
    criticaltests=["accelerators"],
)
class MozillaChecker(StandardChecker):
    """StandardChecker preloaded with the Mozilla settings."""

    def __init__(self, **kwargs):
        """Merge C{mozillaconfig} into the checker configuration.

        A C{checkerconfig} keyword argument is updated in place; when it
        is missing or C{None} a fresh C{CheckerConfig} is created and
        passed on to C{StandardChecker}.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(mozillaconfig)
        StandardChecker.__init__(self, **kwargs)

    def credits(self, str1, str2):
        """checks for messages containing translation credits instead of normal translations.

        The decision is based on C{self.locations} rather than on the
        strings: the check fails when any location is one of the known
        credit entries.
        """
        credit_locations = ['MOZ_LANGPACK_CONTRIBUTORS', 'credit.translation']
        found = [location for location in self.locations
                 if location in credit_locations]
        return not found
# Checker settings for Drupal projects: variables start with "%", "@" or "!".
drupalconfig = CheckerConfig(
    varmatches=[("%", None), ("@", None), ("!", None)],
)
class DrupalChecker(StandardChecker):
    """StandardChecker preloaded with the Drupal settings."""

    def __init__(self, **kwargs):
        """Merge C{drupalconfig} into the checker configuration.

        A C{checkerconfig} keyword argument is updated in place; when it
        is missing or C{None} a fresh C{CheckerConfig} is created and
        passed on to C{StandardChecker}.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(drupalconfig)
        StandardChecker.__init__(self, **kwargs)
# Checker settings for GNOME projects, including the source string that marks
# a translator-credits message.
gnomeconfig = CheckerConfig(
    accelmarkers=["_"],
    varmatches=[("%", 1), ("$(", ")")],
    credit_sources=[u"translator-credits"],
)
class GnomeChecker(StandardChecker):
    """StandardChecker preloaded with the GNOME settings."""

    def __init__(self, **kwargs):
        """Merge C{gnomeconfig} into the checker configuration.

        A C{checkerconfig} keyword argument is updated in place; when it
        is missing or C{None} a fresh C{CheckerConfig} is created and
        passed on to C{StandardChecker}.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(gnomeconfig)
        StandardChecker.__init__(self, **kwargs)

    def gconf(self, str1, str2):
        """Checks if we have any gconf config settings translated.

        Only applies to units with a location containing 'schemas.in'.
        Each match of C{gconf_attribute_re} in the source, stripped of its
        first and last character, must occur in the translation.

        @return: C{True} when nothing was translated that should not be.
        @raise FilterFailure: listing the translated gconf attributes.
        """
        for location in self.locations:
            if 'schemas.in' not in location:
                continue
            attributes = gconf_attribute_re.findall(str1)
            translated = [attribute for attribute in attributes
                          if attribute[1:-1] not in str2]
            if translated:
                raise FilterFailure(u"do not translate gconf attribute: %s" % (u", ".join(translated)))
        return True
# Checker settings for KDE projects, including the source strings that mark
# translator-credits messages.
kdeconfig = CheckerConfig(
    accelmarkers=["&"],
    varmatches=[("%", 1)],
    credit_sources=[u"Your names", u"Your emails", u"ROLES_OF_TRANSLATORS"],
)
class KdeChecker(StandardChecker):
    """StandardChecker preloaded with the KDE settings."""

    def __init__(self, **kwargs):
        """Merge C{kdeconfig} into the checker configuration.

        A C{checkerconfig} keyword argument is updated in place; when it
        is missing or C{None} a fresh C{CheckerConfig} is created and
        passed on to C{StandardChecker}.
        """
        # TODO allow setup of KDE plural and translator comments so that they
        # do not create false positives
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(kdeconfig)
        StandardChecker.__init__(self, **kwargs)
# Checker settings for Creative Commons licences: variables look like @name@.
cclicenseconfig = CheckerConfig(
    varmatches=[("@", "@")],
)
class CCLicenseChecker(StandardChecker):
    """StandardChecker preloaded with the Creative Commons settings."""

    def __init__(self, **kwargs):
        """Merge C{cclicenseconfig} into the checker configuration.

        A C{checkerconfig} keyword argument is updated in place; when it
        is missing or C{None} a fresh C{CheckerConfig} is created and
        passed on to C{StandardChecker}.
        """
        config = kwargs.get("checkerconfig")
        if config is None:
            config = kwargs["checkerconfig"] = CheckerConfig()
        config.update(cclicenseconfig)
        StandardChecker.__init__(self, **kwargs)
# Maps a project name to the checker class carrying that project's settings.
# "wx" shares the KDE checker.
projectcheckers = {
    "openoffice": OpenOfficeChecker,
    "mozilla": MozillaChecker,
    "kde": KdeChecker,
    "wx": KdeChecker,
    "gnome": GnomeChecker,
    "creativecommons": CCLicenseChecker,
    "drupal": DrupalChecker,
}
class StandardUnitChecker(UnitChecker):
    """The standard checks for common checks on translation units."""

    def isfuzzy(self, unit):
        """Check if the unit has been marked fuzzy.

        @return: C{False} (a failure) for a fuzzy unit.
        """
        fuzzy = unit.isfuzzy()
        return not fuzzy

    def isreview(self, unit):
        """Check if the unit has been marked review.

        @return: C{False} (a failure) for a unit marked for review.
        """
        needs_review = unit.isreview()
        return not needs_review

    def nplurals(self, unit):
        """Checks for the correct number of noun forms for plural translations.

        Units without plurals always pass, as do all units when the target
        language has no valid (positive) C{nplurals} value.
        """
        if not unit.hasplural():
            return True
        expected = self.config.lang.nplurals
        # if we don't have a valid nplurals value, don't run the test
        if expected > 0:
            return len(unit.target.strings) == expected
        return True

    def hassuggestion(self, unit):
        """Checks if there is at least one suggested translation for this unit.

        Suggestions come from C{self.suggestion_store} when one is set
        (the attribute is created as C{None} if absent); otherwise, for
        XLIFF units, from the unit's alternative translations.

        @return: C{False} (a failure) when any suggestion is found.
        """
        self.suggestion_store = getattr(self, 'suggestion_store', None)
        if self.suggestion_store:
            suggestions = self.suggestion_store.findunits(unit.source)
        elif xliff and isinstance(unit, xliff.xliffunit):
            # TODO: we probably want to filter them somehow
            suggestions = unit.getalttrans()
        else:
            suggestions = []
        return not suggestions
1188 1189
def runtests(str1, str2, ignorelist=()):
    """verifies that the tests pass for a pair of strings

    Both strings are normalised, wrapped in a translation unit and run
    through a C{StandardChecker} that excludes the filters named in
    C{ignorelist}.  Every failure is printed.

    @return: the failures reported by the checker's C{run_filters}.
    """
    from translate.storage import base
    source = data.normalized_unicode(str1)
    target = data.normalized_unicode(str2)
    unit = base.TranslationUnit(source)
    unit.target = target
    failures = StandardChecker(excludefilters=ignorelist).run_filters(unit)
    for test in failures:
        report = "failure: %s: %s\n %r\n %r" % (test, failures[test], source, target)
        print(report)
    return failures
1202
def batchruntests(pairs):
    """runs test on a batch of string pairs

    Each (source, target) pair is passed to C{runtests}, which prints any
    failures and returns them.  A pair counts as passed only when nothing
    is returned as a failure -- the previous code counted the pairs that
    I{did} have failures.  A summary line is printed at the end.

    @param pairs: a sequence of (str1, str2) tuples.
    """
    passed, numpairs = 0, len(pairs)
    for str1, str2 in pairs:
        # runtests returns the failures, so an empty result means success.
        if not runtests(str1, str2):
            passed += 1
    print("")
    print("total: %d/%d pairs passed" % (passed, numpairs))
if __name__ == '__main__':
    # Sample (source, translation) pairs for a quick manual run of the
    # standard checks.
    testset = [
        (r"simple", r"somple"),
        (r"\this equals \that", r"does \this equal \that?"),
        (r"this \'equals\' that", r"this 'equals' that"),
        (r" start and end! they must match.", r"start and end! they must match."),
        (r"check for matching %variables marked like %this", r"%this %variable is marked"),
        (r"check for mismatching %variables marked like %this", r"%that %variable is marked"),
        (r"check for mismatching %variables% too", r"how many %variable% are marked"),
        (r"%% %%", r"%%"),
        (r"Row: %1, Column: %2", r"Mothalo: %1, Kholomo: %2"),
        (r"simple lowercase", r"it is all lowercase"),
        (r"simple lowercase", r"It Is All Lowercase"),
        (r"Simple First Letter Capitals", r"First Letters"),
        (r"SIMPLE CAPITALS", r"First Letters"),
        (r"SIMPLE CAPITALS", r"ALL CAPITALS"),
        (r"forgot to translate", r" "),
    ]
    batchruntests(testset)