Package translate :: Package storage :: Module statsdb
[hide private]
[frames | no frames]

Source Code for Module translate.storage.statsdb

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2007-2010 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21   
 22  """Module to provide a cache of statistics in a database. 
 23   
 24  @organization: Zuza Software Foundation 
 25  @copyright: 2007 Zuza Software Foundation 
 26  @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>} 
 27  """ 
 28   
 29  try: 
 30      from sqlite3 import dbapi2 
 31  except ImportError: 
 32      from pysqlite2 import dbapi2 
 33  import os.path 
 34  import re 
 35  import sys 
 36  import stat 
 37  import thread 
 38  from UserDict import UserDict 
 39   
 40  from translate import __version__ as toolkitversion 
 41  from translate.lang.common import Common 
 42  from translate.misc.multistring import multistring 
 43  from translate.storage import factory 
 44  from translate.storage.workflow import StateEnum 
 45   
 46  #kdepluralre = re.compile("^_n: ") #Restore this if you really need support for old kdeplurals 
 47  brtagre = re.compile("<br\s*?/?>") 
 48  xmltagre = re.compile("<[^>]+>") 
 49  numberre = re.compile("\\D\\.\\D") 
 50   
# Names for the extended unit states, keyed by StateEnum member.
# StatsCache.file_extended_totals() uses these names as the keys of the
# dictionary it returns.
extended_state_strings = {
    StateEnum.EMPTY: "empty",
    StateEnum.NEEDS_WORK: "needs-work",
    StateEnum.REJECTED: "rejected",
    StateEnum.NEEDS_REVIEW: "needs-review",
    StateEnum.UNREVIEWED: "unreviewed",
    StateEnum.FINAL: "final",
    }

# The three coarse states returned by statefordb() are aliases of StateEnum
# members.
UNTRANSLATED = StateEnum.EMPTY
FUZZY = StateEnum.NEEDS_WORK
TRANSLATED = StateEnum.UNREVIEWED

# Names of the three coarse states, keyed by the constants above.
state_strings = {
    UNTRANSLATED: "untranslated",
    FUZZY: "fuzzy",
    TRANSLATED: "translated",
}
def wordcount(string):
    """Return the number of words in ``string`` after stripping markup.

    ``<br>`` tags are turned into newlines, every other tag-like span is
    removed and each ``numberre`` match is replaced by a single space; the
    cleaned text is then split with ``Common.words`` and the pieces counted.
    """
    # TODO: po class should understand KDE style plurals
    # Restore this if you really need support for old kdeplurals:
    #string = kdepluralre.sub("", string)
    cleaned = string
    substitutions = (
        (brtagre, "\n"),
        (xmltagre, ""),
        (numberre, " "),
    )
    for pattern, replacement in substitutions:
        cleaned = pattern.sub(replacement, cleaned)
    # TODO: This should still use the correct language to count in the target
    # language
    return len(Common.words(cleaned))
80
def wordsinunit(unit):
    """Count the words in a unit's source and target, plurals included.

    Returns a ``(sourcewords, targetwords)`` tuple. Target words are only
    counted when ``unit.istranslated()`` is true; otherwise the second
    element is 0.
    """

    def _strings(text):
        # A multistring carries one string per plural form; anything else
        # is treated as a single (possibly empty) string.
        if isinstance(text, multistring):
            return text.strings
        return [text or ""]

    sourcewords = sum(wordcount(s) for s in _strings(unit.source))
    if not unit.istranslated():
        return sourcewords, 0
    targetwords = sum(wordcount(s) for s in _strings(unit.target))
    return sourcewords, targetwords
101
class Record(UserDict):
    """A dict-like row of numeric values with optional derived values.

    ``record_keys`` names the stored values and fixes their order for
    ``to_tuple()``. ``compute_derived_values`` is a callable that receives
    the record and may add further keys computed from the stored ones; it is
    run on construction and on the result of ``+`` and ``-``.
    """

    def __init__(self, record_keys, record_values=None, compute_derived_values=lambda x: x):
        # Without explicit values every key starts at 0.
        if record_values is None:
            record_values = (0 for _i in record_keys)
        self.record_keys = record_keys
        self.data = dict(zip(record_keys, record_values))
        self._compute_derived_values = compute_derived_values
        self._compute_derived_values(self)

    def to_tuple(self):
        """Return the stored (non-derived) values in ``record_keys`` order."""
        return tuple(self[key] for key in self.record_keys)

    def _combine(self, other, operation):
        """Apply ``operation`` key-wise over ``record_keys`` into a new Record.

        Only the stored keys are combined, so ``other`` does not need to
        carry the derived keys. The result keeps this record's derived-value
        callback and has its derived values computed from the combined
        stored values; neither operand is modified.
        """
        result = Record(self.record_keys,
                        compute_derived_values=self._compute_derived_values)
        for key in self.record_keys:
            result[key] = operation(self[key], other[key])
        result._compute_derived_values(result)
        return result

    def __add__(self, other):
        """Return a new Record holding the key-wise sum."""
        return self._combine(other, lambda a, b: a + b)

    def __sub__(self, other):
        """Return a new Record holding the key-wise difference."""
        return self._combine(other, lambda a, b: a - b)

    def as_string_for_db(self):
        """Return the stored values as a comma-separated string of reprs."""
        return ",".join([repr(x) for x in self.to_tuple()])
132
def transaction(f):
    """Modifies f to commit database changes if it executes without exceptions.
    Otherwise it rolls back the database.

    ALL publicly accessible methods in StatsCache MUST be decorated with this
    decorator.

    ``f`` must be a method of an object exposing the database connection as
    ``self.con``. The returned wrapper keeps ``f``'s name, docstring and
    attributes so that introspection and generated documentation still show
    the wrapped method.
    """

    def decorated_f(self, *args, **kwargs):
        try:
            result = f(self, *args, **kwargs)
            self.con.commit()
            return result
        except:
            # Deliberately bare: if ANY exception is raised, we're left in an
            # uncertain state and we MUST roll back any changes to avoid
            # getting stuck in an inconsistent state. The exception is
            # always re-raised.
            if self.con:
                self.con.rollback()
            raise

    # Copy the metadata by hand rather than relying on functools, which is
    # not available on every interpreter that needs the pysqlite2 fallback.
    try:
        decorated_f.__name__ = f.__name__
    except (AttributeError, TypeError):
        pass
    decorated_f.__doc__ = getattr(f, "__doc__", None)
    decorated_f.__dict__.update(getattr(f, "__dict__", {}))
    return decorated_f
def statefordb(unit):
    """Return the coarse database state for ``unit``.

    The result is TRANSLATED when ``unit.istranslated()`` is true, FUZZY when
    the unit is fuzzy and has a non-empty target, and UNTRANSLATED otherwise.
    """
    if unit.istranslated():
        state = TRANSLATED
    elif unit.isfuzzy() and unit.target:
        state = FUZZY
    else:
        state = UNTRANSLATED
    return state
164
class FileTotals(object):
    """Mapping-style access to the ``filetotals`` table, keyed by file id.

    ``totals[fileid]`` reads a row as a Record, ``totals[fileid] = record``
    inserts or replaces it and ``del totals[fileid]`` removes it. All
    statements run on the cursor given to the constructor; committing is
    left to the caller.
    """

    # Columns of the filetotals table after fileid, in table order.
    keys = ['translatedsourcewords',
            'fuzzysourcewords',
            'untranslatedsourcewords',
            'translated',
            'fuzzy',
            'untranslated',
            'translatedtargetwords']

    def db_keys(self):
        """Return the column names as a comma-separated list for SQL."""
        return ",".join(self.keys)

    def __init__(self, cur):
        """Remember ``cur`` and create the filetotals table if it is missing."""
        self.cur = cur
        self.cur.execute("""
            CREATE TABLE IF NOT EXISTS filetotals(
                fileid                  INTEGER PRIMARY KEY AUTOINCREMENT,
                translatedsourcewords   INTEGER NOT NULL,
                fuzzysourcewords        INTEGER NOT NULL,
                untranslatedsourcewords INTEGER NOT NULL,
                translated              INTEGER NOT NULL,
                fuzzy                   INTEGER NOT NULL,
                untranslated            INTEGER NOT NULL,
                translatedtargetwords   INTEGER NOT NULL);""")

    @classmethod
    def new_record(cls, state_for_db=None, sourcewords=None, targetwords=None):
        """Return a Record describing a single unit, or an all-zero Record.

        With no ``state_for_db`` every counter is 0. Otherwise the counters
        matching the state (UNTRANSLATED, TRANSLATED or FUZZY) are set to one
        unit with the given word counts.
        """
        record = Record(cls.keys, compute_derived_values=cls._compute_derived_values)
        if state_for_db is not None:
            # States are plain values that may come straight from the
            # database, so they are compared by equality, not identity.
            if state_for_db == UNTRANSLATED:
                record['untranslated'] = 1
                record['untranslatedsourcewords'] = sourcewords
            elif state_for_db == TRANSLATED:
                record['translated'] = 1
                record['translatedsourcewords'] = sourcewords
                record['translatedtargetwords'] = targetwords
            elif state_for_db == FUZZY:
                record['fuzzy'] = 1
                record['fuzzysourcewords'] = sourcewords
            # The derived totals were computed while every counter was
            # still 0; refresh them now that the counters are set.
            cls._compute_derived_values(record)
        return record

    @classmethod
    def _compute_derived_values(cls, record):
        """Fill in ``total``, ``totalsourcewords`` and ``review`` on ``record``."""
        record["total"] = record["untranslated"] + \
                          record["translated"] + \
                          record["fuzzy"]
        record["totalsourcewords"] = record["untranslatedsourcewords"] + \
                                     record["translatedsourcewords"] + \
                                     record["fuzzysourcewords"]
        record["review"] = 0

    def __getitem__(self, fileid):
        """Return the totals for ``fileid`` as a Record.

        If no row exists, ``fetchone()`` yields None and the Record is
        created with every counter at 0.
        """
        result = self.cur.execute("""
            SELECT %(keys)s
            FROM   filetotals
            WHERE  fileid=?;""" % {'keys': self.db_keys()}, (fileid,))
        return Record(FileTotals.keys, result.fetchone(), self._compute_derived_values)

    def __setitem__(self, fileid, record):
        """Insert or replace the row for ``fileid`` with ``record``'s values."""
        # Values are bound as parameters instead of being formatted into the
        # statement, so the SQL no longer depends on how repr() happens to
        # render each number.
        placeholders = ",".join(["?"] * (len(self.keys) + 1))
        self.cur.execute("""
            INSERT OR REPLACE into filetotals
            VALUES (%s);
        """ % placeholders, (fileid,) + tuple(record.to_tuple()))

    def __delitem__(self, fileid):
        """Delete the row for ``fileid``; a missing row is not an error."""
        self.cur.execute("""
            DELETE FROM filetotals
            WHERE fileid=?;
        """, (fileid,))
236
def emptyfiletotals():
    """Return a new file-totals record with all statistics initialised to 0."""
    blank_totals = FileTotals.new_record()
    return blank_totals
241
def emptyfilechecks():
    """Return a new, empty dictionary for collecting check failures."""
    return dict()
245
def emptyfilestats():
    """Return a new dictionary with an empty list for each state category."""
    categories = ("total", "translated", "fuzzy", "untranslated")
    return dict((category, []) for category in categories)
249
def emptyunitstats():
    """Return a new dictionary with empty source and target word-count lists."""
    columns = ("sourcewordcount", "targetwordcount")
    return dict((column, []) for column in columns)
253
# We allow the caller to specify which value to return when errors_return_empty
# is True. We do this, since Pootle wants None to be returned when it calls
# get_mod_info directly, whereas we want an integer to be returned for
# uses of get_mod_info within this module.
# TODO: Get rid of empty_return when Pootle code is improved to not require
# this.
# NOTE(review): the signature below has neither an errors_return_empty nor an
# empty_return parameter, so the comment above looks stale -- confirm and
# remove.


def get_mod_info(file_path):
    """Return ``(st_mtime, st_size)`` for the file at ``file_path``.

    Raises whatever ``os.stat`` raises when the path cannot be examined, and
    AssertionError when the path is a directory. The directory check is an
    explicit raise rather than an ``assert`` statement so that it still runs
    under ``python -O``; the exception type is unchanged.
    """
    file_stat = os.stat(file_path)
    if stat.S_ISDIR(file_stat.st_mode):
        raise AssertionError("%r is a directory" % (file_path,))
    return file_stat.st_mtime, file_stat.st_size
267
def suggestion_extension():
    """Return the file extension used for suggestion files: the platform's
    extension separator followed by ``pending``."""
    return "".join([os.path.extsep, 'pending'])
271
def suggestion_filename(filename):
    """Return ``filename`` with the suggestion extension appended."""
    extension = suggestion_extension()
    return filename + extension
275
# ALL PUBLICLY ACCESSIBLE METHODS MUST BE DECORATED WITH THE transaction DECORATOR.
class StatsCache(object):
    """An object instantiated as a singleton for each statsfile that provides
    access to the database cache from a pool of StatsCache objects.

    Instances are pooled per calling thread and per real path of the stats
    file: ``StatsCache(path)`` returns the pooled instance when one exists
    and otherwise opens the database, discards it if it is outdated and
    creates the tables.
    """
    # Pool of instances: {thread ident: {statsfile path: StatsCache}}.
    _caches = {}
    # Path of the default stats database; computed on first use by __new__.
    defaultfile = None
    # This cache's connection
    con = None
    # The current cursor
    cur = None

    def __new__(cls, statsfile=None):
        """Return the pooled cache for ``statsfile``, creating it if needed.

        Without ``statsfile`` the default database ``stats.db`` in the user's
        toolkit directory is used; the directory is created when missing.
        """
        current_thread = thread.get_ident()

        def make_database(statsfile):

            def connect(cache):
                cache.con = dbapi2.connect(statsfile)
                cache.cur = cache.con.cursor()

            def clear_old_data(cache):
                """Delete the database file if it is empty or was written by
                an older toolkit build. Returns True when it was deleted."""
                try:
                    cache.cur.execute("""SELECT min(toolkitbuild) FROM files""")
                    val = cache.cur.fetchone()
                    # If the database is empty, we have no idea whether its layout
                    # is correct, so we might as well delete it. min() over
                    # an empty table yields a row holding NULL, so that case
                    # is tested explicitly instead of relying on how None
                    # compares with an integer.
                    if val is None or val[0] is None or val[0] < toolkitversion.build:
                        cache.con.close()
                        os.unlink(statsfile)
                        return True
                    return False
                except dbapi2.OperationalError:
                    # E.g. the files table does not exist yet.
                    return False

            cache = object.__new__(cls)
            cls._caches.setdefault(current_thread, {})[statsfile] = cache
            connect(cache)
            if clear_old_data(cache):
                # The old file was removed; reconnect to start a fresh one.
                connect(cache)
            cache.create()
            return cache

        if not statsfile:
            if not cls.defaultfile:
                userdir = os.path.expanduser("~")
                if os.name == "nt":
                    cachedir = os.path.join(userdir, "Translate Toolkit")
                else:
                    cachedir = os.path.join(userdir, ".translate_toolkit")
                if not os.path.exists(cachedir):
                    os.mkdir(cachedir)
                cls.defaultfile = os.path.realpath(os.path.join(cachedir, "stats.db"))
            statsfile = cls.defaultfile
        else:
            statsfile = os.path.realpath(statsfile)
        # First see if a cache for this file already exists:
        if current_thread in cls._caches and statsfile in cls._caches[current_thread]:
            return cls._caches[current_thread][statsfile]
        # No existing cache. Let's build a new one and keep a copy
        return make_database(statsfile)

    @transaction
    def create(self):
        """Create all tables and indexes."""
        self.file_totals = FileTotals(self.cur)

        self.cur.execute("""CREATE TABLE IF NOT EXISTS files(
            fileid INTEGER PRIMARY KEY AUTOINCREMENT,
            path VARCHAR NOT NULL UNIQUE,
            st_mtime INTEGER NOT NULL,
            st_size INTEGER NOT NULL,
            toolkitbuild INTEGER NOT NULL);""")

        self.cur.execute("""CREATE UNIQUE INDEX IF NOT EXISTS filepathindex
            ON files (path);""")

        self.cur.execute("""CREATE TABLE IF NOT EXISTS units(
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            unitid VARCHAR NOT NULL,
            fileid INTEGER NOT NULL,
            unitindex INTEGER NOT NULL,
            source VARCHAR NOT NULL,
            target VARCHAR,
            state INTEGER,
            e_state INTEGER,
            sourcewords INTEGER,
            targetwords INTEGER);""")

        self.cur.execute("""CREATE INDEX IF NOT EXISTS fileidindex
            ON units(fileid);""")

        self.cur.execute("""CREATE TABLE IF NOT EXISTS checkerconfigs(
            configid INTEGER PRIMARY KEY AUTOINCREMENT,
            config VARCHAR);""")

        self.cur.execute("""CREATE INDEX IF NOT EXISTS configindex
            ON checkerconfigs(config);""")

        self.cur.execute("""CREATE TABLE IF NOT EXISTS uniterrors(
            errorid INTEGER PRIMARY KEY AUTOINCREMENT,
            unitindex INTEGER NOT NULL,
            fileid INTEGER NOT NULL,
            configid INTEGER NOT NULL,
            name VARCHAR NOT NULL,
            message VARCHAR);""")

        self.cur.execute("""CREATE INDEX IF NOT EXISTS uniterrorindex
            ON uniterrors(fileid, configid);""")

    @transaction
    def _getfileid(self, filename, check_mod_info=True, store=None):
        """return fileid representing the given file in the statscache.

        if file not in cache or has been updated since last record
        update, recalculate stats.

        optional argument store can be used to avoid unnecessary
        reparsing of already loaded translation files.

        store can be a TranslationFile object or a callback that returns one.

        With check_mod_info=False an existing entry is never recalculated;
        its stored modification time and size are simply refreshed.
        """
        if isinstance(filename, str):
            filename = unicode(filename, sys.getfilesystemencoding())
        realpath = os.path.realpath(filename)
        self.cur.execute("""SELECT fileid, st_mtime, st_size FROM files
                WHERE path=?;""", (realpath,))
        filerow = self.cur.fetchone()
        mod_info = get_mod_info(realpath)
        if filerow:
            fileid = filerow[0]
            if not check_mod_info:
                # Update the mod_info of the file
                self.cur.execute("""UPDATE files
                        SET st_mtime=?, st_size=?
                        WHERE fileid=?;""", (mod_info[0], mod_info[1], fileid))
                return fileid
            if (filerow[1], filerow[2]) == mod_info:
                return fileid

        # The file is not in the database, or it changed since it was
        # cached: (re)cache it.
        if callable(store):
            store = store()
        else:
            store = store or factory.getobject(realpath)

        return self._cachestore(store, realpath, mod_info)

    def _getstoredcheckerconfig(self, checker):
        """See if this checker configuration has been used before.

        Returns the stored configid, or None when the configuration is
        unknown.
        """
        config = str(checker.config.__dict__)
        self.cur.execute("""SELECT configid, config FROM checkerconfigs WHERE
            config=?;""", (config,))
        configrow = self.cur.fetchone()
        if not configrow or configrow[1] != config:
            return None
        return configrow[0]

    @transaction
    def _cacheunitstats(self, units, fileid, unitindex=None, file_totals_record=None):
        """Cache the statistics for the supplied unit(s).

        ``unitindex`` is given when a single unit is being re-cached; every
        translatable unit is then stored under that index and the name of
        the state of ``units[0]`` is returned. Otherwise the units are
        stored under their position in ``units`` and "" is returned.

        ``file_totals_record`` is the record the units' totals are added to
        before being stored for ``fileid``; it defaults to an all-zero
        record.
        """
        # Build the default per call rather than once at definition time.
        if file_totals_record is None:
            file_totals_record = FileTotals.new_record()
        # 0 is a valid unit index, so test for None rather than truthiness.
        single_unit = unitindex is not None
        unitvalues = []
        for index, unit in enumerate(units):
            if unit.istranslatable():
                sourcewords, targetwords = wordsinunit(unit)
                if single_unit:
                    index = unitindex
                # what about plurals in .source and .target?
                unit_state_for_db = statefordb(unit)
                unitvalues.append((unit.getid(), fileid, index,
                                   unit.source, unit.target,
                                   sourcewords, targetwords,
                                   unit_state_for_db,
                                   unit.get_state_id()))
                file_totals_record = file_totals_record + \
                    FileTotals.new_record(unit_state_for_db, sourcewords, targetwords)
        # XXX: executemany is non-standard
        self.cur.executemany("""INSERT INTO units
            (unitid, fileid, unitindex, source, target, sourcewords, targetwords, state, e_state)
            values (?, ?, ?, ?, ?, ?, ?, ?, ?);""",
            unitvalues)
        self.file_totals[fileid] = file_totals_record
        if single_unit:
            return state_strings[statefordb(units[0])]
        return ""

    @transaction
    def _cachestore(self, store, realpath, mod_info):
        """Calculates and caches the statistics of the given store
        unconditionally.

        Returns the new file id. Rows that belonged to a previous entry for
        ``realpath`` are removed first.
        """
        # The file gets a new id below, so anything still stored under the
        # old id would never be read again; purge it instead of leaving
        # orphaned rows behind.
        self.cur.execute("""SELECT fileid FROM files WHERE
            path=?;""", (realpath,))
        for (old_fileid,) in self.cur.fetchall():
            self.cur.execute("""DELETE FROM units WHERE
                fileid=?;""", (old_fileid,))
            self.cur.execute("""DELETE FROM uniterrors WHERE
                fileid=?;""", (old_fileid,))
            del self.file_totals[old_fileid]
        self.cur.execute("""DELETE FROM files WHERE
            path=?;""", (realpath,))
        self.cur.execute("""INSERT INTO files
            (fileid, path, st_mtime, st_size, toolkitbuild) values (NULL, ?, ?, ?, ?);""",
            (realpath, mod_info[0], mod_info[1], toolkitversion.build))
        fileid = self.cur.lastrowid
        self.cur.execute("""DELETE FROM units WHERE
            fileid=?""", (fileid,))
        self._cacheunitstats(store.units, fileid)
        return fileid

    def file_extended_totals(self, filename, store=None):
        """Return unit and word counts grouped by extended state.

        The result maps each extended state name to a dictionary with the
        keys "units", "sourcewords" and "targetwords".
        """
        stats = {}
        fileid = self._getfileid(filename, store=store)

        self.cur.execute("""SELECT e_state, COUNT(id), SUM(sourcewords), SUM(targetwords)
                FROM units WHERE fileid=? GROUP BY e_state""", (fileid,))
        values = self.cur.fetchall()

        for value in values:
            stats[extended_state_strings[value[0]]] = {
                "units": value[1],
                "sourcewords": value[2],
                "targetwords": value[3],
            }
        return stats

    def filetotals(self, filename, store=None, extended=False):
        """Retrieves the statistics for the given file if possible, otherwise
        delegates to cachestore().

        With ``extended`` the per-extended-state totals are added under the
        key "extended".
        """
        stats = self.file_totals[self._getfileid(filename, store=store)]
        if extended:
            stats["extended"] = self.file_extended_totals(filename, store=store)
        return stats

    @transaction
    def _cacheunitschecks(self, units, fileid, configid, checker, unitindex=None):
        """Helper method for cachestorechecks() and recacheunit()

        Runs ``checker`` over the translatable units, stores every failure
        in uniterrors and returns the list of "check-<name>" strings. When
        ``unitindex`` is given (single-unit update) "total" is appended to
        that list.
        """
        # We always want to store one dummy error to know that we have actually
        # run the checks on this file with the current checker configuration
        dummy = (-1, fileid, configid, "noerror", "")
        unitvalues = [dummy]
        # if we are doing a single unit, we want to return the checknames
        errornames = []
        # 0 is a valid unit index, so test for None rather than truthiness.
        single_unit = unitindex is not None
        for index, unit in enumerate(units):
            if unit.istranslatable():
                # Correctly assign the unitindex
                if single_unit:
                    index = unitindex
                failures = checker.run_filters(unit)
                for checkname, checkmessage in failures.items():
                    unitvalues.append((index, fileid, configid, checkname, checkmessage))
                    errornames.append("check-" + checkname)
        # NOTE(review): the listing this was taken from lost its indentation;
        # the reset is placed after the loop so that every unit is checked
        # with the suggestion store in place -- confirm against the original.
        checker.setsuggestionstore(None)

        if single_unit:
            # We are only updating a single unit, so we don't want to add an
            # extra noerror-entry
            unitvalues.remove(dummy)
            errornames.append("total")

        # XXX: executemany is non-standard
        self.cur.executemany("""INSERT INTO uniterrors
            (unitindex, fileid, configid, name, message)
            values (?, ?, ?, ?, ?);""",
            unitvalues)
        return errornames

    @transaction
    def _cachestorechecks(self, fileid, store, checker, configid):
        """Calculates and caches the error statistics of the given store
        unconditionally."""
        # Let's purge all previous failures because they will probably just
        # fill up the database without much use.
        self.cur.execute("""DELETE FROM uniterrors WHERE
            fileid=?;""", (fileid,))
        self._cacheunitschecks(store.units, fileid, configid, checker)
        return fileid

    def get_unit_stats(self, fileid, unitid):
        """Return the ``(state, sourcewords, targetwords)`` row of a unit.

        When the unit has no row a warning is written to stderr and an empty
        list is returned.
        """
        values = self.cur.execute("""
            SELECT   state, sourcewords, targetwords
            FROM     units
            WHERE    fileid=? AND unitid=?
        """, (fileid, unitid))
        result = values.fetchone()
        if result is not None:
            return result
        sys.stderr.write("WARNING: Database in inconsistent state.\n"
                         "fileid %d and unitid %s have no entries in the table units.\n"
                         % (fileid, unitid))
        # If values.fetchone() is None, then we return an empty list,
        # to make FileTotals.new_record(*self.get_unit_stats(fileid, unitid))
        # do the right thing.
        return []

    @transaction
    def recacheunit(self, filename, checker, unit):
        """Recalculate all information for a specific unit. This is necessary
        for updating all statistics when a translation of a unit took place,
        for example.

        This method assumes that everything was up to date before (file totals,
        checks, checker config, etc.

        Returns a list holding the unit's state name followed by the names
        of its failing checks and "total".
        """
        fileid = self._getfileid(filename, check_mod_info=False)
        configid = self._get_config_id(fileid, checker)
        unitid = unit.getid()
        # Take the unit's old contribution out of the file totals; it is
        # added back with its new values by _cacheunitstats below.
        totals_without_unit = self.file_totals[fileid] - \
                                  FileTotals.new_record(*self.get_unit_stats(fileid, unitid))
        # get the unit index
        self.cur.execute("""SELECT unitindex FROM units WHERE
            fileid=? AND unitid=?;""", (fileid, unitid))
        unitindex = self.cur.fetchone()[0]
        self.cur.execute("""DELETE FROM units WHERE
            fileid=? AND unitid=?;""", (fileid, unitid))
        state = [self._cacheunitstats([unit], fileid, unitindex, totals_without_unit)]
        # remove the current errors
        self.cur.execute("""DELETE FROM uniterrors WHERE
            fileid=? AND unitindex=?;""", (fileid, unitindex))
        if os.path.exists(suggestion_filename(filename)):
            checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
        state.extend(self._cacheunitschecks([unit], fileid, configid, checker, unitindex))
        return state

    def _checkerrors(self, filename, fileid, configid, checker, store):
        """Return ``(first_row, cursor)`` for the stored check results of the
        file, running and caching the checks first when none are stored."""

        def geterrors():
            self.cur.execute("""SELECT
                name,
                unitindex
                FROM uniterrors WHERE fileid=? and configid=?
                ORDER BY unitindex;""", (fileid, configid))
            return self.cur.fetchone(), self.cur

        first, cur = geterrors()
        if first is not None:
            return first, cur

        # This could happen if we haven't done the checks before, or the
        # file changed, or we are using a different configuration
        if callable(store):
            store = store()
        else:
            store = store or factory.getobject(filename)

        if os.path.exists(suggestion_filename(filename)):
            checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
        self._cachestorechecks(fileid, store, checker, configid)
        return geterrors()

    def _geterrors(self, filename, fileid, configid, checker, store):
        """Return all ``(name, unitindex)`` rows of the file's check results."""
        result = []
        first, cur = self._checkerrors(filename, fileid, configid, checker, store)
        result.append(first)
        result.extend(cur.fetchall())
        return result

    @transaction
    def _get_config_id(self, fileid, checker):
        """Return the id of the checker's configuration, storing the
        configuration first if it has not been seen before."""
        configid = self._getstoredcheckerconfig(checker)
        if configid:
            return configid
        self.cur.execute("""INSERT INTO checkerconfigs
            (configid, config) values (NULL, ?);""",
            (str(checker.config.__dict__),))
        return self.cur.lastrowid

    def filechecks(self, filename, checker, store=None):
        """Retrieves the error statistics for the given file if possible,
        otherwise delegates to cachestorechecks().

        Returns a dictionary mapping "check-<name>" to the list of indices
        of the units failing that check.
        """
        fileid = self._getfileid(filename, store=store)
        configid = self._get_config_id(fileid, checker)
        values = self._geterrors(filename, fileid, configid, checker, store)

        errors = emptyfilechecks()
        for value in values:
            # Index -1 marks the dummy "noerror" row, not a real failure.
            if value[1] == -1:
                continue
            checkkey = 'check-' + value[0]  # value[0] is the error name
            errors.setdefault(checkkey, []).append(value[1])  # value[1] is the unitindex

        return errors

    def file_fails_test(self, filename, checker, name):
        """Return True if any unit of the file fails the check ``name``."""
        fileid = self._getfileid(filename)
        configid = self._get_config_id(fileid, checker)
        # Make sure the checks have been run and cached for this file.
        self._checkerrors(filename, fileid, configid, checker, None)
        self.cur.execute("""SELECT
            name,
            unitindex
            FROM uniterrors
            WHERE fileid=? and configid=? and name=?;""", (fileid, configid, name))
        return self.cur.fetchone() is not None

    def filestatestats(self, filename, store=None, extended=False):
        """Return a dictionary of unit stats mapping sets of unit
        indices with those states

        With ``extended`` the key "extended" additionally maps each stored
        e_state value to the indices of the units in that state.
        """
        stats = emptyfilestats()
        if extended:
            stats["extended"] = {}

        fileid = self._getfileid(filename, store=store)

        self.cur.execute("""SELECT state, e_state, unitindex
            FROM units WHERE fileid=? ORDER BY unitindex;""", (fileid,))
        values = self.cur.fetchall()

        for value in values:
            stats[state_strings[value[0]]].append(value[2])
            if extended:
                stats["extended"].setdefault(value[1], []).append(value[2])
            stats["total"].append(value[2])
        return stats

    def filestats(self, filename, checker, store=None, extended=False):
        """Return a dictionary of property names mapping sets of unit
        indices with those properties."""
        stats = emptyfilestats()
        stats.update(self.filechecks(filename, checker, store))
        stats.update(self.filestatestats(filename, store, extended=extended))
        return stats

    def unitstats(self, filename, _lang=None, store=None):
        # For now, lang and store are unused. lang will allow the user to
        # base stats information on the given language. See the commented
        # line containing stats.update below.
        """Return a dictionary of property names mapping to arrays which
        map unit indices to property values.

        Please note that this is different from filestats, since filestats
        supplies sets of unit indices with a given property, whereas this
        method supplies arrays which map unit indices to given values."""
        stats = emptyunitstats()

        #stats.update(self.unitchecks(filename, lang, store))
        fileid = self._getfileid(filename, store=store)

        self.cur.execute("""SELECT
            sourcewords, targetwords
            FROM units WHERE fileid=?
            ORDER BY unitindex;""", (fileid,))

        for sourcecount, targetcount in self.cur.fetchall():
            stats["sourcewordcount"].append(sourcecount)
            stats["targetwordcount"].append(targetcount)

        return stats
718