1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Module to provide a cache of statistics in a database.
23
24 @organization: Zuza Software Foundation
25 @copyright: 2007 Zuza Software Foundation
26 @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>}
27 """
28
29 from UserDict import UserDict
30
31 from translate import __version__ as toolkitversion
32 from translate.storage import factory
33 from translate.misc.multistring import multistring
34 from translate.lang.common import Common
35
36 try:
37 from sqlite3 import dbapi2
38 except ImportError:
39 from pysqlite2 import dbapi2
40 import os.path
41 import re
42 import sys
43 import stat
44 import thread
45
46 kdepluralre = re.compile("^_n: ")
47 brtagre = re.compile("<br\s*?/?>")
48 xmltagre = re.compile("<[^>]+>")
49 numberre = re.compile("\\D\\.\\D")
50
51 state_strings = {0: "untranslated", 1: "translated", 2: "fuzzy"}
62
64 """Counts the words in the unit's source and target, taking plurals into
65 account. The target words are only counted if the unit is translated."""
66 (sourcewords, targetwords) = (0, 0)
67 if isinstance(unit.source, multistring):
68 sourcestrings = unit.source.strings
69 else:
70 sourcestrings = [unit.source or ""]
71 for s in sourcestrings:
72 sourcewords += wordcount(s)
73 if not unit.istranslated():
74 return sourcewords, targetwords
75 if isinstance(unit.target, multistring):
76 targetstrings = unit.target.strings
77 else:
78 targetstrings = [unit.target or ""]
79 for s in targetstrings:
80 targetwords += wordcount(s)
81 return sourcewords, targetwords
82
84 - def __init__(self, record_keys, record_values=None, compute_derived_values = lambda x: x):
91
93 return tuple(self[key] for key in self.record_keys)
94
101
108
111
113 """Modifies f to commit database changes if it executes without exceptions.
114 Otherwise it rolls back the database.
115
116 ALL publicly accessible methods in StatsCache MUST be decorated with this
117 decorator.
118 """
119
120 def decorated_f(self, *args, **kwargs):
121 try:
122 result = f(self, *args, **kwargs)
123 self.con.commit()
124 return result
125 except:
126
127
128
129 if self.con:
130 self.con.rollback()
131 raise
132 return decorated_f
133
134 UNTRANSLATED, TRANSLATED, FUZZY = 0, 1, 2
136 """Returns the numeric database state for the unit."""
137 if unit.istranslated():
138 return TRANSLATED
139 if unit.isfuzzy() and unit.target:
140 return FUZZY
141 return UNTRANSLATED
142
144 keys = ['translatedsourcewords',
145 'fuzzysourcewords',
146 'untranslatedsourcewords',
147 'translated',
148 'fuzzy',
149 'untranslated',
150 'translatedtargetwords']
151
154
156 self.cur = cur
157 self.cur.execute("""
158 CREATE TABLE IF NOT EXISTS filetotals(
159 fileid INTEGER PRIMARY KEY AUTOINCREMENT,
160 translatedsourcewords INTEGER NOT NULL,
161 fuzzysourcewords INTEGER NOT NULL,
162 untranslatedsourcewords INTEGER NOT NULL,
163 translated INTEGER NOT NULL,
164 fuzzy INTEGER NOT NULL,
165 untranslated INTEGER NOT NULL,
166 translatedtargetwords INTEGER NOT NULL);""")
167
def new_record(cls, state_for_db=None, sourcewords=None, targetwords=None):
    """Build a totals record, optionally seeded with a single unit.

    With no arguments the record is created from ``cls.keys`` without any
    explicit values.  When ``state_for_db`` is given, the counter matching
    that state is set to 1 and the matching word counts are filled in.

    @param state_for_db: one of UNTRANSLATED, TRANSLATED or FUZZY, or None
    @param sourcewords: number of source words of the unit
    @param targetwords: number of target words of the unit (only recorded
        for translated units)
    @return: the new Record
    """
    record = Record(cls.keys, compute_derived_values=cls._compute_derived_values)
    # Compare by value, not identity: states read back from the database
    # are equal to the constants but are not guaranteed to be the very same
    # integer objects (e.g. a ``long`` on Python 2).
    if state_for_db is not None:
        if state_for_db == UNTRANSLATED:
            record['untranslated'] = 1
            record['untranslatedsourcewords'] = sourcewords
        elif state_for_db == TRANSLATED:
            record['translated'] = 1
            record['translatedsourcewords'] = sourcewords
            record['translatedtargetwords'] = targetwords
        elif state_for_db == FUZZY:
            record['fuzzy'] = 1
            record['fuzzysourcewords'] = sourcewords
    return record

new_record = classmethod(new_record)
184
186 record["total"] = record["untranslated"] + \
187 record["translated"] + \
188 record["fuzzy"]
189 record["totalsourcewords"] = record["untranslatedsourcewords"] + \
190 record["translatedsourcewords"] + \
191 record["fuzzysourcewords"]
192 record["review"] = 0
193 _compute_derived_values = classmethod(_compute_derived_values)
194
201
203 self.cur.execute("""
204 INSERT OR REPLACE into filetotals
205 VALUES (%(fileid)d, %(vals)s);
206 """ % {'fileid': fileid, 'vals': record.as_string_for_db()})
207
209 self.cur.execute("""
210 DELETE FROM filetotals
211 WHERE fileid=?;
212 """, (fileid,))
213
215 """Returns a dictionary with all statistics initalised to 0."""
216 return FileTotals.new_record()
217
220
222 return {"total": [], "translated": [], "fuzzy": [], "untranslated": []}
223
225 return {"sourcewordcount": [], "targetwordcount": []}
226
234 file_stat = os.stat(file_path)
235 assert not stat.S_ISDIR(file_stat.st_mode)
236 return file_stat.st_mtime, file_stat.st_size
237
239 return os.path.extsep + 'pending'
240
243
246 """An object instantiated as a singleton for each statsfile that provides
247 access to the database cache from a pool of StatsCache objects."""
248 _caches = {}
249 defaultfile = None
250 con = None
251 """This cache's connection"""
252 cur = None
253 """The current cursor"""
254
256 current_thread = thread.get_ident()
257 def make_database(statsfile):
258 def connect(cache):
259 cache.con = dbapi2.connect(statsfile)
260 cache.cur = cache.con.cursor()
261
262 def clear_old_data(cache):
263 try:
264 cache.cur.execute("""SELECT toolkitbuild FROM files""")
265 val = cache.cur.fetchone()
266
267
268 if val is None or val[0] < toolkitversion.build:
269 cache.con.close()
270 del cache
271 os.unlink(statsfile)
272 return True
273 return False
274 except dbapi2.OperationalError:
275 return False
276
277 cache = cls._caches.setdefault(current_thread, {})[statsfile] = object.__new__(cls)
278 connect(cache)
279 if clear_old_data(cache):
280 connect(cache)
281 cache.create()
282 return cache
283
284 if not statsfile:
285 if not cls.defaultfile:
286 userdir = os.path.expanduser("~")
287 cachedir = None
288 if os.name == "nt":
289 cachedir = os.path.join(userdir, "Translate Toolkit")
290 else:
291 cachedir = os.path.join(userdir, ".translate_toolkit")
292 if not os.path.exists(cachedir):
293 os.mkdir(cachedir)
294 cls.defaultfile = os.path.realpath(os.path.join(cachedir, "stats.db"))
295 statsfile = cls.defaultfile
296 else:
297 statsfile = os.path.realpath(statsfile)
298
299 if current_thread in cls._caches and statsfile in cls._caches[current_thread]:
300 return cls._caches[current_thread][statsfile]
301
302 return make_database(statsfile)
303
304 @transaction
306 """Create all tables and indexes."""
307 self.file_totals = FileTotals(self.cur)
308
309 self.cur.execute("""CREATE TABLE IF NOT EXISTS files(
310 fileid INTEGER PRIMARY KEY AUTOINCREMENT,
311 path VARCHAR NOT NULL UNIQUE,
312 st_mtime INTEGER NOT NULL,
313 st_size INTEGER NOT NULL,
314 toolkitbuild INTEGER NOT NULL);""")
315
316 self.cur.execute("""CREATE UNIQUE INDEX IF NOT EXISTS filepathindex
317 ON files (path);""")
318
319 self.cur.execute("""CREATE TABLE IF NOT EXISTS units(
320 id INTEGER PRIMARY KEY AUTOINCREMENT,
321 unitid VARCHAR NOT NULL,
322 fileid INTEGER NOT NULL,
323 unitindex INTEGER NOT NULL,
324 source VARCHAR NOT NULL,
325 target VARCHAR,
326 state INTEGER,
327 sourcewords INTEGER,
328 targetwords INTEGER);""")
329
330 self.cur.execute("""CREATE INDEX IF NOT EXISTS fileidindex
331 ON units(fileid);""")
332
333 self.cur.execute("""CREATE TABLE IF NOT EXISTS checkerconfigs(
334 configid INTEGER PRIMARY KEY AUTOINCREMENT,
335 config VARCHAR);""")
336
337 self.cur.execute("""CREATE INDEX IF NOT EXISTS configindex
338 ON checkerconfigs(config);""")
339
340 self.cur.execute("""CREATE TABLE IF NOT EXISTS uniterrors(
341 errorid INTEGER PRIMARY KEY AUTOINCREMENT,
342 unitindex INTEGER NOT NULL,
343 fileid INTEGER NOT NULL,
344 configid INTEGER NOT NULL,
345 name VARCHAR NOT NULL,
346 message VARCHAR);""")
347
348 self.cur.execute("""CREATE INDEX IF NOT EXISTS uniterrorindex
349 ON uniterrors(fileid, configid);""")
350
@transaction
def _getfileid(self, filename, check_mod_info=True, store=None):
    """Return the fileid representing the given file in the statscache.

    If the file is not in the cache, or has been updated since the last
    recorded update, its statistics are recalculated.

    The optional argument store can be used to avoid unnecessary
    reparsing of already loaded translation files.  It can be a
    TranslationFile object or a callback that returns one.

    When check_mod_info is false and the file is already cached, the
    stored modification info is simply refreshed and the existing fileid
    is returned.
    """
    if isinstance(filename, str):
        filename = unicode(filename, sys.getfilesystemencoding())
    realpath = os.path.realpath(filename)

    self.cur.execute("""SELECT fileid, st_mtime, st_size FROM files
            WHERE path=?;""", (realpath,))
    cached_row = self.cur.fetchone()
    mod_info = get_mod_info(realpath)

    if cached_row:
        fileid, cached_mtime, cached_size = cached_row[0], cached_row[1], cached_row[2]
        if not check_mod_info:
            # The caller vouches for the cached data: only record the
            # file's current modification info.
            self.cur.execute("""UPDATE files
                    SET st_mtime=?, st_size=?
                    WHERE fileid=?;""", (mod_info[0], mod_info[1], fileid))
            return fileid
        if (cached_mtime, cached_size) == mod_info:
            return fileid

    # Either the file is unknown or it changed on disk: (re)build the cache.
    if callable(store):
        translation_store = store()
    else:
        translation_store = store or factory.getobject(realpath)

    return self._cachestore(translation_store, realpath, mod_info)
388
390 """See if this checker configuration has been used before."""
391 config = str(checker.config.__dict__)
392 self.cur.execute("""SELECT configid, config FROM checkerconfigs WHERE
393 config=?;""", (config,))
394 configrow = self.cur.fetchone()
395 if not configrow or configrow[1] != config:
396 return None
397 else:
398 return configrow[0]
399
400 @transaction
402 """Cache the statistics for the supplied unit(s)."""
403 unitvalues = []
404 for index, unit in enumerate(units):
405 if unit.istranslatable():
406 sourcewords, targetwords = wordsinunit(unit)
407 if unitindex:
408 index = unitindex
409
410 unitvalues.append((unit.getid(), fileid, index, \
411 unit.source, unit.target, \
412 sourcewords, targetwords, \
413 statefordb(unit)))
414 file_totals_record = file_totals_record + FileTotals.new_record(statefordb(unit), sourcewords, targetwords)
415
416 self.cur.executemany("""INSERT INTO units
417 (unitid, fileid, unitindex, source, target, sourcewords, targetwords, state)
418 values (?, ?, ?, ?, ?, ?, ?, ?);""",
419 unitvalues)
420 self.file_totals[fileid] = file_totals_record
421 if unitindex:
422 return state_strings[statefordb(units[0])]
423 return ""
424
425 @transaction
427 """Calculates and caches the statistics of the given store
428 unconditionally."""
429 self.cur.execute("""DELETE FROM files WHERE
430 path=?;""", (realpath,))
431 self.cur.execute("""INSERT INTO files
432 (fileid, path, st_mtime, st_size, toolkitbuild) values (NULL, ?, ?, ?, ?);""",
433 (realpath, mod_info[0], mod_info[1], toolkitversion.build))
434 fileid = self.cur.lastrowid
435 self.cur.execute("""DELETE FROM units WHERE
436 fileid=?""", (fileid,))
437 self._cacheunitstats(store.units, fileid)
438 return fileid
439
441 """Retrieves the statistics for the given file if possible, otherwise
442 delegates to cachestore()."""
443 return self.file_totals[self._getfileid(filename, store=store)]
444
445 @transaction
447 """Helper method for cachestorechecks() and recacheunit()"""
448
449
450 dummy = (-1, fileid, configid, "noerror", "")
451 unitvalues = [dummy]
452
453 errornames = []
454 for index, unit in enumerate(units):
455 if unit.istranslatable():
456
457 if unitindex:
458 index = unitindex
459 failures = checker.run_filters(unit)
460 for checkname, checkmessage in failures.iteritems():
461 unitvalues.append((index, fileid, configid, checkname, checkmessage))
462 errornames.append("check-" + checkname)
463 checker.setsuggestionstore(None)
464
465 if unitindex:
466
467
468 unitvalues.remove(dummy)
469 errornames.append("total")
470
471
472 self.cur.executemany("""INSERT INTO uniterrors
473 (unitindex, fileid, configid, name, message)
474 values (?, ?, ?, ?, ?);""",
475 unitvalues)
476 return errornames
477
478 @transaction
480 """Calculates and caches the error statistics of the given store
481 unconditionally."""
482
483
484 self.cur.execute("""DELETE FROM uniterrors WHERE
485 fileid=?;""", (fileid,))
486 self._cacheunitschecks(store.units, fileid, configid, checker)
487 return fileid
488
490 values = self.cur.execute("""
491 SELECT state, sourcewords, targetwords
492 FROM units
493 WHERE fileid=? AND unitid=?
494 """, (fileid, unitid))
495 result = values.fetchone()
496 if result is not None:
497 return result
498 else:
499 print >> sys.stderr, """WARNING: Database in inconsistent state.
500 fileid %d and unitid %s have no entries in the table units.""" % (fileid, unitid)
501
502
503
504 return []
505
506 @transaction
508 """Recalculate all information for a specific unit. This is necessary
509 for updating all statistics when a translation of a unit took place,
510 for example.
511
512 This method assumes that everything was up to date before (file totals,
513 checks, checker config, etc."""
514 fileid = self._getfileid(filename, check_mod_info=False)
515 configid = self._get_config_id(fileid, checker)
516 unitid = unit.getid()
517
518 totals_without_unit = self.file_totals[fileid] - \
519 FileTotals.new_record(*self.get_unit_stats(fileid, unitid))
520 self.cur.execute("""SELECT unitindex FROM units WHERE
521 fileid=? AND unitid=?;""", (fileid, unitid))
522 unitindex = self.cur.fetchone()[0]
523 self.cur.execute("""DELETE FROM units WHERE
524 fileid=? AND unitid=?;""", (fileid, unitid))
525 state = [self._cacheunitstats([unit], fileid, unitindex, totals_without_unit)]
526
527 self.cur.execute("""DELETE FROM uniterrors WHERE
528 fileid=? AND unitindex=?;""", (fileid, unitindex))
529 if os.path.exists(suggestion_filename(filename)):
530 checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
531 state.extend(self._cacheunitschecks([unit], fileid, configid, checker, unitindex))
532 return state
533
534 - def _checkerrors(self, filename, fileid, configid, checker, store):
535 def geterrors():
536 self.cur.execute("""SELECT
537 name,
538 unitindex
539 FROM uniterrors WHERE fileid=? and configid=?
540 ORDER BY unitindex;""", (fileid, configid))
541 return self.cur.fetchone(), self.cur
542
543 first, cur = geterrors()
544 if first is not None:
545 return first, cur
546
547
548
549 if callable(store):
550 store = store()
551 else:
552 store = store or factory.getobject(filename)
553
554 if os.path.exists(suggestion_filename(filename)):
555 checker.setsuggestionstore(factory.getobject(suggestion_filename(filename), ignore=suggestion_extension()))
556 self._cachestorechecks(fileid, store, checker, configid)
557 return geterrors()
558
559 - def _geterrors(self, filename, fileid, configid, checker, store):
560 result = []
561 first, cur = self._checkerrors(filename, fileid, configid, checker, store)
562 result.append(first)
563 result.extend(cur.fetchall())
564 return result
565
566 @transaction
568 configid = self._getstoredcheckerconfig(checker)
569 if configid:
570 return configid
571 self.cur.execute("""INSERT INTO checkerconfigs
572 (configid, config) values (NULL, ?);""",
573 (str(checker.config.__dict__),))
574 return self.cur.lastrowid
575
def filechecks(self, filename, checker, store=None):
    """Return the check failures for the given file, grouped by check.

    Cached results are used when available; otherwise the checks are run
    and cached first.  The result maps 'check-<name>' to the list of
    unit indices failing that check.
    """
    fileid = self._getfileid(filename, store=store)
    configid = self._get_config_id(fileid, checker)
    rows = self._geterrors(filename, fileid, configid, checker, store)

    errors = emptyfilechecks()
    for row in rows:
        unitindex = row[1]
        # Rows with unit index -1 are placeholders, not real failures.
        if unitindex != -1:
            checkkey = 'check-' + row[0]
            if checkkey not in errors:
                errors[checkkey] = []
            errors[checkkey].append(unitindex)

    return errors
593
595 fileid = self._getfileid(filename)
596 configid = self._get_config_id(fileid, checker)
597 self._checkerrors(filename, fileid, configid, checker, None)
598 self.cur.execute("""SELECT
599 name,
600 unitindex
601 FROM uniterrors
602 WHERE fileid=? and configid=? and name=?;""", (fileid, configid, name))
603 return self.cur.fetchone() is not None
604
606 """Return a dictionary of unit stats mapping sets of unit
607 indices with those states"""
608 stats = emptyfilestats()
609 fileid = self._getfileid(filename, store=store)
610
611 self.cur.execute("""SELECT
612 state,
613 unitindex
614 FROM units WHERE fileid=?
615 ORDER BY unitindex;""", (fileid,))
616 values = self.cur.fetchall()
617
618 for value in values:
619 stats[state_strings[value[0]]].append(value[1])
620 stats["total"].append(value[1])
621
622 return stats
623
624 - def filestats(self, filename, checker, store=None):
631
def unitstats(self, filename, _lang=None, store=None):
    """Return a dictionary of property names mapping to arrays which
    map unit indices to property values.

    Please note that this is different from filestats, since filestats
    supplies sets of unit indices with a given property, whereas this
    method supplies arrays which map unit indices to given values.

    The _lang argument is accepted but not used here.
    """
    stats = emptyunitstats()
    fileid = self._getfileid(filename, store=store)

    query = """SELECT
            sourcewords, targetwords
            FROM units WHERE fileid=?
            ORDER BY unitindex;"""
    self.cur.execute(query, (fileid,))

    # Rows arrive ordered by unit index, so list position tracks that order.
    rows = self.cur.fetchall()
    for source_count, target_count in rows:
        stats["sourcewordcount"].append(source_count)
        stats["targetwordcount"].append(target_count)

    return stats
657