Package translate :: Package storage :: Module statistics
[hide private]
[frames | no frames]

Source Code for Module translate.storage.statistics

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2007 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """Module to provide statistics and related functionality. 
 23   
 24  @organization: Zuza Software Foundation 
 25  @copyright: 2007 Zuza Software Foundation 
 26  @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>} 
 27  """ 
 28   
 29  from translate import lang 
 30  from translate.lang import factory 
 31   
 32  # calling classifyunits() in the constructor is probably not ideal. 
 33  # idea: have a property for .classification that calls it if necessary 
 34   
 35  # If we add units or change translations, statistics are out of date 
 36  # Compare with modules/Status.py in pootling that uses a bitmask to 
 37  # filter units 
 38   
 39  # Add support for reading and writing Pootle style .stats files 
 40   
 41  # Consider providing quickstats 
 42   
 43   
class Statistics(object):
    """Manages statistics for storage objects.

    The class maintains C{self.classification}, a dictionary mapping a class
    name (C{"total"}, C{"fuzzy"}, C{"blank"}, C{"translated"},
    C{"has-suggestion"}) to a list of unit indexes, together with per-unit
    source and target word counts (C{self.sourcewordcounts} and
    C{self.targetwordcounts}).

    The base implementation of L{getunits} returns an empty list; a concrete
    store is presumably expected to override it and may also provide a
    C{unit_iter()} method, which is preferred for iteration when present.
    """

    def __init__(self, sourcelanguage='en', targetlanguage='en', checkerstyle=None):
        """Sets up the language used for word counting.

        @param sourcelanguage: code of the source language; it selects the
        language object whose C{words()} method is used by L{wordcount}
        @param targetlanguage: code of the target language (only stored)
        @param checkerstyle: checker class for L{init_checker}; currently
        unused because checker initialisation is disabled
        """
        self.sourcelanguage = sourcelanguage
        self.targetlanguage = targetlanguage
        self.language = lang.factory.getlanguage(self.sourcelanguage)
        # NOTE: the checker is deliberately not initialised at the moment:
        # self.init_checker(checkerstyle)

        # Filled in lazily by classifyunits().
        self.classification = {}

    def init_checker(self, checkerstyle=None):
        """Creates C{self.checker}, a tee checker over two checker classes.

        @param checkerstyle: checker class to use instead of
        C{checks.StandardChecker}
        """
        # Imported locally so that the filters are only loaded on demand.
        from translate.filters import checks
        from translate.filters import pofilter
        checkerclasses = [checkerstyle or checks.StandardChecker, pofilter.StandardPOChecker]
        self.checker = pofilter.POTeeChecker(checkerclasses=checkerclasses)

    def _iter_units(self):
        """Returns an iterator over all units.

        Uses C{unit_iter()} when the object provides one and falls back to
        L{getunits} otherwise, so that this class also works on its own.
        """
        unit_iter = getattr(self, "unit_iter", None)
        if unit_iter is not None:
            return unit_iter()
        return iter(self.getunits())

    def _units_in_class(self, classname):
        """Returns the units whose index is listed under C{classname}.

        The classification is computed first if it is still empty.
        """
        if not self.classification:
            self.classifyunits()
        units = self.getunits()
        return [units[item] for item in self.classification[classname]]

    def _string_wordcounts(self, text):
        """Returns a list with the word count of each string in C{text}.

        An object with a C{strings} attribute (plural forms) yields one count
        per string; anything else is counted as a single string, with C{None}
        treated as empty.
        """
        strings = getattr(text, "strings", None)
        if strings is None:
            strings = [text or ""]
        return [self.wordcount(string) for string in strings]

    def fuzzy_units(self):
        """Return a list of fuzzy units."""
        return self._units_in_class("fuzzy")

    def fuzzy_unitcount(self):
        """Returns the number of fuzzy units."""
        return len(self.fuzzy_units())

    def translated_units(self):
        """Return a list of translated units."""
        return self._units_in_class("translated")

    def translated_unitcount(self):
        """Returns the number of translated units."""
        return len(self.translated_units())

    def untranslated_units(self):
        """Return a list of untranslated units."""
        return self._units_in_class("blank")

    def untranslated_unitcount(self):
        """Returns the number of untranslated units."""
        return len(self.untranslated_units())

    def getunits(self):
        """Returns a list of all units in this object."""
        return []

    def get_source_text(self, units):
        """Joins the unit source strings in a single string of text.

        Every source string, including each additional plural form, is
        terminated by a newline so that the last word of one string can never
        run into the first word of the next.

        @param units: the units whose sources should be joined
        """
        lines = []
        for unit in units:
            lines.append(unit.source)
            # The first plural form is the source itself, so skip it.
            lines.extend(getattr(unit.source, "strings", [])[1:])
        return "".join([line + "\n" for line in lines])

    def wordcount(self, text):
        """Returns the number of words in the given text."""
        return len(self.language.words(text))

    def source_wordcount(self):
        """Returns the number of words in the source text."""
        source_text = self.get_source_text(self.getunits())
        return self.wordcount(source_text)

    def translated_wordcount(self):
        """Returns the number of translated words in this object.

        The count is taken over the source text of the translated units.
        """
        text = self.get_source_text(self.translated_units())
        return self.wordcount(text)

    def untranslated_wordcount(self):
        """Returns the number of untranslated words in this object.

        The count is taken over the source text of the untranslated units.
        """
        text = self.get_source_text(self.untranslated_units())
        return self.wordcount(text)

    def classifyunit(self, unit):
        """Returns a list of the classes that the unit belongs to.

        @param unit: the unit to classify
        """
        classes = ["total"]
        if unit.isfuzzy():
            classes.append("fuzzy")
        if unit.gettargetlen() == 0:
            classes.append("blank")
        if unit.istranslated():
            classes.append("translated")
        #TODO: we don't handle checking plurals at all yet, as this is tricky...
        # NOTE: the checker is currently disabled, so no checks are run. When
        # it is re-enabled, the source may need decoding to match the target
        # before calling:
        # checkresult = self.checker.run_filters(unit, unit.source, unit.target)
        checkresult = {}
        for checkname in checkresult:
            classes.append("check-" + checkname)
        return classes

    def classifyunits(self):
        """Makes a dictionary of which units fall into which classifications.

        This method iterates over all units and afterwards refreshes the word
        counts with L{countwords}.
        """
        self.classification = {}
        self.classification["fuzzy"] = []
        self.classification["blank"] = []
        self.classification["translated"] = []
        self.classification["has-suggestion"] = []
        self.classification["total"] = []
        # for checkname in self.checker.getfilters().keys():
        #     self.classification["check-" + checkname] = []
        for item, unit in enumerate(self._iter_units()):
            classes = self.classifyunit(unit)
            # if self.basefile.getsuggestions(item):
            #     classes.append("has-suggestion")
            for classname in classes:
                # setdefault() starts a new *list* for a class name that was
                # not predeclared above, so later appends keep working.
                self.classification.setdefault(classname, []).append(item)
        self.countwords()

    def countwords(self):
        """Counts the source and target words in each of the units.

        The results are stored in C{self.sourcewordcounts} and
        C{self.targetwordcounts}: one list per unit holding one count per
        (plural) string.
        """
        self.sourcewordcounts = []
        self.targetwordcounts = []
        for unit in self._iter_units():
            self.sourcewordcounts.append(self._string_wordcounts(unit.source))
            self.targetwordcounts.append(self._string_wordcounts(unit.target))

    def reclassifyunit(self, item):
        """Updates the classification of a unit in self.classification.

        @param item: an integer that is an index in .getunits().
        """
        if not self.classification:
            # Nothing has been classified yet (so there are no word count
            # lists to update either): classify everything instead.
            self.classifyunits()
            return
        unit = self.getunits()[item]
        self.sourcewordcounts[item] = self._string_wordcounts(unit.source)
        self.targetwordcounts[item] = self._string_wordcounts(unit.target)
        classes = self.classifyunit(unit)
        # if self.basefile.getsuggestions(item):
        #     classes.append("has-suggestion")
        for classname in classes:
            # Make sure every class of the unit has a list to be added to.
            self.classification.setdefault(classname, [])
        for classname, matchingitems in self.classification.items():
            if (classname in classes) != (item in matchingitems):
                if classname in classes:
                    matchingitems.append(item)
                else:
                    matchingitems.remove(item)
                matchingitems.sort()
        # self.savestats()