Package translate :: Package storage :: Module statistics
[hide private]
[frames | no frames]

Source Code for Module translate.storage.statistics

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2007 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """Module to provide statistics and related functionality. 
 23   
 24  @organization: Zuza Software Foundation 
 25  @copyright: 2007 Zuza Software Foundation 
 26  @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>} 
 27  """ 
 28   
 29  from translate import lang 
 30  from translate.lang import factory 
 31   
 32  # calling classifyunits() in the constructor is probably not ideal.  
 33  # idea: have a property for .classification that calls it if necessary 
 34   
 35  # If we add units or change translations, statistics are out of date 
 36  # Compare with modules/Status.py in pootling that uses a bitmask to  
 37  # filter units 
 38   
 39  # Add support for reading and writing Pootle style .stats files 
 40   
 41  # Consider providing quickstats 
 42   
class Statistics(object):
    """Manages statistics for storage objects.

    The class keeps a dictionary, C{self.classification}, that maps a class
    name ("total", "fuzzy", "blank", "translated", ...) to a list of integer
    indexes into the list returned by L{getunits}.

    L{classifyunits} and L{countwords} iterate over C{self.unit_iter()},
    which is not defined in this class; it has to be supplied by the class
    that derives from (or is mixed with) this one.
    """

    def __init__(self, sourcelanguage='en', targetlanguage='en', checkerstyle=None):
        """Stores the language codes and looks up the source language object.

        @param sourcelanguage: language code of the source text
        @param targetlanguage: language code of the target text
        @param checkerstyle: currently unused here, since the call to
        L{init_checker} is disabled
        """
        self.sourcelanguage = sourcelanguage
        self.targetlanguage = targetlanguage
        self.language = lang.factory.getlanguage(self.sourcelanguage)
        # The checker is currently not set up by the constructor; call
        # init_checker(checkerstyle) explicitly if it is required.

        # Empty until classifyunits() has run; the accessors below use the
        # emptiness as the signal to classify lazily.
        self.classification = {}

    def init_checker(self, checkerstyle=None):
        """Creates C{self.checker} from the given checker style.

        @param checkerstyle: checker class to use instead of
        C{checks.StandardChecker}
        """
        # Imported locally so that the filters are only loaded on demand.
        from translate.filters import checks
        from translate.filters import pofilter
        checkerclasses = [checkerstyle or checks.StandardChecker, pofilter.StandardPOChecker]
        self.checker = pofilter.POTeeChecker(checkerclasses=checkerclasses)

    def _classified_units(self, classname):
        """Returns the units whose index is listed under the given class.

        The classification is computed first if it is still empty.

        @param classname: a key of C{self.classification}
        """
        if not self.classification:
            self.classifyunits()
        units = self.getunits()
        return [units[item] for item in self.classification[classname]]

    def fuzzy_units(self):
        """Return a list of fuzzy units."""
        return self._classified_units("fuzzy")

    def fuzzy_unitcount(self):
        """Returns the number of fuzzy units."""
        return len(self.fuzzy_units())

    def translated_units(self):
        """Return a list of translated units."""
        return self._classified_units("translated")

    def translated_unitcount(self):
        """Returns the number of translated units."""
        return len(self.translated_units())

    def untranslated_units(self):
        """Return a list of untranslated units."""
        return self._classified_units("blank")

    def untranslated_unitcount(self):
        """Returns the number of untranslated units."""
        return len(self.untranslated_units())

    def getunits(self):
        """Returns a list of all units in this object."""
        return []

    def get_source_text(self, units):
        """Joins the unit source strings in a single string of text.

        Every source string, and every additional plural form found in the
        C{strings} attribute of a source, is followed by a newline so that
        neighbouring strings can never run into each other.

        @param units: the units whose source text should be collected
        """
        parts = []
        for unit in units:
            parts.append(unit.source)
            # strings[0] is assumed to be the source itself, so only the
            # remaining plural forms are added.
            parts.extend(getattr(unit.source, "strings", [])[1:])
        if not parts:
            return ""
        return "\n".join(parts) + "\n"

    def wordcount(self, text):
        """Returns the number of words in the given text."""
        return len(self.language.words(text))

    def source_wordcount(self):
        """Returns the number of words in the source text."""
        source_text = self.get_source_text(self.getunits())
        return self.wordcount(source_text)

    def translated_wordcount(self):
        """Returns the number of translated words in this object."""
        text = self.get_source_text(self.translated_units())
        return self.wordcount(text)

    def untranslated_wordcount(self):
        """Returns the number of untranslated words in this object."""
        text = self.get_source_text(self.untranslated_units())
        return self.wordcount(text)

    def classifyunit(self, unit):
        """Returns a list of the classes that the unit belongs to.

        @param unit: the unit to classify
        """
        classes = ["total"]
        if unit.isfuzzy():
            classes.append("fuzzy")
        if unit.gettargetlen() == 0:
            classes.append("blank")
        if unit.istranslated():
            classes.append("translated")
        #TODO: we don't handle checking plurals at all yet, as this is tricky...
        source = unit.source
        target = unit.target
        # bytes / type(u"") are str / unicode on Python 2 and bytes / str on
        # Python 3, so this is the same test on both.
        if isinstance(source, bytes) and isinstance(target, type(u"")):
            source = source.decode(getattr(unit, "encoding", "utf-8"))
        #TODO: decoding should not be done here
        # The checker is disabled for now; once enabled the result would come
        # from self.checker.run_filters(unit, source, target).
        checkresult = {}
        for checkname in checkresult:
            classes.append("check-" + checkname)
        return classes

    def classifyunits(self):
        """Makes a dictionary of which units fall into which classifications.

        This method iterates over all units.
        """
        self.classification = {
            "fuzzy": [],
            "blank": [],
            "translated": [],
            "has-suggestion": [],
            "total": [],
        }
        for item, unit in enumerate(self.unit_iter()):
            for classname in self.classifyunit(unit):
                # Class names that were not seeded above still get a list.
                self.classification.setdefault(classname, []).append(item)
        self.countwords()

    def _wordcounts(self, text):
        """Returns the word count of each string in C{text.strings}.

        NOTE(review): text without a C{strings} attribute is counted as a
        single empty string, i.e. [0]; confirm that this is intended.
        """
        return [self.wordcount(string) for string in getattr(text, "strings", [""])]

    def countwords(self):
        """Counts the source and target words in each of the units."""
        self.sourcewordcounts = []
        self.targetwordcounts = []
        for unit in self.unit_iter():
            self.sourcewordcounts.append(self._wordcounts(unit.source))
            self.targetwordcounts.append(self._wordcounts(unit.target))

    def reclassifyunit(self, item):
        """Updates the classification of a unit in self.classification.

        @param item: an integer that is an index in .getunits().
        """
        if not self.classification:
            # Nothing has been classified yet, so the per-unit word count
            # lists do not exist either.
            self.classifyunits()
        unit = self.getunits()[item]
        self.sourcewordcounts[item] = self._wordcounts(unit.source)
        self.targetwordcounts[item] = self._wordcounts(unit.target)
        classes = self.classifyunit(unit)
        # Make sure that classes seen for the first time are tracked too.
        for classname in classes:
            self.classification.setdefault(classname, [])
        for classname, matchingitems in self.classification.items():
            belongs = classname in classes
            if belongs == (item in matchingitems):
                continue
            if belongs:
                matchingitems.append(item)
            else:
                matchingitems.remove(item)
            matchingitems.sort()