Package translate :: Package convert :: Module dtd2po
[hide private]
[frames] | no frames]

Source Code for Module translate.convert.dtd2po

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2006 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """script to convert a mozilla .dtd UTF-8 localization format to a 
 23  gettext .po localization file using the po and dtd modules, and the 
 24  dtd2po convertor class which is in this module 
 25  You can convert back to .dtd using po2dtd.py""" 
 26   
 27  from translate.storage import po 
 28  from translate.storage import dtd 
 29  from translate.misc import quote 
 30  from translate.convert import accesskey as accesskeyfn 
 31   
def is_css_entity(entity):
    """Says if the given entity is likely to contain CSS that should not be
    translated.

    The decision is made purely on the entity name: the text after the last
    '.' must be one of the known sizing suffixes or must start with "style".

    entity -- the DTD entity name (a string)

    Returns True if the name looks like a CSS/sizing entity, False otherwise
    (including when the name contains no '.' at all).
    """
    if '.' not in entity:
        return False
    # only the part after the last dot matters; the prefix is irrelevant
    suffix = entity.rsplit('.', 1)[1]
    if suffix in ("height", "width", "unixWidth", "macWidth", "size"):
        return True
    return suffix.startswith("style")
class dtd2po:
    """Converts Mozilla .dtd units and stores into gettext .po units and stores.

    Attributes:
      currentgroup   -- text of the localization group comment currently open
                        (set by convertcomments), or None
      blankmsgstr    -- if true, mergestore leaves the targets empty
      duplicatestyle -- passed to pofile.removeduplicates()
      mixedentities  -- maps entity names that take part in a label/accesskey
                        pair to a dict of {mixbucket: True/False}
    """

    def __init__(self, blankmsgstr=False, duplicatestyle="msgctxt"):
        """stores the conversion options and resets the conversion state"""
        self.currentgroup = None
        self.blankmsgstr = blankmsgstr
        self.duplicatestyle = duplicatestyle
        # initialised here so that convertdtdunit can be used even if
        # findmixedentities has not been called yet
        self.mixedentities = {}

    def convertcomments(self, thedtd, thepo):
        """copies the location and the comments of thedtd onto thepo

        Group start/end comments update self.currentgroup; every comment is
        added as a developer note; the currently open group (if any) is added
        as a translator note; CSS-like entities get an extra warning note.
        """
        entity = quote.rstripeol(thedtd.entity)
        if entity:
            thepo.addlocation(thedtd.entity)
        for commenttype, comment in thedtd.comments:
            # handle groups
            if commenttype == "locgroupstart":
                self.currentgroup = comment.replace('BEGIN', 'GROUP')
            elif commenttype == "locgroupend":
                self.currentgroup = None
            # handle automatic comment (already stripped by convertunit)
            if commenttype == "automaticcomment":
                thepo.addnote(comment, origin="developer")
            # handle normal comments
            else:
                thepo.addnote(quote.stripcomment(comment), origin="developer")
        # handle group stuff
        if self.currentgroup is not None:
            thepo.addnote(quote.stripcomment(self.currentgroup), origin="translator")
        if is_css_entity(entity):
            thepo.addnote("Do not translate this. Only change the numeric values if you need this dialogue box to appear bigger", origin="developer")

    def convertstrings(self, thedtd, thepo):
        """sets thepo.source from the definition of thedtd and blanks thepo.target

        Carriage returns are dropped, blank leading and trailing lines are
        removed, and the inner whitespace of multi-line definitions is
        normalised so that each line except the last ends with one space.
        """
        # extract the string, get rid of quoting
        unquoted = dtd.unquotefromdtd(thedtd.definition).replace("\r", "")
        # NOTE: backslashes are not escaped here
        # now split the string into lines and drop blank lines at both ends
        lines = unquoted.split('\n')
        while lines and not lines[0].strip():
            del lines[0]
        while lines and not lines[-1].strip():
            del lines[-1]
        if len(lines) > 1:
            firstline = [lines[0].rstrip() + ' ']
            middlelines = [line.strip() + ' ' for line in lines[1:-1]]
            lastline = [lines[-1].lstrip()]
            thepo.source = "\n".join(firstline + middlelines + lastline)
        elif lines:
            thepo.source = lines[0]
        else:
            thepo.source = ""
        thepo.target = ""

    def convertunit(self, thedtd):
        """converts a dtd unit to a po unit, returns None if empty or not for translation

        Localization notes addressed to this entity are processed first: a
        DONT_TRANSLATE note blanks the entity and definition of thedtd (thedtd
        is modified in place), any other note is turned into an
        "automaticcomment" for convertcomments.
        """
        if thedtd is None:
            return None
        if getattr(thedtd, "entityparameter", None) == "SYSTEM":
            return None
        thepo = po.pounit(encoding="UTF-8")
        # remove unwanted stuff
        for commentnum, (commenttype, locnote) in enumerate(thedtd.comments):
            # only localization notes need special treatment
            if commenttype != 'locnote':
                continue
            # parse the locnote into the entity and the actual note
            typeend = quote.findend(locnote, 'LOCALIZATION NOTE')
            # parse the id
            idstart = locnote.find('(', typeend)
            if idstart == -1:
                continue
            idend = locnote.find(')', idstart + 1)
            if idend == -1:
                # unterminated id: find()'s -1 must not be used as a slice index
                continue
            entity = locnote[idstart + 1:idend].strip()
            # parse the actual note
            actualnotestart = locnote.find(':', idend + 1)
            if actualnotestart == -1:
                # no colon: the note starts straight after the closing bracket
                actualnotestart = idend
            actualnoteend = locnote.find('-->', idend)
            if actualnoteend == -1:
                # no comment terminator: keep everything up to the end
                actualnoteend = len(locnote)
            actualnote = locnote[actualnotestart + 1:actualnoteend].strip()
            # if it's for this entity, process it
            if thedtd.entity == entity:
                # if it says don't translate (and nothing more),
                if actualnote.startswith("DONT_TRANSLATE"):
                    # take out the entity,definition and the DONT_TRANSLATE comment
                    thedtd.entity = ""
                    thedtd.definition = ""
                    del thedtd.comments[commentnum]
                    # the list was modified, so we must stop iterating here
                    break
                else:
                    # convert it into an automatic comment, to be processed by convertcomments
                    thedtd.comments[commentnum] = ("automaticcomment", actualnote)
        # do a standard translation
        self.convertcomments(thedtd, thepo)
        self.convertstrings(thedtd, thepo)
        if thepo.isblank() and not thepo.getlocations():
            return None
        return thepo

    def convertmixedunit(self, labeldtd, accesskeydtd):
        """combines a label dtd unit and an accesskey dtd unit into one po unit

        If only one of the two converts to a po unit, that unit is returned
        on its own. Returns None if neither converts or if
        accesskeyfn.combine returns None for the pair.
        """
        labelpo = self.convertunit(labeldtd)
        accesskeypo = self.convertunit(accesskeydtd)
        if labelpo is None:
            return accesskeypo
        if accesskeypo is None:
            return labelpo
        thepo = po.pounit(encoding="UTF-8")
        thepo.addlocations(labelpo.getlocations())
        thepo.addlocations(accesskeypo.getlocations())
        thepo.msgidcomment = thepo._extract_msgidcomments() + labelpo._extract_msgidcomments()
        thepo.msgidcomment = thepo._extract_msgidcomments() + accesskeypo._extract_msgidcomments()
        thepo.addnote(labelpo.getnotes("developer"), "developer")
        thepo.addnote(accesskeypo.getnotes("developer"), "developer")
        thepo.addnote(labelpo.getnotes("translator"), "translator")
        thepo.addnote(accesskeypo.getnotes("translator"), "translator")
        # redo the strings from original dtd...
        label = dtd.unquotefromdtd(labeldtd.definition).decode('UTF-8')
        accesskey = dtd.unquotefromdtd(accesskeydtd.definition).decode('UTF-8')
        label = accesskeyfn.combine(label, accesskey)
        if label is None:
            return None
        thepo.source = label
        thepo.target = ""
        return thepo

    def findmixedentities(self, thedtdfile):
        """creates self.mixedentities from the dtd file...

        Any previous content of self.mixedentities is discarded.
        thedtdfile.index must already exist (see makeindex()).
        """
        self.mixedentities = {}  # those entities which have a .label/.title and .accesskey combined
        for entity in thedtdfile.index.keys():
            for labelsuffix in dtd.labelsuffixes:
                if entity.endswith(labelsuffix):
                    entitybase = entity[:entity.rfind(labelsuffix)]
                    # check if this could be a mixed entity: see if there is a
                    # matching accesskey in this file
                    for akeytype in dtd.accesskeysuffixes:
                        if (entitybase + akeytype) in thedtdfile.index:
                            # add both versions to the list of mixed entities
                            self.mixedentities[entity] = {}
                            self.mixedentities[entitybase + akeytype] = {}

    def convertdtdunit(self, thedtdfile, thedtd, mixbucket="dtd"):
        """converts a dtd unit from thedtdfile to a po unit, handling mixed entities along the way...

        mixbucket names the slot in self.mixedentities[entity] that records
        whether the pair was combined (True), could not be combined (False)
        or has not been tried yet (missing).

        Returns the po unit, or None if there is nothing to add (for example
        the other half of a pair that has already been combined).
        """
        # keep track of whether accesskey and label were combined
        if thedtd.entity in self.mixedentities:
            # use special convertmixed unit which produces one pounit with
            # both combined for the label and None for the accesskey
            alreadymixed = self.mixedentities[thedtd.entity].get(mixbucket, None)
            if alreadymixed:
                # we are successfully throwing this away...
                return None
            elif alreadymixed is None:
                # depending on what we come across first, work out the label and the accesskey
                labeldtd, accesskeydtd = None, None
                labelentity, accesskeyentity = None, None
                # first possibility: this unit is the label of a pair
                for labelsuffix in dtd.labelsuffixes:
                    if thedtd.entity.endswith(labelsuffix):
                        entitybase = thedtd.entity[:thedtd.entity.rfind(labelsuffix)]
                        for akeytype in dtd.accesskeysuffixes:
                            if (entitybase + akeytype) in thedtdfile.index:
                                labelentity, labeldtd = thedtd.entity, thedtd
                                accesskeyentity = entitybase + akeytype
                                accesskeydtd = thedtdfile.index[accesskeyentity]
                                break
                # second possibility: this unit is the accesskey of a pair.
                # This was the else clause of the loop above, which never
                # breaks, so it always runs.
                for akeytype in dtd.accesskeysuffixes:
                    if thedtd.entity.endswith(akeytype):
                        accesskeyentity, accesskeydtd = thedtd.entity, thedtd
                        for labelsuffix in dtd.labelsuffixes:
                            labelentity = accesskeyentity[:accesskeyentity.rfind(akeytype)] + labelsuffix
                            if labelentity in thedtdfile.index:
                                labeldtd = thedtdfile.index[labelentity]
                                break
                        else:
                            # no label found for this accesskey
                            labelentity = None
                            accesskeyentity = None
                thepo = self.convertmixedunit(labeldtd, accesskeydtd)
                # remember for both halves whether the mix worked
                mixed = thepo is not None
                if accesskeyentity is not None:
                    self.mixedentities[accesskeyentity][mixbucket] = mixed
                if labelentity is not None:
                    self.mixedentities[labelentity][mixbucket] = mixed
                if mixed:
                    return thepo
                # otherwise the mix failed: fall through and add this unit
                # separately
        return self.convertunit(thedtd)

    def convertstore(self, thedtdfile):
        """converts a whole dtd file to a po file with empty targets"""
        thetargetfile = po.pofile()
        targetheader = thetargetfile.init_headers(charset="UTF-8", encoding="8bit", x_accelerator_marker="&")
        targetheader.addnote("extracted from %s" % thedtdfile.filename, "developer")

        thedtdfile.makeindex()
        self.findmixedentities(thedtdfile)
        # go through the dtd and convert each unit
        for thedtd in thedtdfile.units:
            if thedtd.isnull():
                continue
            thepo = self.convertdtdunit(thedtdfile, thedtd)
            if thepo is not None:
                thetargetfile.addunit(thepo)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile

    def mergestore(self, origdtdfile, translateddtdfile):
        """converts an original and a translated dtd file to one po file

        The sources come from origdtdfile; the targets are the sources
        converted from the matching entities of translateddtdfile, unless
        self.blankmsgstr is set.
        """
        thetargetfile = po.pofile()
        targetheader = thetargetfile.init_headers(charset="UTF-8", encoding="8bit")
        targetheader.addnote("extracted from %s, %s" % (origdtdfile.filename, translateddtdfile.filename), "developer")

        origdtdfile.makeindex()
        self.findmixedentities(origdtdfile)
        translateddtdfile.makeindex()
        # NOTE(review): findmixedentities resets self.mixedentities, so after
        # this call only the pairs of the translated file are known. Confirm
        # this is intended before changing it.
        self.findmixedentities(translateddtdfile)
        # go through the dtd files and convert each unit
        for origdtd in origdtdfile.units:
            if origdtd.isnull():
                continue
            origpo = self.convertdtdunit(origdtdfile, origdtd, mixbucket="orig")
            if origdtd.entity in self.mixedentities:
                mixedentitydict = self.mixedentities[origdtd.entity]
                if "orig" not in mixedentitydict:
                    # this means that the entity is mixed in the translation, but not the original - treat as unmixed
                    mixbucket = "orig"
                    del self.mixedentities[origdtd.entity]
                elif mixedentitydict["orig"]:
                    # the original entity is already mixed successfully
                    mixbucket = "translate"
                else:
                    # the mix failed for the original
                    mixbucket = "orig"
            else:
                mixbucket = "translate"
            if origpo is None:
                # this means its a mixed entity (with accesskey) that's already been dealt with
                continue
            if origdtd.entity in translateddtdfile.index:
                translateddtd = translateddtdfile.index[origdtd.entity]
                translatedpo = self.convertdtdunit(translateddtdfile, translateddtd, mixbucket=mixbucket)
            else:
                translatedpo = None
            if translatedpo is not None and not self.blankmsgstr:
                origpo.target = translatedpo.source
            thetargetfile.addunit(origpo)
        thetargetfile.removeduplicates(self.duplicatestyle)
        return thetargetfile

def convertdtd(inputfile, outputfile, templatefile, pot=False, duplicatestyle="msgctxt"):
    """reads in inputfile and templatefile using dtd, converts using dtd2po, writes to outputfile

    Without a template the input is converted on its own; with a template
    the template is treated as the original and the input as its translation.

    Returns 1 if a po file was written, 0 if the result was empty (nothing
    is written in that case).
    """
    source_store = dtd.dtdfile(inputfile)
    converter = dtd2po(blankmsgstr=pot, duplicatestyle=duplicatestyle)
    if templatefile is not None:
        result = converter.mergestore(dtd.dtdfile(templatefile), source_store)
    else:
        result = converter.convertstore(source_store)
    if result.isempty():
        return 0
    outputfile.write(str(result))
    return 1

def main(argv=None):
    """command line entry point: sets up the option parser and runs the conversion

    argv -- argument list handed to the parser's run(); defaults to None
    """
    from translate.convert import convert
    # the same converter handles both plain input and input with a template
    formats = {
        "dtd": ("po", convertdtd),
        ("dtd", "dtd"): ("po", convertdtd),
    }
    parser = convert.ConvertOptionParser(
        formats,
        usetemplates=True,
        usepots=True,
        description=__doc__,
    )
    parser.add_duplicates_option()
    parser.passthrough.append("pot")
    parser.run(argv)


if __name__ == '__main__':
    main()