Package translate :: Package storage :: Module qm
[hide private]
[frames | no frames]

Source Code for Module translate.storage.qm

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2007 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21  # 
 22   
 23  """Module for parsing Qt .qm files 
 24   
 25  @note: based on documentation from Gettext's .qm implementation (see write-qt.c) and on observation 
 26  of the output of lrelease. 
 27  @note: Certain deprecated section tags are not implemented.  These will break and print out 
 28  the missing tag.  They are easy to implement and should follow the structure in 03  
 29  (Translation).  We could find no examples that use these so we'd rather leave it  
 30  unimplemented until we actually have test data. 
 31  @note: Many .qm files are unable to be parsed as they do not have the source text.  We assume 
 32  that since they use a hash table to lookup the data there is actually no need for the  
 33  source text.  It seems however that in Qt4's lrelease all data is included in the resultant .qm 
 34  file. 
 35  @todo: We can only parse, not create, a .qm file.  The main issue is that we need to  
 36  implement the hashing algorithm (which seems to be identical to the Gettext hash algorithm).  Unlike 
 37  Gettext it seems that the hash is required, but that has not been validated. 
 38  @todo: The code can parse files correctly.  But it could be cleaned up to be more readable, especially  
 39  the part that breaks the file into sections. 
 40   
 41  U{http://qt.gitorious.org/+kde-developers/qt/kde-qt/blobs/master/tools/linguist/shared/qm.cpp} 
 42  U{Plural information<http://qt.gitorious.org/+kde-developers/qt/kde-qt/blobs/master/tools/linguist/shared/numerus.cpp>} 
 43  U{QLocale languages<http://docs.huihoo.com/qt/4.5/qlocale.html#Language-enum>} 
 44  """ 
 45   
 46  from translate.storage import base 
 47  from translate.misc.multistring import multistring 
 48  import codecs 
 49  import struct 
 50  import sys 
 51   
 52  QM_MAGIC_NUMBER = (0x3CB86418L, 0xCAEF9C95L, 0xCD211CBFL, 0x60A1BDDDL) 
 53   
def qmunpack(qmfile='messages.mo'):
    """Debugging helper: print the raw bytes of a file as hex escapes.

    Every byte of the file is written to standard output as a backslash, the
    letter x and two lowercase hex digits, all on a single line, so that the
    output can be pasted into Python source as a string literal.

    @param qmfile: path of the file to dump.
    """
    # Read in binary mode: the data is not text, and text mode would mangle
    # it on platforms that translate line endings.  The with-block closes the
    # file even if reading fails.
    with open(qmfile, 'rb') as f:
        data = f.read()
    # bytearray() yields integers on both Python 2 and Python 3.
    print("".join("\\x%02x" % byte for byte in bytearray(data)))
60
class qmunit(base.TranslationUnit):
    """One translation message as stored in a Qt .qm file."""

    def __init__(self, source=None):
        """Create the unit by handing C{source} to the base class initialiser.

        @param source: the source text of the message, or None.
        """
        parent = super(qmunit, self)
        parent.__init__(source)
65
class qmfile(base.TranslationStore):
    """A read-only translation store for compiled Qt .qm files.

    Parsing adds one unit to the store for every message found in the
    messages section of the file.  Writing .qm files is not supported.
    """
    UnitClass = qmunit
    # NOTE(review): "_" is not imported by this module; presumably it is
    # installed as a builtin (gettext) before this module is loaded - confirm.
    Name = _("Qt .qm file")
    Mimetypes = ["application/x-qm"]
    Extensions = ["qm"]
    _binary = True

    def __init__(self, inputfile=None, unitclass=qmunit):
        """Create the store and optionally load it from C{inputfile}.

        @param inputfile: an open file object or a string with the raw .qm
        data; when None an empty store is created.
        @param unitclass: the class used for units of this store.
        @raise ValueError: if C{inputfile} is given and is not valid .qm data.
        """
        self.UnitClass = unitclass
        base.TranslationStore.__init__(self, unitclass=unitclass)
        self.units = []
        self.filename = ''
        if inputfile is not None:
            # Load into *this* instance.  parse() accepts both file objects
            # and raw strings, so it covers everything callers may pass here.
            self.parse(inputfile)

    def __str__(self):
        """Output a string representation of the .qm data file.

        Serialising is not implemented, so this is always the empty string.
        """
        return ""

    def parse(self, input):
        """Parse the given file object or string of raw .qm data.

        @param input: a file-like object (it is read completely and then
        closed) or a string holding the contents of a .qm file.
        @raise ValueError: if the data is shorter than the 16 byte magic
        number, does not start with the magic number, has a truncated
        section, or contains a message without source text or translation.
        """
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            # Close the file even when reading it fails.
            try:
                qmsrc = input.read()
            finally:
                input.close()
            input = qmsrc
        if len(input) < 16:
            raise ValueError("This is not a .qm file: file empty or too small")
        magic = struct.unpack(">4L", input[:16])
        if magic != QM_MAGIC_NUMBER:
            raise ValueError("This is not a .qm file: invalid magic number")
        bounds = self._find_messages_section(input)
        if bounds is None:
            # No messages section at all: there is nothing to add.
            return
        self._parse_messages(input, bounds[0], bounds[1])

    def _find_messages_section(self, data):
        """Walk the section list and return (start, end) of the messages body.

        Each section starts with a 5 byte header: a one byte type followed by
        a big-endian 32-bit body length.  Only the messages section (type
        0x69) is used; all other sections (e.g. 0x42 hash, 0x2f contexts) are
        skipped.  Returns None when the file has no messages section.  If
        several are present the last one wins.
        """
        headersize = 5
        total = len(data)
        found = None
        offset = 16  # the sections follow directly after the magic number
        while offset < total:
            header = data[offset:offset + headersize]
            if len(header) < headersize:
                raise ValueError("This is not a .qm file: truncated section header")
            section_type, length = struct.unpack(">bL", header)
            body_start = offset + headersize
            body_end = body_start + length
            if section_type == 0x69:
                if body_end > total:
                    raise ValueError("This is not a .qm file: truncated messages section")
                found = (body_start, body_end)
            offset = body_end
        return found

    def _read_bytes(self, data, pos):
        """Read a length-prefixed byte string starting at C{pos}.

        The prefix is a signed big-endian 32-bit length.  A negative length
        (-1 marks a missing value) is treated as an empty string so that the
        cursor never moves backwards.  Returns (bytes, position after them).
        """
        length, = struct.unpack(">l", data[pos:pos + 4])
        pos += 4
        if length < 0:
            length = 0
        return data[pos:pos + length], pos + length

    def _parse_messages(self, data, start, end):
        """Add a unit for every message stored in C{data[start:end]}.

        A message is a run of tagged fields closed by an End tag (0x01).
        Parsing stops early, after reporting on stderr, when a tag that is
        not implemented is met.
        """
        pos = start
        source = target = None
        while pos < end:
            subsection, = struct.unpack(">b", data[pos:pos + 1])
            pos += 1
            if subsection == 0x01:  # End
                if source is None or target is None:
                    raise ValueError("Old .qm format with no source defined")
                newunit = self.addsourceunit(source)
                newunit.target = target
                source = target = None
            elif subsection == 0x03:  # Translation
                raw, pos = self._read_bytes(data, pos)
                string = codecs.utf_16_be_decode(raw)[0]
                # Test against None rather than truthiness: an empty first
                # form must not be replaced by the following plural forms.
                if target is None:
                    target = multistring(string)
                else:
                    target.strings.append(string)
            elif subsection == 0x06:  # SourceText
                raw, pos = self._read_bytes(data, pos)
                source = raw.decode('iso-8859-1')
            elif subsection in (0x07, 0x08):  # Context, Disambiguating-comment
                # Read only to step over the field; the value is not stored.
                raw, pos = self._read_bytes(data, pos)
            elif subsection == 0x05:  # hash
                pos += 4  # fixed four byte value without a length prefix
            else:
                if subsection == 0x02:  # SourceText16
                    subsection_name = "SourceText16"
                elif subsection == 0x04:  # Context16
                    subsection_name = "Context16"
                else:
                    subsection_name = "Unkown"
                sys.stderr.write("Unimplemented: %s %s\n" % (subsection, subsection_name))
                return

    def savefile(self, storefile):
        """Always fails: writing .qm files is not implemented.

        @raise Exception: unconditionally.
        """
        raise Exception("Writing of .qm files is not supported yet")
178