1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
"""Module for parsing Qt .qm files.

@note: Based on documentation from Gettext's .qm implementation (see write-qt.c) and on
observation of the output of lrelease.
@note: Certain deprecated section tags are not implemented. When one is encountered, parsing
stops and the missing tag is printed. They are easy to implement and should follow the
structure of tag 03 (Translation). We could find no examples that use these tags, so we would
rather leave them unimplemented until we actually have test data.
@note: Many .qm files cannot be parsed because they do not contain the source text. We assume
that, since a hash table is used to look up the data, the source text is not actually needed.
It seems, however, that Qt4's lrelease includes all data in the resulting .qm file.
@todo: We can only parse, not create, a .qm file. The main issue is that we need to
implement the hashing algorithm (which seems to be identical to the Gettext hash algorithm).
Unlike Gettext, it seems that the hash is required, but that has not been validated.
@todo: The code parses files correctly, but it could be cleaned up to be more readable,
especially the part that breaks the file into sections.

U{http://qt.gitorious.org/+kde-developers/qt/kde-qt/blobs/master/tools/linguist/shared/qm.cpp}
U{Plural information<http://qt.gitorious.org/+kde-developers/qt/kde-qt/blobs/master/tools/linguist/shared/numerus.cpp>}
U{QLocale languages<http://docs.huihoo.com/qt/4.5/qlocale.html#Language-enum>}
"""
45
46 from translate.storage import base
47 from translate.misc.multistring import multistring
48 import codecs
49 import struct
50 import sys
51
# The first 16 bytes of a .qm file, read as four big-endian unsigned 32-bit
# integers (compared against struct.unpack(">4L", ...) when parsing).
# Written without the Python 2 only "L" suffix: plain integer literals have
# the same value on Python 2 and are the only valid spelling on Python 3.
QM_MAGIC_NUMBER = (0x3CB86418, 0xCAEF9C95, 0xCD211CBF, 0x60A1BDDD)
53
# NOTE(review): the "def" line of this helper was missing from the listing
# it was recovered from. The parameter name is taken from the body; the
# function name and the default value are inferred -- confirm them against
# the upstream file.
def qmunpack(qmfile='messages.qm'):
    """Helper to unpack Qt .qm files into a Python string

    Reads the file named by qmfile and prints its content to stdout on a
    single line, every byte written as a backslash, an "x" and two lowercase
    hex digits, i.e. the text of a Python string literal for the data.

    @param qmfile: path of the file to dump.
    """
    # .qm files are binary: read raw bytes so no newline translation or text
    # decoding can alter them, and let "with" close the file even if the
    # read fails.
    with open(qmfile, 'rb') as f:
        s = f.read()
    # bytearray() yields integers on both Python 2 and Python 3, so no ord()
    # is needed; print() with one argument behaves the same on both.
    print("".join("\\x%02x" % byte for byte in bytearray(s)))
60
class qmunit(base.TranslationUnit):
    """A class representing a .qm translation message.

    It is the unit class used by the .qm store (the store sets
    UnitClass = qmunit).
    """
    # NOTE(review): the listing this was recovered from shows nothing after
    # the docstring although its numbering skips two lines here; confirm
    # against upstream whether an __init__ override belongs in this class.
65
class qmfile(base.TranslationStore):
    """A class representing a .qm file.

    Only reading is implemented: parse() checks the magic number, locates
    the messages section and adds one unit for every complete message
    record found in it.

    NOTE(review): this class was recovered from a listing that had lost its
    "def" lines, so the method headers (__str__, parse, savefile) are
    inferred from the method bodies and docstrings -- confirm them against
    upstream. Nothing was visible between the class attributes and __str__,
    so no __init__ is defined here.
    """
    UnitClass = qmunit
    Name = _("Qt .qm file")
    Mimetypes = ["application/x-qm"]
    Extensions = ["qm"]
    _binary = True

    # Tag of the file section that holds the message records. The hash
    # (0x42) and contexts (0x2f) sections are never needed for reading and
    # are skipped like any other section.
    _SECTION_MESSAGES = 0x69

    # Record tags inside the messages section.
    _TAG_END = 0x01
    _TAG_SOURCETEXT16 = 0x02
    _TAG_TRANSLATION = 0x03
    _TAG_CONTEXT16 = 0x04
    _TAG_HASH = 0x05
    _TAG_SOURCETEXT = 0x06
    _TAG_CONTEXT = 0x07
    _TAG_COMMENT = 0x08

    # Tags followed by a signed 32-bit big-endian length and then a payload.
    _LENGTH_PREFIXED = (_TAG_TRANSLATION, _TAG_SOURCETEXT, _TAG_CONTEXT,
                        _TAG_COMMENT)

    # Deprecated tags that are recognised by name but not implemented.
    _UNIMPLEMENTED_NAMES = {
        _TAG_SOURCETEXT16: "SourceText16",
        _TAG_CONTEXT16: "Context16",
    }

    def __str__(self):
        """Output a string representation of the .qm data file"""
        # Writing .qm data is not implemented, so there is nothing to output.
        return ""

    def parse(self, input):
        """parses the given file or file source string

        @param input: the raw content of a .qm file, or a file-like object.
        A file-like object is read completely and then closed.
        @raise ValueError: if the data is shorter than the 16 byte magic
        number, if the magic number is wrong, if the messages section or a
        record in it has an impossible length, or if a message record ends
        without both a source text and a translation.

        A file without a messages section yields no units. Parsing stops
        (after printing a notice to stderr) at the first unimplemented
        record tag.
        """
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            # Close the file even when reading fails.
            try:
                qmsrc = input.read()
            finally:
                input.close()
            input = qmsrc
        if len(input) < 16:
            raise ValueError("This is not a .qm file: file empty or too small")
        magic = struct.unpack(">4L", input[:16])
        if magic != QM_MAGIC_NUMBER:
            raise ValueError("This is not a .qm file: invalid magic number")
        bounds = self._find_messages(input)
        if bounds is None:
            # No messages section: nothing to add.
            return
        self._read_messages(input, bounds[0], bounds[1])

    def _find_messages(self, data):
        """Return (start, end) offsets of the messages section, or None.

        Sections follow the 16 byte magic number; each one starts with a one
        byte type and a four byte big-endian length. If the section type
        occurs more than once, the last occurrence wins.
        """
        header = struct.Struct(">bL")
        bounds = None
        offset = 16
        while offset < len(data):
            section_type, length = header.unpack_from(data, offset)
            body = offset + header.size
            if section_type == self._SECTION_MESSAGES:
                if body + length > len(data):
                    raise ValueError(
                        "This is not a .qm file: truncated messages section")
                bounds = (body, body + length)
            # The header is 5 bytes, so the offset always moves forward.
            offset = body + length
        return bounds

    def _read_messages(self, data, pos, end):
        """Add a unit for every message record in data[pos:end]."""
        source = target = None
        while pos < end:
            tag, = struct.unpack_from(">b", data, pos)
            pos += 1
            if tag == self._TAG_END:
                if source is None or target is None:
                    raise ValueError("Old .qm format with no source defined")
                newunit = self.addsourceunit(source)
                newunit.target = target
                source = target = None
            elif tag == self._TAG_HASH:
                # A fixed four byte hash with no length prefix; not needed.
                pos += 4
            elif tag in self._LENGTH_PREFIXED:
                length, = struct.unpack_from(">l", data, pos)
                pos += 4
                if tag == self._TAG_TRANSLATION and length == -1:
                    # A length of -1 marks a translation without payload.
                    target = ""
                    continue
                if length < 0 or pos + length > end:
                    # A negative length would move the cursor backwards and
                    # could loop forever; an overlong one reads past the
                    # section.
                    raise ValueError(
                        "This is not a .qm file: invalid record length")
                payload = data[pos:pos + length]
                pos += length
                if tag == self._TAG_TRANSLATION:
                    string = codecs.utf_16_be_decode(payload)[0]
                    # Further translations of the same message are appended
                    # as additional strings of the multistring.
                    # NOTE(review): an empty first string is falsy, so a
                    # following string replaces it rather than being
                    # appended -- confirm whether that is intended.
                    if target:
                        target.strings.append(string)
                    else:
                        target = multistring(string)
                elif tag == self._TAG_SOURCETEXT:
                    source = payload.decode('iso-8859-1')
                # Context and comment payloads are read past but not stored.
            else:
                subsection_name = self._UNIMPLEMENTED_NAMES.get(tag, "Unkown")
                sys.stderr.write("Unimplemented: %s %s\n"
                                 % (tag, subsection_name))
                return

    def savefile(self, storefile):
        """Writing is not supported; always raises Exception."""
        raise Exception("Writing of .qm files is not supported yet")
178