Package translate :: Package storage :: Module mo
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.mo

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2007 Zuza Software Foundation 
  5  # 
  6  # the function "__str__" was derived from Python v2.4 
  7  #       (Tools/i18n/msgfmt.py - function "generate"): 
  8  #   Written by Martin v. Löwis <loewis@informatik.hu-berlin.de> 
  9  #   Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Python Software Foundation. 
 10  #   All rights reserved. 
 11  #   original license: Python Software Foundation (version 2) 
 12  #  
 13  # 
 14  # This file is part of translate. 
 15  # 
 16  # translate is free software; you can redistribute it and/or modify 
 17  # it under the terms of the GNU General Public License as published by 
 18  # the Free Software Foundation; either version 2 of the License, or 
 19  # (at your option) any later version. 
 20  #  
 21  # translate is distributed in the hope that it will be useful, 
 22  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 23  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 24  # GNU General Public License for more details. 
 25  # 
 26  # You should have received a copy of the GNU General Public License 
 27  # along with translate; if not, write to the Free Software 
 28  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 29  # 
 30   
 31  """Module for parsing Gettext .mo files for translation. 
 32   
 33  The coding of .mo files was produced from U{Gettext documentation 
 34  <http://www.gnu.org/software/gettext/manual/gettext.html#MO-Files>}, 
 35  Python's msgfmt.py and by observing and testing existing .mo files in the wild. 
 36   
 37  The hash algorithm is implemented for MO files; this should result in 
 38  faster access of the MO file.  The hash is optional for Gettext 
 39  and is not needed for reading or writing MO files.  In this implementation 
 40  it is always on, and in very small files it sometimes produces results 
 41  that differ from Gettext's. 
 42  """ 
 43   
 44  from translate.storage import base 
 45  from translate.storage import po 
 46  from translate.storage import poheader 
 47  from translate.misc.multistring import multistring 
 48  import struct 
 49  import array 
 50  import re 
 51   
 52  MO_MAGIC_NUMBER = 0x950412deL 
 53   
def mounpack(filename='messages.mo'):
    """Helper to unpack Gettext MO files into a Python string.

    Reads the whole file and prints its content to standard output as one
    line of ``\\xNN`` escapes (two lowercase hex digits per byte), suitable
    for pasting into a Python string literal.  Nothing is returned.

    @param filename: path of the MO file to dump.
    """
    # MO files are binary: open in 'rb' so no newline translation happens on
    # platforms that distinguish text mode, and let the context manager close
    # the handle even when read() raises.
    with open(filename, 'rb') as mofile_handle:
        data = mofile_handle.read()
    # bytearray yields integers on both Python 2.6+ and Python 3, so the same
    # formatting code works regardless of how the interpreter models bytes.
    print("".join(["\\x%02x" % byte for byte in bytearray(data)]))
60
def my_swap4(result):
    """Return the low 32 bits of C{result} with their four bytes reversed.

    Bits above the lowest 32 are ignored.
    """
    swapped = 0
    remaining = result
    # Peel off the lowest byte each round and push it onto the bottom of the
    # accumulator; after four rounds the byte order is reversed.
    for _ in range(4):
        swapped = (swapped << 8) | (remaining & 0xff)
        remaining >>= 8
    return swapped
68
def hashpjw(str_param):
    """Compute the hash value of C{str_param} used for the MO hash table.

    For every character the running value is shifted left by four bits and
    the character code is added.  Whenever any of bits 28-31 end up set,
    those bits are folded back in (shifted right by 24) and then cleared.

    @param str_param: the string to hash; each item is passed to C{ord}.
    @return: the resulting integer hash (0 for an empty string).
    """
    word_bits = 32
    top_nibble_mask = 0xf << (word_bits - 4)
    fold_shift = word_bits - 8
    hval = 0
    for char in str_param:
        hval = (hval << 4) + ord(char)
        top_nibble = hval & top_nibble_mask
        if top_nibble:
            hval ^= top_nibble >> fold_shift
            hval ^= top_nibble
    return hval
82
def get_next_prime_number(start):
    """Find the smallest prime number that is greater than or equal to C{start}.

    @param start: integer at which the search begins; any value below 2
        yields 2.
    @return: the first prime found when counting upwards from C{start}.
    """
    def is_prime(num):
        """Tell whether the integer C{num} is prime."""
        if num < 2:
            return False
        if num < 4:
            # 2 and 3
            return True
        if num % 2 == 0:
            return False
        # Only odd divisors up to the square root need testing.  This avoids
        # both the O(num) list that range(2, num/2) builds on Python 2 and the
        # float that "/" would hand to range() under true division.
        divider = 3
        while divider * divider <= num:
            if num % divider == 0:
                return False
            divider += 2
        return True

    candidate = start
    while not is_prime(candidate):
        candidate += 1
    return candidate
class mounit(base.TranslationUnit):
    """A single translation message of a Gettext .mo file."""

    def __init__(self, source=None):
        """Create the unit and hand C{source} to the base class initialiser.

        The comment and context lists are created before the base
        initialiser is called.
        """
        self.msgidcomments = []
        self.msgctxt = []
        super(mounit, self).__init__(source)

    def getcontext(self):
        """Return the message context as one string.

        @return: the concatenated C{msgctxt} parts, or C{None} when
            C{msgctxt} is C{None}.
        """
        # Still need to handle KDE comments
        if self.msgctxt is not None:
            return "".join(self.msgctxt)
        return None

    def isheader(self):
        """Tell whether this unit is the header entry (empty source)."""
        source_is_empty = self.source == u""
        return source_is_empty

    def istranslatable(self):
        """Tell whether this message is translatable (non-empty source)."""
        if self.source:
            return True
        return False
124
class mofile(base.TranslationStore, poheader.poheader):
    """A class representing a .mo file."""
    # Unit class used for the entries of this store.
    UnitClass = mounit
    Name = _("Gettext MO file")
    Mimetypes = ["application/x-gettext-catalog", "application/x-mo"]
    Extensions = ["mo", "gmo"]
    _binary = True

    def __init__(self, inputfile=None, unitclass=mounit):
        """Create the store and optionally load it.

        @param inputfile: when not C{None} it is passed to
            C{self.parsestring}.
        @param unitclass: class used for the units of this store.
        """
        self.UnitClass = unitclass
        base.TranslationStore.__init__(self, unitclass=unitclass)
        self.filename = ''
        if inputfile is not None:
            self.parsestring(inputfile)

    def __str__(self):
        """Output a string representation of the MO data file.

        Only units with a non-empty target are written.  The result consists
        of the 7-integer header and, when there is at least one entry, the
        key/value index, the hash table, the NUL-terminated keys and the
        NUL-terminated values.  Integers are packed in native byte order.
        """
        # check the header of this file for the copyright note of this function
        def add_to_hash_table(string, i):
            """Record entry C{i} (stored as i + 1) in the first free slot
            of the probe sequence for C{string}."""
            V = hashpjw(string)
            # Taken from gettext-0.17:gettext-tools/src/write-mo.c:408-409
            if hash_size <= 2:
                S = 3
            else:
                S = hash_size
            hash_cursor = V % S
            orig_hash_cursor = hash_cursor
            increment = 1 + (V % (S - 2))
            # A slot holding 0 is free.
            while hash_table[hash_cursor] != 0:
                hash_cursor = (hash_cursor + increment) % S
                # Coming back to the start would mean the table is full.
                assert hash_cursor != orig_hash_cursor
            hash_table[hash_cursor] = i + 1

        # hash_size should be the smallest prime number that is greater
        # or equal (4 / 3 * N) - where N is the number of keys/units.
        # see gettext-0.17:gettext-tools/src/write-mo.c:406
        hash_size = get_next_prime_number((len(self.units) * 4) // 3)
        if hash_size <= 2:
            hash_size = 3
        MESSAGES = {}
        for unit in self.units:
            if isinstance(unit.source, multistring):
                # plural forms are separated by NUL
                source = "".join(unit.msgidcomments) + "\0".join(unit.source.strings)
            else:
                source = "".join(unit.msgidcomments) + unit.source
            if unit.msgctxt:
                # the context is prepended, separated by EOT
                source = "".join(unit.msgctxt) + "\x04" + source
            if isinstance(unit.target, multistring):
                target = "\0".join(unit.target.strings)
            else:
                target = unit.target
            if unit.target:
                MESSAGES[source.encode("utf-8")] = target
        # using "I" works for 32- and 64-bit systems, but not for 16-bit!
        hash_table = array.array("I", [0] * hash_size)
        # the keys are sorted in the .mo file
        keys = sorted(MESSAGES)
        offsets = []
        # The key and value tables are collected as chunks and joined once,
        # with running positions tracked separately, instead of growing two
        # strings by repeated concatenation.
        id_chunks = []
        str_chunks = []
        id_pos = 0
        str_pos = 0
        for i, key in enumerate(keys):
            # For each string, we need size and file offset.  Each string is NUL
            # terminated; the NUL does not count into the size.
            # TODO: We don't do any encoding detection from the PO Header
            add_to_hash_table(key, i)
            # key is already encoded for use as a dictionary key
            value = MESSAGES[key]
            if isinstance(value, unicode):
                value = value.encode('utf-8')
            offsets.append((id_pos, len(key), str_pos, len(value)))
            id_chunks.append(key + '\0')
            str_chunks.append(value + '\0')
            id_pos += len(key) + 1
            str_pos += len(value) + 1
        ids = ''.join(id_chunks)
        strs = ''.join(str_chunks)
        # The header is 7 32-bit unsigned integers
        keystart = 7*4 + 16*len(keys) + hash_size*4
        # and the values start after the keys
        valuestart = keystart + len(ids)
        koffsets = []
        voffsets = []
        # The string table first has the list of keys, then the list of values.
        # Each entry has first the size of the string, then the file offset.
        for o1, l1, o2, l2 in offsets:
            koffsets.extend((l1, o1 + keystart))
            voffsets.extend((l2, o2 + valuestart))
        offsets = koffsets + voffsets
        output = struct.pack("Iiiiiii",
                             MO_MAGIC_NUMBER,               # Magic
                             0,                             # Version
                             len(keys),                     # # of entries
                             7*4,                           # start of key index
                             7*4 + len(keys)*8,             # start of value index
                             hash_size,                     # size of hash table
                             7*4 + 2*(len(keys)*8))         # offset of hash table
        # additional data is not necessary for empty mo files
        if keys:
            output = output + array.array("i", offsets).tostring()
            output = output + hash_table.tostring()
            output = output + ids
            output = output + strs
        return output

    def parse(self, input):
        """Parse the given file or file source string.

        @param input: either the raw MO data or an object with a C{read}
            method.  A readable object is read completely and then closed.
            If it has a C{name} attribute, that becomes C{self.filename}.
        @raise ValueError: if the data does not start with the MO magic
            number in either byte order, or if the version field is
            greater than 1.
        """
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            mosrc = input.read()
            input.close()
            input = mosrc
        # The magic number tells us the byte order of the file.
        little, = struct.unpack("<L", input[:4])
        big, = struct.unpack(">L", input[:4])
        if little == MO_MAGIC_NUMBER:
            endian = "<"
        elif big == MO_MAGIC_NUMBER:
            endian = ">"
        else:
            raise ValueError("This is not an MO file")
        magic, version, lenkeys, startkey, startvalue, sizehash, offsethash = \
            struct.unpack("%sLiiiiii" % endian, input[:(7*4)])
        if version > 1:
            raise ValueError("Unable to process MO files with versions > 1. This is a %d version MO file" % version)
        encoding = 'UTF-8'
        pair_format = "%sii" % endian
        for i in range(lenkeys):
            # every index entry is a (length, offset) pair of 32-bit integers
            nextkey = startkey + (i*2*4)
            nextvalue = startvalue + (i*2*4)
            klength, koffset = struct.unpack(pair_format, input[nextkey:nextkey+(2*4)])
            vlength, voffset = struct.unpack(pair_format, input[nextvalue:nextvalue+(2*4)])
            source = input[koffset:koffset+klength]
            rawtarget = input[voffset:voffset+vlength]
            context = None
            if "\x04" in source:
                # Split on the first EOT only: the context ends there, and a
                # key containing further EOT bytes must not make the
                # two-name unpacking raise ValueError.
                context, source = source.split("\x04", 1)
            # Still need to handle KDE comments
            source = multistring(source.split("\0"), encoding=encoding)
            if source == "":
                # header entry: it may announce the charset of the file
                charset = re.search("charset=([^\\s]+)", rawtarget)
                if charset:
                    encoding = po.encodingToUse(charset.group(1))
            target = multistring(rawtarget.split("\0"), encoding=encoding)
            newunit = mounit(source)
            newunit.settarget(target)
            if context is not None:
                newunit.msgctxt.append(context)
            self.addunit(newunit)
267