Package translate :: Package lang :: Module data
[hide private]
[frames | no frames]

Source Code for Module translate.lang.data

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2007-2009 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """This module stores information and functionality that relates to plurals.""" 
 23   
 24  import unicodedata 
 25   
 26  from translate.storage.placeables import StringElem 
 27   
 28   
 29  languages = { 
 30  'af': (u'Afrikaans', 2, '(n != 1)'), 
 31  'ak': (u'Akan', 2, 'n > 1'), 
 32  'am': (u'Amharic', 2, 'n > 1'), 
 33  'ar': (u'Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5'), 
 34  'arn': (u'Mapudungun; Mapuche', 2, 'n > 1'), 
 35  'ast': (u'Asturian; Bable; Leonese; Asturleonese', 2, 'n != 1'), 
 36  'az': (u'Azerbaijani', 2, '(n != 1)'), 
 37  'be': (u'Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'), 
 38  'bg': (u'Bulgarian', 2, '(n != 1)'), 
 39  'bn': (u'Bengali', 2, '(n != 1)'), 
 40  'bn_IN': (u'Bengali (India)', 2, '(n != 1)'), 
 41  'bo': (u'Tibetan', 1, '0'), 
 42  'br': (u'Breton', 2, 'n > 1'), 
 43  'bs': (u'Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'), 
 44  'ca': (u'Catalan; Valencian', 2, '(n != 1)'), 
 45  'cs': (u'Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'), 
 46  'csb': (u'Kashubian', 3, 'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'), 
 47  'cy': (u'Welsh', 2, '(n==2) ? 1 : 0'), 
 48  'da': (u'Danish', 2, '(n != 1)'), 
 49  'de': (u'German', 2, '(n != 1)'), 
 50  'dz': (u'Dzongkha', 1, '0'), 
 51  'el': (u'Greek, Modern (1453-)', 2, '(n != 1)'), 
 52  'en': (u'English', 2, '(n != 1)'), 
 53  'en_GB': (u'English (United Kingdom)', 2, '(n != 1)'), 
 54  'en_ZA': (u'English (South Africa)', 2, '(n != 1)'), 
 55  'eo': (u'Esperanto', 2, '(n != 1)'), 
 56  'es': (u'Spanish; Castilian', 2, '(n != 1)'), 
 57  'et': (u'Estonian', 2, '(n != 1)'), 
 58  'eu': (u'Basque', 2, '(n != 1)'), 
 59  'fa': (u'Persian', 1, '0'), 
 60  'fi': (u'Finnish', 2, '(n != 1)'), 
 61  'fil': (u'Filipino; Pilipino', 2, '(n > 1)'), 
 62  'fo': (u'Faroese', 2, '(n != 1)'), 
 63  'fr': (u'French', 2, '(n > 1)'), 
 64  'fur': (u'Friulian', 2, '(n != 1)'), 
 65  'fy': (u'Frisian', 2, '(n != 1)'), 
 66  'ga': (u'Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'), 
 67  'gl': (u'Galician', 2, '(n != 1)'), 
 68  'gu': (u'Gujarati', 2, '(n != 1)'), 
 69  'gun': (u'Gun', 2, '(n > 1)'), 
 70  'ha': (u'Hausa', 2, '(n != 1)'), 
 71  'he': (u'Hebrew', 2, '(n != 1)'), 
 72  'hi': (u'Hindi', 2, '(n != 1)'), 
 73  'hy': (u'Armenian', 1, '0'), 
 74  'hr': (u'Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
 75  'hu': (u'Hungarian', 2, '(n != 1)'), 
 76  'id': (u'Indonesian', 1, '0'), 
 77  'is': (u'Icelandic', 2, '(n != 1)'), 
 78  'it': (u'Italian', 2, '(n != 1)'), 
 79  'ja': (u'Japanese', 1, '0'), 
 80  'jv': (u'Javanese', 2, '(n != 1)'), 
 81  'ka': (u'Georgian', 1, '0'), 
 82  'km': (u'Central Khmer', 1, '0'), 
 83  'kn': (u'Kannada', 2, '(n != 1)'), 
 84  'ko': (u'Korean', 1, '0'), 
 85  'ku': (u'Kurdish', 2, '(n != 1)'), 
 86  'kw': (u'Cornish', 4, '(n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3'), 
 87  'ky': (u'Kirghiz; Kyrgyz', 1, '0'), 
 88  'lb': (u'Luxembourgish; Letzeburgesch', 2, '(n != 1)'), 
 89  'ln': (u'Lingala', 2, '(n > 1)'), 
 90  'lo': (u'Lao', 1, '0'), 
 91  'lt': (u'Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
 92  'lv': (u'Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'), 
 93  'mg': (u'Malagasy', 2, '(n > 1)'), 
 94  'mi': (u'Maori', 2, '(n > 1)'), 
 95  'mk': (u'Macedonian', 2, 'n==1 || n%10==1 ? 0 : 1'), 
 96  'ml': (u'Malayalam', 2, '(n != 1)'), 
 97  'mn': (u'Mongolian', 2, '(n != 1)'), 
 98  'mr': (u'Marathi', 2, '(n != 1)'), 
 99  'ms': (u'Malay', 1, '0'), 
100  'mt': (u'Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'), 
101  'nah': (u'Nahuatl languages', 2, '(n != 1)'), 
102  'nap': (u'Neapolitan', 2, '(n != 1)'), 
103  'nb': (u'Bokmål, Norwegian; Norwegian Bokmål', 2, '(n != 1)'), 
104  'ne': (u'Nepali', 2, '(n != 1)'), 
105  'nl': (u'Dutch; Flemish', 2, '(n != 1)'), 
106  'nn': (u'Norwegian Nynorsk; Nynorsk, Norwegian', 2, '(n != 1)'), 
107  'nso': (u'Pedi; Sepedi; Northern Sotho', 2, '(n > 1)'), 
108  'oc': (u'Occitan (post 1500)', 2, '(n > 1)'), 
109  'or': (u'Oriya', 2, '(n != 1)'), 
110  'pa': (u'Panjabi; Punjabi', 2, '(n != 1)'), 
111  'pap': (u'Papiamento', 2, '(n != 1)'), 
112  'pl': (u'Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
113  'pms': (u'Piemontese', 2, '(n != 1)'), 
114  'ps': (u'Pushto; Pashto', 2, '(n != 1)'), 
115  'pt': (u'Portuguese', 2, '(n != 1)'), 
116  'pt_BR': (u'Portuguese (Brazil)', 2, '(n > 1)'), 
117  'ro': (u'Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2);'), 
118  'ru': (u'Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
119  'sco': (u'Scots', 2, '(n != 1)'), 
120  'sk': (u'Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'), 
121  'sl': (u'Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'), 
122  'so': (u'Somali', 2, '(n != 1)'), 
123  'sq': (u'Albanian', 2, '(n != 1)'), 
124  'sr': (u'Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
125  'st': (u'Sotho, Southern', 2, '(n != 1)'), 
126  'su': (u'Sundanese', 1, '0'), 
127  'sv': (u'Swedish', 2, '(n != 1)'), 
128  'sw': (u'Swahili', 2, '(n != 1)'), 
129  'ta': (u'Tamil', 2, '(n != 1)'), 
130  'te': (u'Telugu', 2, '(n != 1)'), 
131  'tg': (u'Tajik', 2, '(n != 1)'), 
132  'ti': (u'Tigrinya', 2, '(n > 1)'), 
133  'th': (u'Thai', 1, '0'), 
134  'tk': (u'Turkmen', 2, '(n != 1)'), 
135  'tr': (u'Turkish', 1, '0'), 
136  'uk': (u'Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'), 
137  'vi': (u'Vietnamese', 1, '0'), 
138  'wa': (u'Walloon', 2, '(n > 1)'), 
139  # Chinese is difficult because the main divide is on script, not really  
140  # country. Simplified Chinese is used mostly in China, Singapore and Malaysia. 
141  # Traditional Chinese is used mostly in Hong Kong, Taiwan and Macau. 
142  'zh_CN': (u'Chinese (China)', 1, '0'), 
143  'zh_HK': (u'Chinese (Hong Kong)', 1, '0'), 
144  'zh_TW': (u'Chinese (Taiwan)', 1, '0'), 
145  'zu': (u'Zulu', 2, '(n != 1)'), 
146  } 
147  """Dictionary of language data. 
148  The language code is the dictionary key (which may contain country codes and modifiers). 
149  The value is a tuple: (Full name in English from iso-codes, nplurals, plural equation). 
150   
151  Note that the English names should not be used in user facing places - it 
152  should always be passed through the function returned from tr_lang(), or at 
153  least passed through _fix_language_name().""" 
154   
# Maps verbose English language names to a shorter rendering.  The lookup is
# done by _fix_language_name(), which returns the replacement when the name
# is a key here and the name itself otherwise.  Several keys match names used
# in the ``languages`` table (e.g. u"Catalan; Valencian"); others, such as
# u"Klingon; tlhIngan-Hol", have no entry in that table.
_fixed_names = {
        u"Asturian; Bable; Leonese; Asturleonese": u"Asturian",
        u"Bokmål, Norwegian; Norwegian Bokmål": u"Norwegian Bokmål",
        u"Catalan; Valencian": u"Catalan",
        u"Central Khmer": u"Khmer",
        u"Chichewa; Chewa; Nyanja": u"Chewa; Nyanja",
        u"Divehi; Dhivehi; Maldivian": u"Divehi",
        u"Dutch; Flemish": u"Dutch",
        u"Filipino; Pilipino": u"Filipino",
        u"Greek, Modern (1453-)": u"Greek",
        u"Kirghiz; Kyrgyz": u"Kirghiz",
        u"Klingon; tlhIngan-Hol": u"Klingon",
        u"Limburgan; Limburger; Limburgish": u"Limburgish",
        u"Low German; Low Saxon; German, Low; Saxon, Low": u"Low German",
        u"Luxembourgish; Letzeburgesch": u"Luxembourgish",
        u"Ndebele, South; South Ndebele": u"Southern Ndebele",
        u"Norwegian Nynorsk; Nynorsk, Norwegian": u"Norwegian Nynorsk",
        u"Occitan (post 1500)": u"Occitan",
        u"Panjabi; Punjabi": u"Punjabi",
        u"Pedi; Sepedi; Northern Sotho": u"Northern Sotho",
        u"Pushto; Pashto": u"Pashto",
        u"Sinhala; Sinhalese": u"Sinhala",
        u"Sotho, Southern": u"Sotho",
        u"Spanish; Castilian": u"Spanish",
        u"Uighur; Uyghur": u"Uighur",
}
181   
def simplercode(code):
    """Drop the last subtag (a country code, for example) from a language code.

    The last separator is located in the normalised form of the code (see
    normalize_code()), so "_", "-" and "@" are all treated as separators.
    The returned prefix is sliced from the original C{code}, so its case and
    separator characters are preserved.

    @see:
      - U{http://www.rfc-editor.org/rfc/bcp/bcp47.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4646.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4647.txt}
      - U{http://www.w3.org/International/articles/language-tags/}

    @param code: A language code such as C{"pt_BR"} or C{"sr@latin"}
    @return: C{code} itself when it is empty or None, the part of C{code}
        before the last separator, or C{""} when there is no separator.
    """
    if not code:
        return code

    cut = normalize_code(code).rfind('-')
    if cut < 0:
        # Nothing left to strip.
        return ""
    return code[:cut]
# Keyed by language code.  NOTE(review): the sign convention (negative for
# 'ar') suggests a relative change in length rather than a ratio - confirm
# against the callers before relying on it.
expansion_factors = {
        'af': 0.1,
        'ar': -0.09,
        'es': 0.21,
        'fr': 0.28,
        'it': 0.2,
}
"""Source to target string length expansion factors."""

# These imports sit in the middle of the module; the definitions below
# (the regexes and the gettext_* helpers) rely on them.
import gettext
import locale
import re
import os

# Cache filled by gettext_lang(): maps a language code ("" for the system
# language) to the gettext function that translates language names.
iso639 = {}
"""ISO 639 language codes"""
# Cache filled by gettext_country(): maps a language code ("" for the system
# language) to the gettext function that translates country names.
iso3166 = {}
"""ISO 3166 country codes"""

# A whole language code: 2-3 lowercase letters, optionally followed by "_" or
# "-" plus 2-3 uppercase letters, optionally followed by "@modifier".
langcode_re = re.compile("^[a-z]{2,3}([_-][A-Z]{2,3}|)(@[a-zA-Z0-9]+|)$")
# Only the tail of such a code: "_" or "-" plus 2-3 uppercase letters,
# optionally followed by "@modifier".
variant_re = re.compile("^[_-][A-Z]{2,3}(@[a-zA-Z0-9]+|)$")
def languagematch(languagecode, otherlanguagecode):
    """Match a language code against another, ignoring regions in the second.

    The result is meant to be used for its truth value; it is not always a
    bool (a regex match object or None may be returned).

    @param languagecode: The code to look for, or None to merely check that
        C{otherlanguagecode} looks like a language code
    @param otherlanguagecode: The code to test; it may carry a region and/or
        modifier suffix
    @return: A true value if the codes match, a false value otherwise
    """
    if languagecode is None:
        # No specific language wanted: accept anything shaped like a code.
        return langcode_re.match(otherlanguagecode)

    if languagecode == otherlanguagecode:
        return True

    if not otherlanguagecode.startswith(languagecode):
        return False

    # The codes share a prefix; what is left over must be a region/modifier.
    remainder = otherlanguagecode[len(languagecode):]
    return variant_re.match(remainder)
# Splits a name of the form "Language (Country)" into its two parts.  The
# parenthesised part may not contain digits or ")", so names such as
# "Greek, Modern (1453-)" or "Occitan (post 1500)" do not match.
dialect_name_re = re.compile(r"(.+)\s\(([^)\d]+)\)$")
def tr_lang(langcode=None):
    """Build a translator for language names.

    The returned function accepts an English language name, optionally in the
    form C{"language (country)"}, and translates it into the language with
    iso code C{langcode}, or into the system language if no language is
    specified.

    @param langcode: Code of the language to translate names into
    @return: A function taking a name and returning its translation
    """
    langfunc = gettext_lang(langcode)
    countryfunc = gettext_country(langcode)

    def handlelanguage(name):
        match = dialect_name_re.match(name)
        if not match:
            # A plain language name without a country part.
            return _fix_language_name(langfunc(name))

        language, country = match.groups()
        translated_language = _fix_language_name(langfunc(language))
        translated_country = countryfunc(country)
        return u"%s (%s)" % (translated_language, translated_country)

    return handlelanguage
def _fix_language_name(name):
    """Swap an unsightly iso-codes language name for a friendlier one.

    A name that is listed in _fixed_names is assumed to be untranslated and
    is replaced with the more usable rendering stored there; every other name
    is returned unchanged.
    """
    if name in _fixed_names:
        return _fixed_names[name]
    return name
256
def gettext_lang(langcode=None):
    """Returns a gettext function to translate language names into the given
    language, or the system language if no language is specified.

    The function is created once per language code and cached in C{iso639};
    the system language is cached under the key C{""}.

    @param langcode: Code of the target language; None or C{""} selects the
        system language
    @return: A callable translating an English language name
    """
    # Normalise the key BEFORE probing the cache.  The entry for the system
    # language is stored under "", so probing with None would never hit and
    # the translation would be rebuilt on every call.
    if not langcode:
        langcode = ""

    if langcode not in iso639:
        if langcode:
            t = gettext.translation('iso_639', languages=[langcode], fallback=True)
        else:
            syslang = None
            if os.name == "nt":
                # On Windows the default locale is not used for some reason
                syslang = locale.getdefaultlocale()[0]
            if syslang:
                t = gettext.translation('iso_639', languages=[syslang], fallback=True)
            else:
                # Also taken on Windows when the default locale is unknown:
                # passing [None] as the language list would make gettext fail.
                t = gettext.translation('iso_639', fallback=True)
        # ugettext only exists on Python 2; on Python 3 gettext already
        # returns text.
        iso639[langcode] = getattr(t, 'ugettext', None) or t.gettext
    return iso639[langcode]
272
def gettext_country(langcode=None):
    """Returns a gettext function to translate country names into the given
    language, or the system language if no language is specified.

    The function is created once per language code and cached in C{iso3166};
    the system language is cached under the key C{""}.

    @param langcode: Code of the target language; None or C{""} selects the
        system language
    @return: A callable translating an English country name
    """
    # Normalise the key BEFORE probing the cache.  The entry for the system
    # language is stored under "", so probing with None would never hit and
    # the translation would be rebuilt on every call.
    if not langcode:
        langcode = ""

    if langcode not in iso3166:
        if langcode:
            t = gettext.translation('iso_3166', languages=[langcode], fallback=True)
        else:
            syslang = None
            if os.name == "nt":
                # On Windows the default locale is not used for some reason
                syslang = locale.getdefaultlocale()[0]
            if syslang:
                t = gettext.translation('iso_3166', languages=[syslang], fallback=True)
            else:
                # Also taken on Windows when the default locale is unknown:
                # passing [None] as the language list would make gettext fail.
                t = gettext.translation('iso_3166', fallback=True)
        # ugettext only exists on Python 2; on Python 3 gettext already
        # returns text.
        iso3166[langcode] = getattr(t, 'ugettext', None) or t.gettext
    return iso3166[langcode]
288
def normalize(string, normal_form="NFC"):
    """Apply Unicode normalization to a unicode string.

    @param string: The unicode string to be normalized, or None
    @param normal_form: NFC (default), NFD, NFKC, NFKD
    @return: The normalized string, or None if C{string} is None
    """
    if string is not None:
        return unicodedata.normalize(normal_form, string)
    return None
300
def forceunicode(string):
    """Ensures that the string is in unicode.

    No Unicode normalization is performed here; use normalized_unicode() for
    that.

    @param string: A text string
    @type string: Unicode, String
    @return: String converted to Unicode; None is passed through, and any
        other kind of object is returned unchanged.
    @rtype: Unicode
    """
    if string is None:
        return None
    # ``bytes`` is the byte string type on both Python 2 (where it is an alias
    # of ``str``) and Python 3, so this check behaves the same on both.
    if isinstance(string, bytes):
        # Byte string subclasses may announce their own encoding.
        encoding = getattr(string, "encoding", "utf-8")
        string = string.decode(encoding)
    elif isinstance(string, StringElem):
        # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3.
        string = type(u"")(string)
    return string
317
def normalized_unicode(string):
    """Convert the string to unicode with forceunicode() and then pass the
    result through normalize() using its default normal form."""
    text = forceunicode(string)
    return normalize(text)
321
def normalize_code(code):
    """Return the language code in lowercase with "_" and "@" turned into "-".

    @param code: A language code such as C{"pt_BR"} or C{"sr@latin"}
    @return: The normalized code, e.g. C{"pt-br"} or C{"sr-latin"}
    """
    for separator in ("_", "@"):
        code = code.replace(separator, "-")
    return code.lower()
324
def simplify_to_common(language_code, languages=languages):
    """Simplify language code to the most commonly used form for the
    language, stripping country information for languages that tend
    not to be localized differently for different countries.

    Trailing subtags are removed one at a time (see simplercode()) until the
    code is found in C{languages}; codes are compared in their normalized
    form (see normalize_code()).

    @param language_code: The language code to simplify
    @param languages: Mapping whose keys are the known language codes; it is
        consulted at every simplification step
    @return: The first form of the code that is a key of C{languages}, or the
        last form tried when nothing more can be stripped
    """
    # Normalize the known codes once instead of once per simplification step.
    known_codes = set(normalize_code(key) for key in languages)

    # A loop keeps the caller supplied ``languages`` in force for every step;
    # recursing without passing it on would silently fall back to the default
    # table after the first step.
    while True:
        if normalize_code(language_code) in known_codes:
            return language_code
        simpler = simplercode(language_code)
        if simpler == "":
            # No subtag left to strip: give back what we have.
            return language_code
        language_code = simpler
334