1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""This module stores information and functionality that relates to plurals,
language codes and names, and the Unicode normalisation of strings."""
23
24 import unicodedata
25
26 from translate.storage.placeables import StringElem
27
28
# Plural equations are stored as bare C-style expressions in ``n`` (no
# trailing ';'), so every entry has the same shape.
languages = {
    'af': (u'Afrikaans', 2, '(n != 1)'),
    'ak': (u'Akan', 2, 'n > 1'),
    'am': (u'Amharic', 2, 'n > 1'),
    'ar': (u'Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 ? 4 : 5'),
    'arn': (u'Mapudungun; Mapuche', 2, 'n > 1'),
    'ast': (u'Asturian; Bable; Leonese; Asturleonese', 2, 'n != 1'),
    'az': (u'Azerbaijani', 2, '(n != 1)'),
    'be': (u'Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'bg': (u'Bulgarian', 2, '(n != 1)'),
    'bn': (u'Bengali', 2, '(n != 1)'),
    'bn_IN': (u'Bengali (India)', 2, '(n != 1)'),
    'bo': (u'Tibetan', 1, '0'),
    'br': (u'Breton', 2, 'n > 1'),
    'bs': (u'Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'ca': (u'Catalan; Valencian', 2, '(n != 1)'),
    'cs': (u'Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'csb': (u'Kashubian', 3, 'n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'cy': (u'Welsh', 2, '(n==2) ? 1 : 0'),
    'da': (u'Danish', 2, '(n != 1)'),
    'de': (u'German', 2, '(n != 1)'),
    'dz': (u'Dzongkha', 1, '0'),
    'el': (u'Greek, Modern (1453-)', 2, '(n != 1)'),
    'en': (u'English', 2, '(n != 1)'),
    'en_GB': (u'English (United Kingdom)', 2, '(n != 1)'),
    'en_ZA': (u'English (South Africa)', 2, '(n != 1)'),
    'eo': (u'Esperanto', 2, '(n != 1)'),
    'es': (u'Spanish; Castilian', 2, '(n != 1)'),
    'et': (u'Estonian', 2, '(n != 1)'),
    'eu': (u'Basque', 2, '(n != 1)'),
    'fa': (u'Persian', 1, '0'),
    'fi': (u'Finnish', 2, '(n != 1)'),
    'fil': (u'Filipino; Pilipino', 2, '(n > 1)'),
    'fo': (u'Faroese', 2, '(n != 1)'),
    'fr': (u'French', 2, '(n > 1)'),
    'fur': (u'Friulian', 2, '(n != 1)'),
    'fy': (u'Frisian', 2, '(n != 1)'),
    'ga': (u'Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'),
    'gl': (u'Galician', 2, '(n != 1)'),
    'gu': (u'Gujarati', 2, '(n != 1)'),
    'gun': (u'Gun', 2, '(n > 1)'),
    'ha': (u'Hausa', 2, '(n != 1)'),
    'he': (u'Hebrew', 2, '(n != 1)'),
    'hi': (u'Hindi', 2, '(n != 1)'),
    'hy': (u'Armenian', 1, '0'),
    'hr': (u'Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'hu': (u'Hungarian', 2, '(n != 1)'),
    'id': (u'Indonesian', 1, '0'),
    'is': (u'Icelandic', 2, '(n != 1)'),
    'it': (u'Italian', 2, '(n != 1)'),
    'ja': (u'Japanese', 1, '0'),
    'jv': (u'Javanese', 2, '(n != 1)'),
    'ka': (u'Georgian', 1, '0'),
    'km': (u'Central Khmer', 1, '0'),
    'kn': (u'Kannada', 2, '(n != 1)'),
    'ko': (u'Korean', 1, '0'),
    'ku': (u'Kurdish', 2, '(n != 1)'),
    'kw': (u'Cornish', 4, '(n==1) ? 0 : (n==2) ? 1 : (n == 3) ? 2 : 3'),
    'ky': (u'Kirghiz; Kyrgyz', 1, '0'),
    'lb': (u'Luxembourgish; Letzeburgesch', 2, '(n != 1)'),
    'ln': (u'Lingala', 2, '(n > 1)'),
    'lo': (u'Lao', 1, '0'),
    'lt': (u'Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'lv': (u'Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'),
    'mg': (u'Malagasy', 2, '(n > 1)'),
    'mi': (u'Maori', 2, '(n > 1)'),
    'mk': (u'Macedonian', 2, 'n==1 || n%10==1 ? 0 : 1'),
    'ml': (u'Malayalam', 2, '(n != 1)'),
    'mn': (u'Mongolian', 2, '(n != 1)'),
    'mr': (u'Marathi', 2, '(n != 1)'),
    'ms': (u'Malay', 1, '0'),
    'mt': (u'Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'),
    'nah': (u'Nahuatl languages', 2, '(n != 1)'),
    'nap': (u'Neapolitan', 2, '(n != 1)'),
    'nb': (u'Bokmål, Norwegian; Norwegian Bokmål', 2, '(n != 1)'),
    'ne': (u'Nepali', 2, '(n != 1)'),
    'nl': (u'Dutch; Flemish', 2, '(n != 1)'),
    'nn': (u'Norwegian Nynorsk; Nynorsk, Norwegian', 2, '(n != 1)'),
    'nso': (u'Pedi; Sepedi; Northern Sotho', 2, '(n > 1)'),
    'oc': (u'Occitan (post 1500)', 2, '(n > 1)'),
    'or': (u'Oriya', 2, '(n != 1)'),
    'pa': (u'Panjabi; Punjabi', 2, '(n != 1)'),
    'pap': (u'Papiamento', 2, '(n != 1)'),
    'pl': (u'Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'pms': (u'Piemontese', 2, '(n != 1)'),
    'ps': (u'Pushto; Pashto', 2, '(n != 1)'),
    'pt': (u'Portuguese', 2, '(n != 1)'),
    'pt_BR': (u'Portuguese (Brazil)', 2, '(n > 1)'),
    'ro': (u'Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2)'),
    'ru': (u'Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'sco': (u'Scots', 2, '(n != 1)'),
    'sk': (u'Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'sl': (u'Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'),
    'so': (u'Somali', 2, '(n != 1)'),
    'sq': (u'Albanian', 2, '(n != 1)'),
    'sr': (u'Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'st': (u'Sotho, Southern', 2, '(n != 1)'),
    'su': (u'Sundanese', 1, '0'),
    'sv': (u'Swedish', 2, '(n != 1)'),
    'sw': (u'Swahili', 2, '(n != 1)'),
    'ta': (u'Tamil', 2, '(n != 1)'),
    'te': (u'Telugu', 2, '(n != 1)'),
    'tg': (u'Tajik', 2, '(n != 1)'),
    'ti': (u'Tigrinya', 2, '(n > 1)'),
    'th': (u'Thai', 1, '0'),
    'tk': (u'Turkmen', 2, '(n != 1)'),
    'tr': (u'Turkish', 1, '0'),
    'uk': (u'Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'vi': (u'Vietnamese', 1, '0'),
    'wa': (u'Walloon', 2, '(n > 1)'),
    'zh_CN': (u'Chinese (China)', 1, '0'),
    'zh_HK': (u'Chinese (Hong Kong)', 1, '0'),
    'zh_TW': (u'Chinese (Taiwan)', 1, '0'),
    'zu': (u'Zulu', 2, '(n != 1)'),
}
"""Dictionary of language data.

The language code is the dictionary key (which may contain country codes and
modifiers).  The value is a tuple: (full name in English from iso-codes,
nplurals, plural equation).  The plural equation is a C-style expression in
C{n}, stored without a trailing semicolon.

Note that the English names should not be used in user facing places - it
should always be passed through the function returned from tr_lang(), or at
least passed through _fix_language_name()."""
154
155 _fixed_names = {
156 u"Asturian; Bable; Leonese; Asturleonese": u"Asturian",
157 u"Bokmål, Norwegian; Norwegian Bokmål": u"Norwegian Bokmål",
158 u"Catalan; Valencian": u"Catalan",
159 u"Central Khmer": u"Khmer",
160 u"Chichewa; Chewa; Nyanja": u"Chewa; Nyanja",
161 u"Divehi; Dhivehi; Maldivian": u"Divehi",
162 u"Dutch; Flemish": u"Dutch",
163 u"Filipino; Pilipino": u"Filipino",
164 u"Greek, Modern (1453-)": u"Greek",
165 u"Kirghiz; Kyrgyz": u"Kirghiz",
166 u"Klingon; tlhIngan-Hol": u"Klingon",
167 u"Limburgan; Limburger; Limburgish": u"Limburgish",
168 u"Low German; Low Saxon; German, Low; Saxon, Low": u"Low German",
169 u"Luxembourgish; Letzeburgesch": u"Luxembourgish",
170 u"Ndebele, South; South Ndebele": u"Southern Ndebele",
171 u"Norwegian Nynorsk; Nynorsk, Norwegian": u"Norwegian Nynorsk",
172 u"Occitan (post 1500)": u"Occitan",
173 u"Panjabi; Punjabi": u"Punjabi",
174 u"Pedi; Sepedi; Northern Sotho": u"Northern Sotho",
175 u"Pushto; Pashto": u"Pashto",
176 u"Sinhala; Sinhalese": u"Sinhala",
177 u"Sotho, Southern": u"Sotho",
178 u"Spanish; Castilian": u"Spanish",
179 u"Uighur; Uyghur": u"Uighur",
180 }
181
def simplercode(code):
    """This attempts to simplify the given language code by ignoring country
    codes, for example.

    The last component of the code (everything from the final separator
    onwards) is dropped.  An empty or C{None} code is returned unchanged, and
    a code that has no separator left simplifies to the empty string.

    @param code: The language code to simplify
    @return: The code without its last component, C{""} if there is nothing
        left to strip, or C{code} itself if it is empty/None

    @see:
      - U{http://www.rfc-editor.org/rfc/bcp/bcp47.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4646.txt}
      - U{http://www.rfc-editor.org/rfc/rfc4647.txt}
      - U{http://www.w3.org/International/articles/language-tags/}
    """
    if not code:
        return code

    # The separator is located in the normalized form but the slice is taken
    # from the original string, so the caller gets back its own spelling.
    # NOTE(review): this presumes normalize_code() preserves string length -
    # confirm against its definition.
    normalized = normalize_code(code)
    separator = normalized.rfind('-')
    if separator >= 0:
        return code[:separator]
    return ""
201
202
expansion_factors = {
    'af': 0.1,
    'ar': -0.09,
    'es': 0.21,
    'fr': 0.28,
    'it': 0.2,
}
"""Source to target string length expansion factors, keyed by language code.
A negative factor means the target is shorter than the source."""
211
212 import gettext
213 import locale
214 import re
215 import os
216
iso639 = {}
"""ISO 639 language codes: cache of gettext functions, filled in lazily by
gettext_lang() and keyed by language code."""
iso3166 = {}
"""ISO 3166 country codes: cache of gettext functions, filled in lazily by
gettext_country() and keyed by language code."""

# A full language code: 2-3 lowercase letters, optionally followed by a
# "_"/"-" separated 2-3 letter uppercase region and an "@modifier".
langcode_re = re.compile("^[a-z]{2,3}([_-][A-Z]{2,3}|)(@[a-zA-Z0-9]+|)$")
# Only the tail of a language code: a mandatory region plus optional modifier.
variant_re = re.compile("^[_-][A-Z]{2,3}(@[a-zA-Z0-9]+|)$")
224
# NOTE(review): the ``def`` line of this function was missing from the copy of
# the file this block was recovered from.  The parameter names come from the
# body; the function name could not be confirmed from this file - verify it
# against callers.
def languagematch(languagecode, otherlanguagecode):
    """Matches a languagecode to another, ignoring regions in the second.

    @param languagecode: The code to look for, or None to only check that
        C{otherlanguagecode} looks like a language code at all
    @param otherlanguagecode: The code being tested
    @return: A true value if the codes are equal, or if
        C{otherlanguagecode} is C{languagecode} followed by a region (and
        optional modifier); a false value otherwise.  The value is either a
        bool or a regex match object/None, so only its truthiness should be
        relied on.
    """
    if languagecode is None:
        return langcode_re.match(otherlanguagecode)
    if languagecode == otherlanguagecode:
        return True
    # "pt" matches "pt_BR": what follows the prefix must be a valid variant.
    return (otherlanguagecode.startswith(languagecode) and
            variant_re.match(otherlanguagecode[len(languagecode):]))
231
# Splits a name of the form "Language (Country)" into its two parts.  The
# parenthesised part may not contain digits, so names such as
# "Greek, Modern (1453-)" or "Occitan (post 1500)" are not taken for dialects.
dialect_name_re = re.compile(r"(.+)\s\(([^)\d]+)\)$")
233
# NOTE(review): the ``def`` line was missing from the copy of the file this
# block was recovered from; the ``None`` default is inferred from the
# docstring ("if no language is specified") - confirm against callers.
def tr_lang(langcode=None):
    """Gives a function that can translate a language name, even in the form
    C{"language (country)"}, into the language with iso code langcode, or the
    system language if no language is specified.

    @param langcode: Code of the language to translate the names into
    @return: A function taking an English language name and returning its
        translation, with unsightly iso-codes names cleaned up
    """
    langfunc = gettext_lang(langcode)
    countryfunc = gettext_country(langcode)

    def handlelanguage(name):
        """Translate one (possibly dialect-qualified) language name."""
        match = dialect_name_re.match(name)
        if not match:
            return _fix_language_name(langfunc(name))
        # "Language (Country)": translate both halves independently.
        language, country = match.groups()
        return u"%s (%s)" % (_fix_language_name(langfunc(language)), countryfunc(country))

    return handlelanguage
249
def _fix_language_name(name):
    """Identify and replace some unsightly names present in iso-codes.

    If the name is present in _fixed_names we assume it is untranslated and
    we replace it with a more usable rendering.  Any other name is returned
    unchanged.

    @param name: A language name, possibly already translated
    @return: The replacement from _fixed_names, or C{name} itself
    """
    return _fixed_names.get(name, name)
256
def gettext_lang(langcode=None):
    """Returns a gettext function to translate language names into the given
    language, or the system language if no language is specified.

    The function is created once per language code and cached in iso639.

    @param langcode: Code of the target language; None or "" for the system
        language
    @return: A callable mapping an English language name to its translation
        (the name itself when no translation catalogue is available)
    """
    # Normalize *before* the cache lookup: the entry for "no language" is
    # stored under "", so looking up None would miss the cache on every call.
    if not langcode:
        langcode = ""
    if langcode not in iso639:
        if langcode:
            wanted = [langcode]
        elif os.name == "nt":
            # On Windows the system locale is passed explicitly instead of
            # relying on gettext's own default lookup.  If it cannot be
            # determined, fall back to that default lookup rather than
            # handing gettext a list containing None.
            default_locale = locale.getdefaultlocale()[0]
            wanted = default_locale and [default_locale] or None
        else:
            # None makes gettext consult the usual environment variables.
            wanted = None
        t = gettext.translation('iso_639', languages=wanted, fallback=True)
        # ugettext only exists on Python 2; on Python 3 gettext already
        # returns text strings.
        iso639[langcode] = getattr(t, "ugettext", t.gettext)
    return iso639[langcode]
272
def gettext_country(langcode=None):
    """Returns a gettext function to translate country names into the given
    language, or the system language if no language is specified.

    The function is created once per language code and cached in iso3166.

    @param langcode: Code of the target language; None or "" for the system
        language
    @return: A callable mapping an English country name to its translation
        (the name itself when no translation catalogue is available)
    """
    # Normalize *before* the cache lookup: the entry for "no language" is
    # stored under "", so looking up None would miss the cache on every call.
    if not langcode:
        langcode = ""
    if langcode not in iso3166:
        if langcode:
            wanted = [langcode]
        elif os.name == "nt":
            # On Windows the system locale is passed explicitly instead of
            # relying on gettext's own default lookup.  If it cannot be
            # determined, fall back to that default lookup rather than
            # handing gettext a list containing None.
            default_locale = locale.getdefaultlocale()[0]
            wanted = default_locale and [default_locale] or None
        else:
            # None makes gettext consult the usual environment variables.
            wanted = None
        t = gettext.translation('iso_3166', languages=wanted, fallback=True)
        # ugettext only exists on Python 2; on Python 3 gettext already
        # returns text strings.
        iso3166[langcode] = getattr(t, "ugettext", t.gettext)
    return iso3166[langcode]
288
def normalize(string, normal_form="NFC"):
    """Return a unicode string in its normalized form.

    @param string: The (unicode) string to be normalized, or None
    @param normal_form: NFC (default), NFD, NFKC, NFKD
    @return: Normalized string, or None if C{string} is None
    """
    if string is None:
        return None
    return unicodedata.normalize(normal_form, string)
300
# Text/byte string types for the running interpreter, so that forceunicode()
# behaves the same on Python 2 and Python 3.
try:
    _text_type, _bytes_type = unicode, str  # Python 2
except NameError:
    _text_type, _bytes_type = str, bytes  # Python 3


def forceunicode(string):
    """Ensures that the string is in unicode.

    Byte strings are decoded and StringElem objects are converted to text;
    anything else (including text that is already unicode) is returned as
    is.  No Unicode normalization is done here.

    @param string: A text string
    @type string: Unicode, String
    @return: String converted to Unicode, or None if C{string} is None
    @rtype: Unicode
    """
    if string is None:
        return None
    if isinstance(string, _bytes_type):
        # Byte-string subclasses may carry their own ``encoding`` attribute;
        # plain byte strings are taken to be UTF-8.
        encoding = getattr(string, "encoding", "utf-8")
        string = string.decode(encoding)
    elif isinstance(string, StringElem):
        string = _text_type(string)
    return string
317
# NOTE(review): the ``def`` line of this function was missing from the copy of
# the file this block was recovered from.  The parameter name comes from the
# body; the function name could not be confirmed from this file - verify it
# against callers.
def normalized_unicode(string):
    """Forces the string to unicode and does normalization.

    @param string: A byte or text string, or None
    @return: The result of normalize(forceunicode(string))
    """
    return normalize(forceunicode(string))
321
324
# NOTE(review): the ``def`` line was missing from the copy of the file this
# block was recovered from.  ``language_code`` comes from the body; the
# optional ``languages`` argument generalizes the previously hard-coded
# lookup table and defaults to it - confirm against callers.
def simplify_to_common(language_code, languages=languages):
    """Simplify language code to the most commonly used form for the
    language, stripping country information for languages that tend
    not to be localized differently for different countries.

    Components are stripped from the end of the code until it matches a known
    code (compared in normalized form) or cannot be simplified any further.

    @param language_code: The language code to simplify
    @param languages: The known language codes (a dictionary keyed by code,
        or any iterable of codes); defaults to the module's language table
    @return: The first known code found while simplifying, or the last
        non-simplifiable remainder if none is known
    """
    simpler = simplercode(language_code)
    known_codes = set(normalize_code(key) for key in languages)
    if normalize_code(language_code) in known_codes or simpler == "":
        return language_code
    return simplify_to_common(simpler, languages)
334