Home | Trees | Indices | Help |
|
---|
|
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2009 Zuza Software Foundation
#
# This file is part of the Translate Toolkit.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.

"""
Contains general placeable implementations. That is placeables that do not
fit into any other sub-category.
"""

import re

__all__ = ['AltAttrPlaceable', 'XMLEntityPlaceable', 'XMLTagPlaceable', 'parsers', 'to_general_placeables']

from translate.storage.placeables.base import G, Ph, StringElem

# NOTE(review): this module was recovered from a flattened source listing in
# which the ``def``/``class`` header lines were missing.  Class names were
# taken from the ``parsers`` list and the default ``classmap`` below; the base
# class of every placeable that does not appear in ``classmap`` is assumed to
# be ``Ph`` -- confirm against the original file.


def regex_parse(cls, pstr):
    """A parser method to extract placeables from a string based on a regular
    expression. Use this function as the C{@parse()} method of a placeable
    class.

    @param cls: The placeable class; its C{regex} attribute is the compiled
        pattern to search with.
    @param pstr: The string to search.
    @return: C{None} if C{cls.regex} is C{None} or nothing matched. Otherwise
        a list covering all of C{pstr} in order: a C{StringElem} for every
        unmatched stretch of text and a C{cls} instance for every match.
    """
    if cls.regex is None:
        return None
    matches = []
    oldend = 0
    for match in cls.regex.finditer(pstr):
        start, end = match.start(), match.end()
        if oldend != start:
            # Plain text between the previous match and this one.
            matches.append(StringElem(pstr[oldend:start]))
        matches.append(cls([pstr[start:end]]))
        oldend = end
    # Only add the trailing text when something matched, so that a string
    # without any placeable yields None rather than a one-element list.
    if oldend != len(pstr) and matches:
        matches.append(StringElem(pstr[oldend:]))
    return matches or None


class AltAttrPlaceable(G):
    """Placeable for the "alt=..." attributes inside XML tags."""

    regex = re.compile(r'alt=".*?"')
    parse = classmethod(regex_parse)


class NewlinePlaceable(Ph):
    """Matches new-lines."""

    iseditable = False
    isfragile = True
    istranslatable = False
    regex = re.compile(r'\n')
    parse = classmethod(regex_parse)


class NumberPlaceable(Ph):
    """Placeable for numbers."""

    istranslatable = False
    # The only backslash in this pattern is the \u00a0 (no-break space)
    # escape, so a plain unicode literal yields exactly the same pattern as a
    # raw unicode one.
    regex = re.compile(u"[-+]?[0-9]+([\u00a0.,][0-9]+)*")
    parse = classmethod(regex_parse)


class QtFormattingPlaceable(Ph):
    """Placeable representing a Qt string formatting variable.

    Implemented following Qt documentation on
    U{QString::arg<http://doc.trolltech.com/4.5/qstring.html#arg>} where
    the placeables are referred to as 'place markers'

    Notes:
      - Place markers can be reordered
      - Place markers may be repeated
      - 'L' use a localised representation e.g. in a number
      - %% some in the wild to escape real %, not documented (not in regex)
    """

    iseditable = False
    istranslatable = False
    regex = re.compile(r"""(?x)
                       %                 # Start of a place marker
                       L?                # The sequence is replaced with a localized representation (optional)
                       [1-9]\d{0,1}      # Place marker numbers must be in the range 1 to 99.
                       (?=([^\d]|$))     # Double check that we aren't matching %100+ (non consuming match)
                       """)
    parse = classmethod(regex_parse)


class PythonFormattingPlaceable(Ph):
    """Placeable representing a Python string formatting variable.

    Implemented following Python documentation on
    U{String Formatting Operations<http://docs.python.org/library/stdtypes.html#string-formatting-operations>}"""

    iseditable = False
    istranslatable = False
    # Need to correctly define a python identifier.
    regex = re.compile(r"""(?x)
                       %                     # Start of formatting specifier
                       (%|                   # No argument converted %% creates a %
                       (\([a-z_]+\)){0,1}    # Mapping key value (optional)
                       [\-\+0\s\#]{0,1}      # Conversion flags (optional)
                       (\d+|\*){0,1}         # Minimum field width (optional)
                       (\.(\d+|\*)){0,1}     # Precision (optional)
                       [hlL]{0,1}            # Length modifier (optional)
                       [diouxXeEfFgGcrs]{1}) # Conversion type""")
    parse = classmethod(regex_parse)


class JavaMessageFormatPlaceable(Ph):
    """Placeable representing a Java MessageFormat formatting variable.

    Implemented according to the Java U{MessageFormat
    documentation<http://java.sun.com/j2se/1.4.2/docs/api/java/text/MessageFormat.html>}.

    Information about custom formats:
      - number - U{DecimalFormat<http://java.sun.com/j2se/1.4.2/docs/api/java/text/DecimalFormat.html>}
      - date/time - U{SimpleDateFormat<http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat.html>}
      - choice - U{ChoiceFormat<http://java.sun.com/j2se/1.4.2/docs/api/java/text/ChoiceFormat.html>}
    """

    # TODO: Technically incorrect as you need to change things in a choice
    # entry
    iseditable = False
    istranslatable = False
    # NOTE(review): this is a plain raw pattern, so the \u2030 and \u00a4
    # escapes reach the re engine unexpanded -- confirm that the interpreter
    # in use understands \uXXXX inside patterns.
    regex = re.compile(r"""(?x)
      {                      # Start of MessageFormat
      [0-9]+                 # Number, positive array reference
      (,\s*                  # FormatType (optional) one of number,date,time,choice
        (number(,\s*(integer|currency|percent|[-0#.,E;%\u2030\u00a4']+)?)?|  # number FormatStyle (optional)
         (date|time)(,\s*(short|medium|long|full|.+?))?|                  # date/time FormatStyle (optional)
         choice,([^{]+({.+})?)+)?                                      # choice with format, format required
      )?                     # END: (optional) FormatType
      }                      # END: MessageFormat""")
    parse = classmethod(regex_parse)


class FormattingPlaceable(Ph):
    """Placeable representing string formatting variables."""
    #For more information, see  man 3 printf
    #We probably don't want to support absolutely everything

    iseditable = False
    istranslatable = False
    regex = re.compile(r"""
        %                         # introduction
        (\d+\$)?                  # selection of non-next variable (reordering)
        [\-\+0 \#'I]?             # optional flag
        ((\d+)|[*])?              # field width
        (\.\d+)?                  # precision
        [hlI]?                    # length
        [cCdiouxXeEfgGnpsS]       # conversion specifier
        """, re.VERBOSE)
    parse = classmethod(regex_parse)


class UrlPlaceable(Ph):
    """Placeable handling URI."""

    istranslatable = False
    # NOTE(review): "nttp" in the protocol list looks like a misspelling of
    # "nntp"; left untouched here because it changes what is matched.
    regex = re.compile(r"""
    ((((news|nttp|file|https?|ftp|irc)://)       # has to start with a protocol
    |((www|ftp)[-A-Za-z0-9]*\.))                 # or www... or ftp... hostname
    ([-A-Za-z0-9]+(\.[-A-Za-z0-9]+)*)            # hostname
    |(\d{1,3}(\.\d{1,3}){3,3}))                  # or IP address
    (:[0-9]{1,5})?                               # optional port
    (/[-A-Za-z0-9_\$\.\+\!\*\(\),;:@&=\?/~\#\%]*)?     # optional trailing path
    (?=$|\s|([]'}>\),\"]))
    """, re.VERBOSE)
    parse = classmethod(regex_parse)


class FilePlaceable(Ph):
    """Placeable handling file locations."""

    istranslatable = False
    regex = re.compile(r"(~/|/|\./)([-A-Za-z0-9_\$\.\+\!\*\(\),;:@&=\?/~\#\%]|\\){3,}")
    #TODO: Handle Windows drive letters. Some common Windows paths won't be
    # handled correctly while not allowing spaces, such as
    #     "C:\Documents and Settings"
    #     "C:\Program Files"
    parse = classmethod(regex_parse)


class EmailPlaceable(Ph):
    """Placeable handling emails."""

    istranslatable = False
    regex = re.compile(r"((mailto:)|)[A-Za-z0-9]+[-a-zA-Z0-9._%]*@(([-A-Za-z0-9]+)\.)+[a-zA-Z]{2,4}")
    # TODO: What about internationalised domain names? ;-)
    parse = classmethod(regex_parse)


class PunctuationPlaceable(Ph):
    """Placeable handling punctuation."""

    iseditable = False
    istranslatable = False
    # FIXME this should really be a list created as being the inverse of what
    # is available on the translators keyboard.  Or easily expanded by their
    # configuration.
    #
    # The narrow no-break space is spelled as an explicit \u202f escape so
    # that it cannot silently be turned into an ordinary space by an editor
    # or a copy/paste.  It is the only backslash in the literal.
    regex = re.compile(u'''([™©®]|          # Marks
                            [℃℉°]|          # Degree related
                            [±πθ×÷−√∞∆Σ′″]| # Maths
                            [‘’ʼ‚‛“”„‟]|    # Quote characters
                            [£¥]|           # Currencies
                            …|              # U2026 - horizontal ellipsis
                            —|              # U2014 - em dash
                            –|              # U2013 - en dash
                            [\u202f]        # U202F - narrow no-break space
                           )+''', re.VERBOSE)
    parse = classmethod(regex_parse)


class XMLEntityPlaceable(Ph):
    """Placeable handling XML entities (C{&xxxxx;}-style entities)."""

    iseditable = False
    istranslatable = False
    regex = re.compile(r'''&(
        ([a-zA-Z][a-zA-Z0-9\.-]*)            #named entity
         |([#](\d{1,5}|x[a-fA-F0-9]{1,5})+)  #numeric entity
        );''', re.VERBOSE)
    parse = classmethod(regex_parse)


class CapsPlaceable(Ph):
    """Placeable handling long all-caps strings."""

    iseditable = True
    regex = re.compile(r'\b[A-Z][A-Z_/\-:*0-9]{2,}\b[+]?')
    parse = classmethod(regex_parse)


class CamelCasePlaceable(Ph):
    """Placeable handling camel case strings."""

    iseditable = True
    regex = re.compile(r'''(?x)
            \b(
               [a-z]+[A-Z]|         #Not that strict if we start with lower (iPod)
               [A-Z]+[a-z]+[A-Z]|   #One capital at the start is not enough (OpenTran)
               [A-Z]{2,}[a-z]       #Two capitals at the start is enough    (KBabel)
            )[a-zA-Z0-9]*           #Let's allow any final lower/upper/digit
            \b''')
    parse = classmethod(regex_parse)


# NOTE(review): the class name below is not referenced anywhere else in this
# module and was not present in the recovered listing -- confirm it.
class SpacesPlaceable(Ph):
    """Placeable handling unusual spaces in strings."""

    iseditable = True
    istranslatable = False
    regex = re.compile(r"""(?m)  #Multiline expression
        [ ]{2,}|     #More than two consecutive
        ^[ ]+|       #At start of a line
        [ ]+$        #At end of line""", re.VERBOSE)

    parse = classmethod(regex_parse)


class XMLTagPlaceable(Ph):
    """Placeable handling XML tags."""

    iseditable = True
    istranslatable = False
    regex = re.compile(r'<([\w:]+)(\s([\w:]+=".*?")?)*/?>|</(\w+)>')
    parse = classmethod(regex_parse)


class OptionPlaceable(Ph):
    """Placeable handling command line options e.g. --help"""

    istranslatable = False
    regex = re.compile(r'''(?x)
                      \B(             # Empty string at the start of a non-word, ensures [space]-
                        -[a-zA-Z]|    # Single letter options: -i, -I
                        --[a-z\-]+    # Word options: --help
                      )\b''')
    parse = classmethod(regex_parse)


def to_general_placeables(tree, classmap={
        G: (AltAttrPlaceable,),
        Ph: (
            NumberPlaceable,
            XMLEntityPlaceable,
            XMLTagPlaceable,
            UrlPlaceable,
            FilePlaceable,
            EmailPlaceable,
            OptionPlaceable,
            PunctuationPlaceable,
        ),
        }):
    """Build a copy of C{tree} in which elements are replaced by the general
    placeable classes of this module where one applies.

    @param tree: The element (tree) to convert. Anything that is not a
        C{StringElem} is returned unchanged.
    @param classmap: Maps a base class to a sequence of candidate classes.
        For an element that is an instance of the base class, the first
        candidate whose C{parse()} finds something in the element's text
        becomes the class of the new element. The default mapping is only
        read, never modified.
    @return: A new element of the chosen class (or of C{tree}'s own class if
        no candidate applies) carrying C{tree}'s C{id}, C{rid} and C{xid},
        with all of C{tree.sub} converted recursively.
    """
    if not isinstance(tree, StringElem):
        return tree

    newtree = None

    for baseclass, gclasslist in classmap.items():
        if isinstance(tree, baseclass):
            text = unicode(tree)
            # The first candidate that parses anything wins; there is no need
            # to run the remaining parsers after that.
            for candidate in gclasslist:
                if candidate.parse(text):
                    newtree = candidate()
                    break

    if newtree is None:
        newtree = tree.__class__()

    newtree.id = tree.id
    newtree.rid = tree.rid
    newtree.xid = tree.xid
    # Pass classmap down explicitly: without it a caller-supplied mapping
    # would apply to the root only and every descendant would fall back to
    # the default mapping.
    newtree.sub = [to_general_placeables(subtree, classmap) for subtree in tree.sub]

    return newtree


# The order of these parsers is very important
parsers = [
    NewlinePlaceable.parse,
    XMLTagPlaceable.parse,
    AltAttrPlaceable.parse,
    XMLEntityPlaceable.parse,
    PythonFormattingPlaceable.parse,
    JavaMessageFormatPlaceable.parse,
    FormattingPlaceable.parse,
    # The Qt variables can consume the %1 in %1$s which will mask a printf
    # placeable, so it has to come later.
    QtFormattingPlaceable.parse,
    UrlPlaceable.parse,
    FilePlaceable.parse,
    EmailPlaceable.parse,
    CapsPlaceable.parse,
    CamelCasePlaceable.parse,
    OptionPlaceable.parse,
    PunctuationPlaceable.parse,
    NumberPlaceable.parse,
]
Home | Trees | Indices | Help |
|
---|
Generated by Epydoc 3.0.1 on Wed Mar 3 16:38:40 2010 | http://epydoc.sourceforge.net |