Package translate :: Package filters :: Module decoration
[hide private]
[frames] | no frames]

Source Code for Module translate.filters.decoration

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2004-2008 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """functions to get decorative/informative text out of strings...""" 
 23   
 24  import re 
 25  import unicodedata 
 26  from translate.lang import data 
 27   
def spacestart(str1):
    """returns the run of whitespace characters that str1 begins with

    @param str1: the string to inspect
    @return: the leading whitespace (empty if str1 does not start with any)
    """
    leading = []
    for char in str1:
        if not char.isspace():
            # first non-whitespace character ends the leading run
            break
        leading.append(char)
    return u"".join(leading)
37
def spaceend(str1):
    """returns the run of whitespace characters that str1 ends with

    @param str1: the string to inspect
    @return: the trailing whitespace, in its original order
    """
    trailing = []
    for char in reversed(str1):
        if not char.isspace():
            # first non-whitespace character (from the right) ends the run
            break
        trailing.append(char)
    # characters were collected right-to-left; restore reading order
    trailing.reverse()
    return u"".join(trailing)
48
def puncstart(str1, punctuation):
    """returns the leading run of punctuation (and whitespace) in str1

    @param str1: the string to inspect
    @param punctuation: the characters that count as punctuation
    @return: every character from the start of str1 up to (not including)
        the first one that is neither in punctuation nor whitespace
    """
    leading = []
    for char in str1:
        if not (char in punctuation or char.isspace()):
            break
        leading.append(char)
    return u"".join(leading)
58
def puncend(str1, punctuation):
    """returns the trailing run of punctuation (and whitespace) in str1

    @param str1: the string to inspect
    @param punctuation: the characters that count as punctuation
    @return: the trailing punctuation/whitespace in its original order, with
        any non-breaking space (U+00A0) replaced by an ordinary space
    """
    # Note from the original author: an implementation with regular
    # expressions was slightly slower.
    trailing = []
    for char in reversed(str1):
        if not (char in punctuation or char.isspace()):
            break
        trailing.append(char)
    # characters were collected right-to-left; restore reading order
    trailing.reverse()
    return u"".join(trailing).replace(u"\u00a0", u" ")
71
def ispurepunctuation(str1):
    """checks whether the string contains no alphanumeric characters at all

    @param str1: the string to inspect
    @return: False if any character is alphanumeric; otherwise the length of
        str1 (so an empty string gives 0, which is also false)
    """
    if any(char.isalnum() for char in str1):
        return False
    return len(str1)
78
def isvalidaccelerator(accelerator, acceptlist=None):
    """returns whether the given accelerator character is valid

    @type accelerator: character
    @param accelerator: A character to be checked for accelerator validity
    @type acceptlist: String
    @param acceptlist: A list of characters that are permissible as
        accelerators; when None the older built-in heuristic is used instead
    @rtype: Boolean
    @return: True if the supplied character is an acceptable accelerator
    """
    assert isinstance(accelerator, unicode)
    assert isinstance(acceptlist, unicode) or acceptlist is None
    if not accelerator:
        return False

    if acceptlist is not None:
        # Explicit whitelist: valid exactly when found in the normalized list.
        return accelerator in data.normalize(acceptlist)

    # Old code path - ensures that we don't get a large number of regressions
    candidate = accelerator.replace("_", "")
    if candidate in u"-?":
        return True
    if not candidate.isalnum():
        return False

    # We don't want accelerators on characters with diacritics, so see whether
    # the character decomposes into more than one code point.
    parts = unicodedata.decomposition(candidate)
    # Strip any extra information like <this> before counting the parts.
    parts = re.sub("<[^>]+>", "", parts).strip()
    return " " not in parts
112
def findaccelerators(str1, accelmarker, acceptlist=None):
    """returns all the accelerators and locations in str1 marked with a given marker

    @param str1: the string to search
    @param accelmarker: the text that precedes an accelerator character
    @param acceptlist: passed through to isvalidaccelerator
    @return: a tuple (accelerators, badaccelerators); each is a list of
        (position of the marker, accelerator character) tuples
    """
    valid = []
    invalid = []
    markerlen = len(accelmarker)
    searchfrom = 0
    while True:
        markerpos = str1.find(accelmarker, searchfrom)
        if markerpos < 0:
            break
        # we assume accelerators are single characters
        charpos = markerpos + markerlen
        searchfrom = charpos + 1
        if searchfrom > len(str1):
            # marker sits at the very end: there is no character to mark
            break
        candidate = str1[charpos:searchfrom]
        if isvalidaccelerator(candidate, acceptlist):
            valid.append((markerpos, candidate))
        else:
            invalid.append((markerpos, candidate))
    return valid, invalid
133
def findmarkedvariables(str1, startmarker, endmarker, ignorelist=None):
    """returns all the variables and locations in str1 marked with a given marker

    @param str1: the string to search
    @param startmarker: the text that introduces a variable
    @param endmarker: how a variable ends. None means the variable is the run
        of alphanumeric/underscore characters after the start marker; an int
        means a fixed-length variable of that many characters; anything else
        is the closing marker text
    @param ignorelist: variable names that must not be reported
    @return: a list of (position of the start marker, variable name) tuples
    """
    if ignorelist is None:
        ignorelist = ()
    variables = []
    currentpos = 0
    while currentpos >= 0:
        variable = None
        currentpos = str1.find(startmarker, currentpos)
        if currentpos >= 0:
            startmatch = currentpos
            currentpos += len(startmarker)
            if endmarker is None:
                # No end marker: any character that is not alphanumeric or an
                # underscore terminates the variable.
                for n in range(currentpos, len(str1)):
                    if not (str1[n].isalnum() or str1[n] == '_'):
                        endmatch = n
                        break
                else:
                    # No terminator found: the variable runs to the end.
                    endmatch = len(str1)
                # A terminator directly after the marker (a lone marker) gives
                # an empty name: report nothing, but keep scanning from here
                # so that later variables in the string are still found.
                if currentpos < endmatch:
                    variable = str1[currentpos:endmatch]
                currentpos = endmatch
            elif isinstance(endmarker, int):
                # An int endmarker means a fixed-length variable string
                # (usually endmarker == 1).
                endmatch = currentpos + endmarker
                if endmatch > len(str1):
                    break
                variable = str1[currentpos:endmatch]
                currentpos = endmatch
            else:
                endmatch = str1.find(endmarker, currentpos)
                if endmatch == -1:
                    break
                # Search backwards in case there's an intervening startmarker
                # (if not it's OK); the innermost start marker wins.
                start2 = str1.rfind(startmarker, currentpos, endmatch)
                if start2 != -1:
                    startmatch2 = start2
                    start2 += len(startmarker)
                    if start2 != currentpos:
                        currentpos = start2
                        startmatch = startmatch2
                variable = str1[currentpos:endmatch]
                currentpos = endmatch + len(endmarker)
            if variable is not None and variable not in ignorelist:
                # Accept empty names and names made of alphanumerics, "_" and "."
                if not variable or variable.replace("_", "").replace(".", "").isalnum():
                    variables.append((startmatch, variable))
    return variables
178
def getaccelerators(accelmarker, acceptlist=None):
    """returns a function that gets a list of accelerators marked using accelmarker

    @param accelmarker: the text that precedes an accelerator character
    @param acceptlist: passed through to findaccelerators
    @return: a function taking a string and returning
        (accelerators, badaccelerators) as two lists of characters
    """
    def getmarkedaccelerators(str1):
        """returns the valid and invalid accelerator characters found in str1"""
        good, bad = findaccelerators(str1, accelmarker, acceptlist)
        # drop the positions, keep only the accelerator characters
        goodchars = [entry[1] for entry in good]
        badchars = [entry[1] for entry in bad]
        return goodchars, badchars
    return getmarkedaccelerators

def getvariables(startmarker, endmarker):
    """returns a function that gets a list of variables marked using startmarker and endmarker

    @param startmarker: the text that introduces a variable
    @param endmarker: passed through to findmarkedvariables
    @return: a function taking a string and returning the list of variable
        names found in it
    """
    def getmarkedvariables(str1):
        """returns the names of all the marked variables found in str1"""
        located = findmarkedvariables(str1, startmarker, endmarker)
        # drop the positions, keep only the variable names
        return [entry[1] for entry in located]
    return getmarkedvariables

def getnumbers(str1):
    """returns any numbers that are in the string

    A number is a run of digits that may contain periods and degree signs.
    Periods are only kept when another digit follows them, so a trailing
    full stop is not treated as part of the number.

    @param str1: the (unicode) string to scan
    @return: the list of number strings, in order of appearance
    """
    # TODO: handle locale-based periods e.g. 2,5 for Afrikaans
    assert isinstance(str1, unicode)
    degree = u'\xb0'
    found = []
    current = ""
    # periods seen since the last digit; only committed if a digit follows
    pendingdots = ""
    inside = False
    for char in str1:
        if char.isdigit():
            inside = True
        elif inside and char != '.' and char != degree:
            # any other character terminates the number being collected
            inside = False
            if current:
                found.append(current)
            current = ""
        if not inside:
            pendingdots = ""
            continue
        if char == degree:
            current += char
        elif char == '.':
            pendingdots += char
        else:
            current += pendingdots + char
            pendingdots = ""
    if inside and current:
        found.append(current)
    return found
230
def getfunctions(str1, punctuation):
    """returns the functions() that are in a string, while ignoring the
    trailing punctuation in the given parameter

    @param str1: the string to scan (split on whitespace)
    @param punctuation: characters to strip from the end of each word;
        parentheses are never stripped
    @return: the list of words that end in "()" once stripped
    """
    strippable = punctuation.replace("(", "").replace(")", "")
    functions = []
    for word in str1.split():
        candidate = word.rstrip(strippable)
        if candidate.endswith("()"):
            functions.append(candidate)
    return functions
236
def getemails(str1):
    """returns the email addresses that are in a string

    @param str1: the string to scan
    @return: a list of every non-overlapping match of
        word/period/hyphen characters on both sides of an "@"
    """
    # Raw string so the regex escapes reach the re module untouched and are
    # not treated as (invalid) string-literal escape sequences.
    return re.findall(r'[\w\.\-]+@[\w\.\-]+', str1)
240
def geturls(str1):
    """returns the URIs in a string

    @param str1: the string to scan
    @return: a list of every match starting with "http:", "https:", "www."
        or "ftp:" followed by characters allowed by the pattern
    """
    # Raw strings so the regex escapes reach the re module untouched and are
    # not treated as (invalid) string-literal escape sequences.
    URLPAT = (r'https?:[\w/\.:;+\-~\%#\$?=&,()]+|www\.[\w/\.:;+\-~\%#\$?=&,()]+|'
              r'ftp:[\w/\.:;+\-~\%#?=&,]+')
    return re.findall(URLPAT, str1)
246
def countaccelerators(accelmarker, acceptlist=None):
    """returns a function that counts the number of accelerators marked with the given marker

    @param accelmarker: the text that precedes an accelerator character
    @param acceptlist: passed through to findaccelerators
    @return: a function taking a string and returning the tuple
        (number of valid accelerators, number of invalid accelerators)
    """
    def countmarkedaccelerators(str1):
        """returns how many valid and invalid accelerators str1 contains"""
        located = findaccelerators(str1, accelmarker, acceptlist)
        valid, invalid = located
        return len(valid), len(invalid)
    return countmarkedaccelerators
