1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
"""Functions to get decorative/informative text out of strings."""
23
import re
import unicodedata

from translate.lang import data
27
# NOTE(review): the `def` line is missing from this listing; the name
# `spacestart` is an assumption (only the `str1` parameter is evidenced by
# the body) -- confirm before merging.
def spacestart(str1):
    """Return all the whitespace from the start of the string.

    Characters are taken for as long as ``isspace()`` is true for them. The
    result is empty when ``str1`` is empty or does not start with whitespace.
    """
    end = 0
    for char in str1:
        if not char.isspace():
            break
        end += 1
    # Prepending u"" keeps the result a text string, exactly as the original
    # accumulator (which started from u"") did.
    return u"" + str1[:end]
37
# NOTE(review): the `def` line is missing from this listing; the name
# `spaceend` is an assumption (only the `str1` parameter is evidenced by the
# body) -- confirm before merging.
def spaceend(str1):
    """Return all the whitespace from the end of the string.

    The string is scanned backwards for as long as ``isspace()`` is true. The
    result is empty when ``str1`` is empty or does not end with whitespace.
    """
    count = 0
    for char in reversed(str1):
        if not char.isspace():
            break
        count += 1
    # Slice from an explicit index: str1[-0:] would return the whole string.
    # u"" keeps the result a text string, as the original accumulator did.
    return u"" + str1[len(str1) - count:]
48
# NOTE(review): the `def` line is missing from this listing; the name
# `puncstart` and the parameter order (str1, punctuation) are assumptions
# (the body only shows that both names are parameters) -- confirm.
def puncstart(str1, punctuation):
    """Return all the punctuation from the start of the string.

    A character is kept while it is either contained in ``punctuation`` or is
    whitespace, so leading spaces mixed with punctuation are returned too.
    """
    end = 0
    for char in str1:
        if not (char in punctuation or char.isspace()):
            break
        end += 1
    # u"" keeps the result a text string, as the original accumulator did.
    return u"" + str1[:end]
58
# NOTE(review): the `def` line is missing from this listing; the name
# `puncend` and the parameter order (str1, punctuation) are assumptions
# (the body only shows that both names are parameters) -- confirm.
def puncend(str1, punctuation):
    """Return all the punctuation from the end of the string.

    The string is scanned backwards while each character is either contained
    in ``punctuation`` or is whitespace. Any no-break space (U+00A0) in the
    collected run is returned as an ordinary space.
    """
    count = 0
    for char in reversed(str1):
        if not (char in punctuation or char.isspace()):
            break
        count += 1
    # Slice from an explicit index: str1[-0:] would return the whole string.
    # u"" keeps the result a text string, as the original accumulator did.
    tail = u"" + str1[len(str1) - count:]
    return tail.replace(u"\u00a0", u" ")
71
# NOTE(review): the `def` line is missing from this listing; the name
# `ispurepunctuation` is an assumption (only the `str1` parameter is
# evidenced by the body) -- confirm before merging.
def ispurepunctuation(str1):
    """Check whether the string is entirely punctuation.

    Returns True when ``str1`` is non-empty and none of its characters is
    alphanumeric (whitespace therefore counts as punctuation here), otherwise
    False. The result is a real boolean; its truthiness matches the length
    that used to be returned for the non-alphanumeric case.
    """
    return bool(str1) and not any(char.isalnum() for char in str1)
78
# NOTE(review): the `def` line is missing from this listing; the name and the
# argument order follow the existing call
# `isvalidaccelerator(accelerator, acceptlist)`. The `None` default is an
# assumption -- confirm.
def isvalidaccelerator(accelerator, acceptlist=None):
    """Return whether the given accelerator character is valid.

    @type accelerator: character
    @param accelerator: A character to be checked for accelerator validity
    @type acceptlist: String
    @param acceptlist: A list of characters that are permissible as
        accelerators. When None, a built-in fallback rule is applied instead.
    @rtype: Boolean
    @return: True if the supplied character is an acceptable accelerator
    """
    # `unicode` only exists on Python 2; on Python 3 every str is text.
    try:
        text_type = unicode
    except NameError:
        text_type = str
    assert isinstance(accelerator, text_type)
    assert isinstance(acceptlist, text_type) or acceptlist is None
    if not accelerator:
        return False
    if acceptlist is not None:
        return accelerator in data.normalize(acceptlist)

    # Fallback rule used when no accept list is supplied.
    accelerator = accelerator.replace("_", "")
    # An accelerator consisting only of underscores is "" at this point and
    # therefore also passes this membership test.
    if accelerator in u"-?":
        return True
    if not accelerator.isalnum():
        return False

    # Reject characters whose decomposition consists of several code points
    # (for instance a base letter plus a combining mark). Tags such as
    # <super> are stripped first so that they do not count.
    decomposition = unicodedata.decomposition(accelerator)
    decomposition = re.sub(r"<[^>]+>", "", decomposition).strip()
    return " " not in decomposition
112
# NOTE(review): the `def` line is missing from this listing; the name and the
# argument order follow the existing calls
# `findaccelerators(str1, accelmarker, acceptlist)`. The `None` default is an
# assumption -- confirm.
def findaccelerators(str1, accelmarker, acceptlist=None):
    """Return all the accelerators and locations in str1 marked with a given marker.

    The single character that follows each occurrence of ``accelmarker`` is
    taken as the accelerator and classified with ``isvalidaccelerator``.

    Returns a pair ``(accelerators, badaccelerators)``; each is a list of
    ``(marker position, accelerator character)`` tuples.
    """
    accelerators = []
    badaccelerators = []
    markerlen = len(accelmarker)
    currentpos = str1.find(accelmarker)
    while currentpos >= 0:
        accelstart = currentpos
        # Accelerators are treated as exactly one character after the marker.
        accelend = accelstart + markerlen + 1
        if accelend > len(str1):
            # The marker sits at the very end: nothing left to mark.
            break
        accelerator = str1[accelend - 1:accelend]
        if isvalidaccelerator(accelerator, acceptlist):
            accelerators.append((accelstart, accelerator))
        else:
            badaccelerators.append((accelstart, accelerator))
        # Resume the search after the accelerator character.
        currentpos = str1.find(accelmarker, accelend)
    return accelerators, badaccelerators
133
# NOTE(review): the `def` line is missing from this listing; the name and the
# first three arguments follow the existing call
# `findmarkedvariables(str1, startmarker, endmarker)`. That call passes no
# `ignorelist`, so it must have a default; an empty tuple is assumed here.
def findmarkedvariables(str1, startmarker, endmarker, ignorelist=()):
    """Return all the variables and locations in str1 marked with a given marker.

    ``endmarker`` selects how the end of a variable is found:

    - ``None``: the name runs up to the first character that is neither
      alphanumeric nor an underscore;
    - an ``int``: the name has exactly that many characters;
    - anything else: the name runs up to the next occurrence of ``endmarker``.

    Names contained in ``ignorelist`` are skipped, and so are non-empty names
    that contain anything other than alphanumerics, ``_`` and ``.``.

    Returns a list of ``(start marker position, variable name)`` tuples.
    """
    variables = []
    currentpos = 0
    while currentpos >= 0:
        variable = None
        currentpos = str1.find(startmarker, currentpos)
        if currentpos < 0:
            break
        startmatch = currentpos
        currentpos += len(startmarker)
        if endmarker is None:
            endmatch = currentpos
            for n in range(currentpos, len(str1)):
                if not (str1[n].isalnum() or str1[n] == '_'):
                    endmatch = n
                    break
            # NOTE(review): endmatch == currentpos means either "the name
            # runs to the end of the string" or "the very next character
            # already ends the name". Both cases extend the match to the end
            # of the string, so in the second case any later markers are
            # never looked at. Kept as is -- confirm this is intended.
            if currentpos == endmatch:
                endmatch = len(str1)
            if currentpos < endmatch:
                variable = str1[currentpos:endmatch]
            currentpos = endmatch
        elif isinstance(endmarker, int):
            # Fixed-length variable: take exactly `endmarker` characters.
            endmatch = currentpos + endmarker
            if endmatch > len(str1):
                break
            variable = str1[currentpos:endmatch]
            currentpos = endmatch
        else:
            endmatch = str1.find(endmarker, currentpos)
            if endmatch == -1:
                break
            # If another start marker lies between this one and the end
            # marker, the variable starts at the last such marker instead.
            start2 = str1.rfind(startmarker, currentpos, endmatch)
            if start2 != -1:
                startmatch2 = start2
                start2 += len(startmarker)
                if start2 != currentpos:
                    currentpos = start2
                    startmatch = startmatch2
            variable = str1[currentpos:endmatch]
            currentpos = endmatch + len(endmarker)
        if variable is not None and variable not in ignorelist:
            if not variable or variable.replace("_", "").replace(".", "").isalnum():
                variables.append((startmatch, variable))
    return variables
178
# NOTE(review): the `def` line of the enclosing function is missing from this
# listing; the name `getaccelerators` and the `None` default are assumptions
# (the closure shows `accelmarker` and `acceptlist` are its parameters).
def getaccelerators(accelmarker, acceptlist=None):
    """Return a function that gets a list of accelerators marked using accelmarker."""
    def getmarkedaccelerators(str1):
        """Return all the accelerators in str1 marked with the given marker.

        The result is a pair ``(accelerators, badaccelerators)`` holding only
        the accelerator characters; their positions are dropped.
        """
        acclocs, badlocs = findaccelerators(str1, accelmarker, acceptlist)
        accelerators = [accelerator for _, accelerator in acclocs]
        badaccelerators = [accelerator for _, accelerator in badlocs]
        return accelerators, badaccelerators
    return getmarkedaccelerators
188
# NOTE(review): the `def` line of the enclosing function is missing from this
# listing; the name `getvariables` is an assumption (the closure shows
# `startmarker` and `endmarker` are its parameters) -- confirm.
def getvariables(startmarker, endmarker):
    """Return a function that gets a list of variables marked using startmarker and endmarker."""
    def getmarkedvariables(str1):
        """Return all the variables in str1 marked with the given markers.

        Only the variable names are returned; their positions are dropped.
        """
        varlocs = findmarkedvariables(str1, startmarker, endmarker)
        return [variable for _, variable in varlocs]
    return getmarkedvariables
197
# NOTE(review): the `def` line is missing from this listing; the name
# `getnumbers` is an assumption (only the `str1` parameter is evidenced by
# the body) -- confirm before merging.
def getnumbers(str1):
    """Return any numbers that are in the string, as a list of strings.

    A number starts at a digit and continues over digits, periods and degree
    signs (U+00B0). Periods are only kept when another non-period character
    of the number follows, so a sentence-ending "3." yields "3". A degree sign
    is kept as part of the number.
    """
    # `unicode` only exists on Python 2; on Python 3 every str is text.
    try:
        text_type = unicode
    except NameError:
        text_type = str
    assert isinstance(str1, text_type)
    numbers = []
    innumber = False
    degreesign = u'\xb0'
    lastnumber = ""
    # Periods seen inside a number that have not been committed yet.
    carryperiod = ""
    for chr1 in str1:
        if chr1.isdigit():
            innumber = True
        elif innumber and chr1 not in (u'.', degreesign):
            # Anything else ends the current number.
            innumber = False
            if lastnumber:
                numbers.append(lastnumber)
            lastnumber = ""
        if not innumber:
            carryperiod = ""
        elif chr1 == degreesign:
            lastnumber += chr1
        elif chr1 == '.':
            carryperiod += chr1
        else:
            lastnumber += carryperiod + chr1
            carryperiod = ""
    if innumber and lastnumber:
        numbers.append(lastnumber)
    return numbers
230
236
# NOTE(review): the `def` line is missing from this listing; the name
# `getemails` is an assumption (only the `str1` parameter is evidenced by the
# body) -- confirm before merging.
def getemails(str1):
    """Return the email addresses that are in a string.

    An address is matched as a run of word characters, dots and hyphens on
    both sides of an ``@``.
    """
    # Raw string: the pattern is unchanged, but the backslashes are no longer
    # (invalid) string escape sequences.
    return re.findall(r'[\w\.\-]+@[\w\.\-]+', str1)
240
# NOTE(review): the `def` line is missing from this listing; the name
# `geturls` is an assumption (only the `str1` parameter is evidenced by the
# body) -- confirm before merging.
def geturls(str1):
    """Return the URIs in a string.

    Matches runs that start with ``http:``/``https:``, ``www.`` or ``ftp:``
    and continue over the URL characters listed in the pattern.
    """
    # Raw strings: the pattern is unchanged, but the backslashes are no
    # longer (invalid) string escape sequences.
    URLPAT = (r'https?:[\w/\.:;+\-~\%#\$?=&,()]+|www\.[\w/\.:;+\-~\%#\$?=&,()]+|'
              r'ftp:[\w/\.:;+\-~\%#?=&,]+')
    return re.findall(URLPAT, str1)
246
# NOTE(review): the `def` line of the enclosing function is missing from this
# listing; the name `countaccelerators` and the `None` default are
# assumptions (the closure shows `accelmarker` and `acceptlist` are its
# parameters) -- confirm.
def countaccelerators(accelmarker, acceptlist=None):
    """Return a function that counts the number of accelerators marked with the given marker."""
    def countmarkedaccelerators(str1):
        """Return how many valid and how many invalid accelerators str1 contains.

        The result is the pair ``(number of accelerators, number of bad
        accelerators)`` for the given marker.
        """
        acclocs, badlocs = findaccelerators(str1, accelmarker, acceptlist)
        return len(acclocs), len(badlocs)
    return countmarkedaccelerators
254