1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 import re
18
19 from libxyz.parser import BaseParser
20 from libxyz.parser import Lexer
21 from libxyz.parser import ParsedData
22 from libxyz.exceptions import XYZValueError
23 from libxyz.exceptions import LexerError
24
25 import libxyz
26
28 """
29 BlockParser is used to parse block structures.
30 Format:
31
32 name {
33 var1 <assign> val1 <delimiter>
34 var2 <assign> val2 [<list_separator>val3...] <delimiter>
35 ...
36 }
37 """
38
# Parser states, used as keys of the state -> handler dispatch table.
# Values are 0..6 in exactly this order.
(STATE_INIT,
 STATE_BLOCK_OPEN,
 STATE_VARIABLE,
 STATE_ASSIGN,
 STATE_VALUE,
 STATE_DELIM,
 STATE_LIST_VALUE) = range(7)

# Default value for every supported parser option.
DEFAULT_OPT = {
    # Comment character; the rest of the line is ignored.
    u"comment": u"#",
    # Compiled regular expression a variable name must match.
    u"varre": re.compile(r"^[\w-]+$"),
    # Character separating a variable from its value.
    u"assignchar": u"=",
    # Character separating statements.
    u"delimiter": u"\n",
    # Variables allowed inside a block; empty means no restriction.
    u"validvars": (),
    # Optional callable(block_name, var, value) returning the value to store.
    u"value_validator": None,
    # Number of blocks to parse; <= 0 means all available.
    u"count": 0,
    # Character separating the elements of a list value.
    u"list_separator": u",",
    # Macro character (None disables macros).
    u"macrochar": u"&",
    # Optional callable(varname) returning the transformed variable object.
    u"var_transform": None,
}
59
61 """
62 @param opt: Parser options.
63 @type opt: dict
64
65 Available options:
66 - comment: Comment character.
67 Everything else ignored until EOL.
68 Type: I{string (single char)}
69 Default: #
70 - delimiter: Character to use as delimiter between statements.
71 Type: I{string (single char)}
72 Default: \\n
73 - varre: Valid variable name regular expression.
74 Type: I{Compiled re object (L{re.compile})}
75 Default: ^[\w-]+$
76 - assignchar: Variable-value split character.
77 Type: I{string (single char)}
78 Default: =
79 - validvars: List of variables valid within block.
80 Type: I{sequence}
81 Default: ()
82 - value_validator: Value validator
83 Type: A function that takes three args:
84 current block name, var and value and validates them.
85 In case value is invalid, XYZValueError must be raised.
86 Otherwise function must return required value, possibly modified.
87 Default: None
88 - count: How many blocks to parse. If count <= 0 - will parse
89 all available.
90 Type: integer
91 Default: 0
92 - list_separator: Character to separate elements in list
93 Type: I{string (single char)}
94 Default: ,
95 - macrochar: Macro character (None to disable macros)
96 Type: I{string (single char)}
97 Default: &
98 - var_transform: A function which is called with variable name
99 as single argument, and which returns new variable object
100 or raises XYZValueError
101 """
102
103 super(BlockParser, self).__init__()
104
105 if opt and not isinstance(opt, dict):
106 raise XYZValueError(_(u"Invalid opt type: %s. "\
107 u"Dictionary expected." % type(opt)))
108
109 self.opt = opt or self.DEFAULT_OPT
110 self.set_opt(self.DEFAULT_OPT, self.opt)
111
112 self._default_data = None
113 self._state = self.STATE_INIT
114 self._parsed_obj = None
115 self._varname = None
116 self._macroname = None
117 self._sdata = None
118 self._result = {}
119 self._current_list = []
120 self._macros = {}
121 self._lexer = None
122 self._openblock = u"{"
123 self._closeblock = u"}"
124 self._parsed = 0
125
126 self._tok_type = None
127
128 self._parse_table = {
129 self.STATE_INIT: self._process_state_init,
130 self.STATE_BLOCK_OPEN: self._process_state_block_open,
131 self.STATE_VARIABLE: self._process_state_variable,
132 self.STATE_ASSIGN: self._process_state_assign,
133 self.STATE_VALUE: self._process_state_value,
134 self.STATE_LIST_VALUE: self._process_state_list_value,
135 self.STATE_DELIM: self._process_state_delim,
136 }
137
138
139
def parse(self, source, default_data=None):
    """
    Parse blocks of text and return a dict of L{ParsedData} objects
    or raise L{libxyz.exceptions.ParseError} exception

    Lexer failures (L{LexerError}) are caught and reported through
    self.error().

    @param source: Source data
    @param default_data: Dictionary containing L{libxyz.parser.ParsedData}
                         objects with default values. When it is a
                         non-empty dict, this very object is used as the
                         result container and returned.
    @return: Dictionary of parsed blocks (self._result).
    """

    # Start from a clean state; must run before _default_data is set,
    # because _cleanup() resets it to None.
    self._cleanup()

    self._result = default_data or {}
    self._parsed = 0
    self._default_data = default_data

    # Characters the lexer must return as separate tokens
    _tokens = (self._openblock,
               self._closeblock,
               self.assignchar,
               self.delimiter,
               self.list_separator,
               )

    self._lexer = Lexer(source, _tokens, self.comment, self.macrochar)
    self._sdata = self._lexer.sdata

    try:
        while True:
            _res = self._lexer.lexer()

            # The lexer returns None when there is nothing left to read
            if _res is None:
                break

            self._tok_type, _val = _res

            # A newline only matters where a statement may end
            # (STATE_DELIM / STATE_LIST_VALUE); skip it elsewhere.
            if _val == "\n" and \
                   self._state not in (self.STATE_DELIM,
                                       self.STATE_LIST_VALUE):
                continue

            # Dispatch the token to the handler of the current state
            self._parse_table[self._state](_val)
    except LexerError as e:
        self.error(str(e))

    self._check_complete()

    return self._result
188
189
190
192 if self._default_data and word in self._default_data:
193 self._parsed_obj = self._default_data[word]
194 else:
195 self._parsed_obj = ParsedData(word)
196
197 self._state = self.STATE_BLOCK_OPEN
198
199
200
207
208
209
211 if word == self._closeblock:
212 self._complete_block()
213 return
214
215 if self._tok_type == self._lexer.TOKEN_MACRO:
216 self._macroname = word
217 else:
218 if self.validvars and word not in self.validvars:
219 self.error(_(u"Unknown variable %s") % word)
220 elif self.varre.match(word) is None:
221 self.error(_(u"Invalid variable name: %s") % word)
222
223 if callable(self.var_transform):
224 try:
225 self._varname = self.var_transform(word)
226 except XYZValueError, e:
227 self.error(_(u"Variable transformation error: %s") %
228 libxyz.core.utils.ustring(str(e)))
229 else:
230 self._varname = word
231
232 self._state = self.STATE_ASSIGN
233
234
235
243
244
245
247 if word == self.list_separator:
248 self._state = self.STATE_VALUE
249 return
250
251 if len(self._current_list) == 1:
252 _value = self._current_list[0]
253 else:
254 _value = tuple(self._current_list)
255
256
257 if self._macroname:
258 self._macros[self._macroname] = _value
259 self._macroname = None
260
261 else:
262 if self.value_validator:
263 try:
264 _value = self.value_validator(self._parsed_obj.name,
265 self._varname, _value)
266 except XYZValueError, e:
267 self.error(_(u"Invalid value: %s" % str(e)))
268
269 self._parsed_obj[self._varname] = _value
270 self._varname = None
271
272 self._current_list = []
273 self._lexer.escaping_off()
274 self._state = self.STATE_DELIM
275 self._lexer.unget(word)
276
277
278
280 if self._tok_type == self._lexer.TOKEN_MACRO:
281 try:
282 self._current_list.append(self._macros[word])
283 except KeyError:
284 self.error(_(u"Undefined macro %s" % word))
285 else:
286 self._current_list.append(word)
287
288 self._state = self.STATE_LIST_VALUE
289
290
291
301
302
303
305 if self._parsed_obj:
306 self._result[self._parsed_obj.name] = self._parsed_obj
307 self._parsed += 1
308
309 self._cleanup()
310
311 if self.count > 0 and self.count == self._parsed:
312 self._lexer.done()
313
314
315
317 """
318 Set all necessary variables to initial state
319 """
320
321 self._parsed_obj = None
322 self._varname = None
323 self._macroname = None
324 self._state = self.STATE_INIT
325 self._in_comment = False
326 self._in_quote = False
327 self._current_list = []
328 self._macros = {}
329 self._default_data = None
330
331
332
334 """
335 Check state after source reaches EOF for consistency
336 """
337
338 _err = False
339 _msg = None
340
341 if self._in_quote:
342 _err, _msg = True, _(u"Unterminated quote")
343
344 if self._state != self.STATE_INIT:
345 if self._state != self.STATE_BLOCK_OPEN:
346 _err, _msg = True, _(u"Unclosed block")
347 else:
348 _err, _msg = True, None
349
350 if self._lexer.get_idt():
351 _err, _msg = True, None
352
353 if _err:
354 self.error(_msg)
355