1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 __version__ = 'SPARK-0.6.1'
23
24 import re
25 import sys
26 import string
27
29 namelist, namedict, classlist = [], {}, [instance.__class__]
30 for c in classlist:
31 for b in c.__bases__:
32 classlist.append(b)
33 for name in dir(c):
34 if name not in namedict:
35 namelist.append(name)
36 namedict[name] = 1
37 return namelist
38
41 pattern = self.reflect()
42 self.re = re.compile(pattern, re.VERBOSE)
43
44 self.index2func = {}
45 for name, number in self.re.groupindex.items():
46 self.index2func[number-1] = getattr(self, 't_' + name)
47
49 doc = getattr(self, name).__doc__
50 rv = '(?P<%s>%s)' % (name[2:], doc)
51 return rv
52
61
63 print "Lexical error at position %s" % pos
64 raise SystemExit
65
67 pos = 0
68 n = len(s)
69 while pos < n:
70 m = self.re.match(s, pos)
71 if m is None:
72 self.error(s, pos)
73
74 groups = m.groups()
75 for i in range(len(groups)):
76 if groups[i] and i in self.index2func:
77 self.index2func[i](groups[i])
78 pos = m.end()
79
81 r'( . | \n )+'
82 pass
83
86 self.rules = {}
87 self.rule2func = {}
88 self.rule2name = {}
89 self.collectRules()
90 self.startRule = self.augment(start)
91 self.ruleschanged = 1
92
93 _START = 'START'
94 _EOF = 'EOF'
95
96
97
98
100
102 rules = string.split(doc)
103
104 index = []
105 for i in range(len(rules)):
106 if rules[i] == '::=':
107 index.append(i-1)
108 index.append(len(rules))
109
110 for i in range(len(index)-1):
111 lhs = rules[index[i]]
112 rhs = rules[index[i]+2:index[i+1]]
113 rule = (lhs, tuple(rhs))
114
115 rule, fn = self.preprocess(rule, func)
116
117 if lhs in self.rules:
118 self.rules[lhs].append(rule)
119 else:
120 self.rules[lhs] = [ rule ]
121 self.rule2func[rule] = fn
122 self.rule2name[rule] = func.__name__[2:]
123 self.ruleschanged = 1
124
131
133
134
135
136
137
138 startRule = (self._START, ( start, self._EOF ))
139 self.rule2func[startRule] = lambda args: args[0]
140 self.rules[self._START] = [ startRule ]
141 self.rule2name[startRule] = ''
142 return startRule
143
145 union = {}
146 self.first = {}
147
148 for rulelist in self.rules.values():
149 for lhs, rhs in rulelist:
150 if lhs not in self.first:
151 self.first[lhs] = {}
152
153 if len(rhs) == 0:
154 self.first[lhs][None] = 1
155 continue
156
157 sym = rhs[0]
158 if sym not in self.rules:
159 self.first[lhs][sym] = 1
160 else:
161 union[(sym, lhs)] = 1
162 changes = 1
163 while changes:
164 changes = 0
165 for src, dest in union.keys():
166 destlen = len(self.first[dest])
167 self.first[dest].update(self.first[src])
168 if len(self.first[dest]) != destlen:
169 changes = 1
170
171
172
173
174
175
176
177
180
182 print "Syntax error at or near `%s' token" % token
183 raise SystemExit
184
185 - def parse(self, tokens):
186 tree = {}
187 tokens.append(self._EOF)
188 states = { 0: [ (self.startRule, 0, 0) ] }
189
190 if self.ruleschanged:
191 self.makeFIRST()
192
193 for i in xrange(len(tokens)):
194 states[i+1] = []
195
196 if states[i] == []:
197 break
198 self.buildState(tokens[i], states, i, tree)
199
200
201
202 if i < len(tokens)-1 or states[i+1] != [(self.startRule, 2, 0)]:
203 del tokens[-1]
204 self.error(tokens[i-1])
205 rv = self.buildTree(tokens, tree, ((self.startRule, 2, 0), i+1))
206 del tokens[-1]
207 return rv
208
210 needsCompletion = {}
211 state = states[i]
212 predicted = {}
213
214 for item in state:
215 rule, pos, parent = item
216 lhs, rhs = rule
217
218
219
220
221 if pos == len(rhs):
222 if len(rhs) == 0:
223 needsCompletion[lhs] = (item, i)
224
225 for pitem in states[parent]:
226 if pitem is item:
227 break
228
229 prule, ppos, pparent = pitem
230 plhs, prhs = prule
231
232 if prhs[ppos:ppos+1] == (lhs,):
233 new = (prule,
234 ppos+1,
235 pparent)
236 if new not in state:
237 state.append(new)
238 tree[(new, i)] = [(item, i)]
239 else:
240 tree[(new, i)].append((item, i))
241 continue
242
243 nextSym = rhs[pos]
244
245
246
247
248 if nextSym in self.rules:
249
250
251
252
253
254
255
256 if nextSym in needsCompletion:
257 new = (rule, pos+1, parent)
258 olditem_i = needsCompletion[nextSym]
259 if new not in state:
260 state.append(new)
261 tree[(new, i)] = [olditem_i]
262 else:
263 tree[(new, i)].append(olditem_i)
264
265
266
267
268 if nextSym in predicted:
269 continue
270 predicted[nextSym] = 1
271
272 ttype = token is not self._EOF and \
273 self.typestring(token) or \
274 None
275 if ttype is not None:
276
277
278
279
280
281
282
283
284
285 for prule in self.rules[nextSym]:
286 new = (prule, 0, i)
287 prhs = prule[1]
288 if len(prhs) == 0:
289 state.append(new)
290 continue
291 prhs0 = prhs[0]
292 if prhs0 not in self.rules:
293 if prhs0 != ttype:
294 continue
295 else:
296 state.append(new)
297 continue
298 first = self.first[prhs0]
299 if None not in first and \
300 ttype not in first:
301 continue
302 state.append(new)
303 continue
304
305 for prule in self.rules[nextSym]:
306
307
308
309
310
311 prhs = prule[1]
312 if len(prhs) > 0 and \
313 prhs[0] not in self.rules and \
314 token != prhs[0]:
315 continue
316 state.append((prule, 0, i))
317
318
319
320
321 elif token == nextSym:
322
323 states[i+1].append((rule, pos+1, parent))
324
326 stack = []
327 self.buildTree_r(stack, tokens, -1, tree, root)
328 return stack[0]
329
330 - def buildTree_r(self, stack, tokens, tokpos, tree, root):
331 (rule, pos, parent), state = root
332
333 while pos > 0:
334 want = ((rule, pos, parent), state)
335 if want not in tree:
336
337
338
339
340
341
342 pos = pos - 1
343 state = state - 1
344 stack.insert(0, tokens[tokpos])
345 tokpos = tokpos - 1
346 else:
347
348
349
350
351
352
353
354
355
356 children = tree[want]
357 if len(children) > 1:
358 child = self.ambiguity(children)
359 else:
360 child = children[0]
361
362 tokpos = self.buildTree_r(stack,
363 tokens, tokpos,
364 tree, child)
365 pos = pos - 1
366 (crule, cpos, cparent), cstate = child
367 state = cparent
368
369 lhs, rhs = rule
370 result = self.rule2func[rule](stack[:len(rhs)])
371 stack[:len(rhs)] = [result]
372 return tokpos
373
393
395
396
397
398
399
400 return list[0]
401
402
403
404
405
406
407
408
409
414
416 rebind = lambda lhs, self=self: \
417 lambda args, lhs=lhs, self=self: \
418 self.buildASTNode(args, lhs)
419 lhs, rhs = rule
420 return rule, rebind(lhs)
421
430
431 - def terminal(self, token): return token
432
434 rv = self.AST(type)
435 rv[:len(args)] = args
436 return rv
437
438
439
440
441
442
443
444
445
446
447
450
500
501
502
503
504
505
506
507
512
514 rebind = lambda func, self=self: \
515 lambda args, func=func, self=self: \
516 self.foundMatch(args, func)
517 lhs, rhs = rule
518 rhslist = list(rhs)
519 rhslist.reverse()
520
521 return (lhs, tuple(rhslist)), rebind(func)
522
524 func(args[-1])
525 return args[-1]
526
539
540 - def match(self, ast=None):
541 if ast is None:
542 ast = self.ast
543 self.input = []
544
545 self.match_r(ast)
546 self.parse(self.input)
547
549
550
551
552 return list[-1]
553
554 -def _dump(tokens, states):
555 for i in range(len(states)):
556 print 'state', i
557 for (lhs, rhs), pos, parent in states[i]:
558 print '\t', lhs, '::=',
559 print string.join(rhs[:pos]),
560 print '.',
561 print string.join(rhs[pos:]),
562 print ',', parent
563 if i < len(tokens):
564 print
565 print 'token', str(tokens[i])
566 print
567