root/cly/tags/final-dictionary-based-version/cly/parser.py

Revision 110, 20.8 kB (checked in by athomas, 3 years ago)
  • Changed MERGE behaviour so that a list of grammars nust be returned by the callable.
  • Made COMPLETE automatically cull candidates that do not match the current prefix.
  • Property svn:eol-style set to native
Line 
1 import re, inspect, types
2 from crash.util import tolist, flatten
3 from cly.symbols import *
4
5
6
7 class Error(Exception): pass
8 class NotExecutable(Error): pass
9 class DuplicateLabel(Error): pass
10 class SyntaxError(Error): pass
11 class UndefinedSymbol(Error): pass
12
13
14
15 # Result states
16 class Result:
17     """ Hold the result state of a parse. """
18     # Command can be executed successfully
19     OK = 1
20     # Not enough input was supplied
21     NEEDMOREINPUT = 2
22     # No match for a token
23     NOMATCH = 3
24     # End of input
25     ENDOFINPUT = 4
26     # No-operation (empty line)
27     NOP = 5
28
29     # Other error
30     ERROR = 999
31
32     def __init__(self, state, context = None, grammar = None, token = None, message = None):
33         if not message: message = str(state)
34         self.state, self.message, self.context, self.__grammar, self.token = state, message, context, grammar, token
35
36     def __call__(self):
37         return self.execute()
38
39     def execute(self):
40         if self.state == Result.OK:
41             return self.context(self.__grammar)
42         raise NotExecutable("CLY parse result was not OK, can't execute")
43
44     def candidates(self, prefix = ''):
45         return self.context.candidates(self.__grammar, prefix)
46
47     def help(self):
48         return self.context.extract_help(self.__grammar)
49
50
51
52 class ParseNode:
53     def __init__(self, context, grammar, grammar_key, token):
54         self.context = context
55         self.grammar = grammar
56         self.grammar_key = grammar_key
57         self.token = token
58
59         # Track variable?
60         if VAR in grammar:
61             str = token.group(0)
62             # quoted string?
63             if token.group(1):
64                 str = token.group(1)[1:-1]
65             if callable(grammar[VAR]):
66                 name = self.context.ctx_call(grammar[VAR], str)
67             else:
68                 name = grammar[VAR]
69             self.context.add_var(name, str)
70         # Is this node a LABEL?
71         if LABEL in grammar:
72             self.context.add_label(grammar[LABEL], {grammar_key : grammar})
73         elif GLOBAL_LABEL in grammar:
74             self.context.add_label(grammar[GLOBAL_LABEL], {grammar_key : grammar})
75         # Increase reference count
76         context._traverse(grammar)
77
78 # Context states
79 def _is_command(command):
80     return type(command) is str and re.match('^[A-Z_]+$', command)
81
82
83
84 class Context:
85     VAR_TYPES = {'i': int, 'L': long, 's': str, 'f': float, 'b': bool}
86
87     def __init__(self, grammar, tokens, text, user_context):
88         self.__label = []
89         self.__mark = []
90         self.__var = {}
91         self.__count = {}
92         self.__grammar = grammar
93         self.user = user_context
94         self.text = text
95         self.tokens = tokens
96         self.parsed_tokens = []
97         self.nodes = []
98         self.result = None
99         self.__pre_parse(self.__grammar)
100         self.var_history = []
101
102     def __pre_parse(self, grammar, grammar_key = None):
103         for node in grammar.keys():
104             if node == GLOBAL_LABEL:
105                 self.add_label(grammar[GLOBAL_LABEL], {grammar_key : grammar})
106             elif type(grammar[node]) is dict:
107                 self.__pre_parse(grammar[node], node)
108
109     def __parse(self):
110         return self.__parse_branch(self.__grammar, self.tokens, self.nodes)
111
112     def add_var(self, var, value):
113         """ Add a VAR to the context. VAR name can be in the form
114             [l](b|s|i|f|L):<name>, where l signifies a list of values and s, i,
115             f, b and L represent string, integer, float, bool and long,
116             respectively. If the more simple <name> form is used, then CLY will
117             pass a scalar if the VAR is encountered only once, or convert it to
118             a list if encountered more than once. """
119         if ':' in var:
120             type, var = var.split(':')
121             islist = 'l' in type
122             type = type.replace('l', '')
123             if islist:
124                 if not type:
125                     type = 's'
126                 self.__var.setdefault(var, []).append(self.VAR_TYPES[type](value))
127             else:
128                 self.__var[var] = self.VAR_TYPES[type](value)
129         else:
130             # No type...
131             if var in self.__var:
132                 self.__var[var] = tolist(self.__var[var])
133                 self.__var[var].append(value)
134             else:
135                 self.__var[var] = value
136         self.var_history.append((var, value))
137
138     def add_mark(self, grammar):
139         """ Add a RETURN mark to the context. """
140         self.__mark.append(grammar)
141
142     def add_label(self, name, grammar):
143         """ Add a label to the context. The grammar can be disconnected. """
144         for l in self.__label:
145             if l[0] == name:
146                 if l[1] == grammar:
147                     return
148                 else:
149                     raise DuplicateLabel("label '%s' already exists under a different tree" % name)
150         self.__label.append([name, grammar])
151
152     def has_all_labels(self, seek):
153         if type(seek) is str: seek = [ seek ]
154         for s in seek:
155             found = False
156             for l in self.__label:
157                 if l[0] == s:
158                     found = True
159                     break
160             if not found:
161                 return False
162         return True
163
164     def has_labels(self, seek):
165         """ Return true if any of the sought labels exist. """
166         if type(seek) is str: seek = [ seek ]
167         for s in seek:
168             for l in self.__label:
169                 if l[0] == s:
170                     return True
171         return False
172
173     def has_all_vars(self, seek):
174         """ Determine if all of the sought variables are defined. """
175         if type(seek) is str: seek = [ seek ]
176         for var in seek:
177             if var not in self.__var:
178                 return False
179         return True
180
181     def has_vars(self, seek):
182         """ Determine if any of the given variables are defined. """
183         if type(seek) is str: seek = [ seek ]
184         for var in seek:
185             if var in self.__var:
186                 return True
187         return False
188
189     def __merge_labels_shallow(self, seek):
190         out = {}
191         for label in self.__label:
192             if label[0] in seek:
193                 for k1 in label[1]:
194                     for k2 in label[1][k1]:
195                         out[k2] = label[1][k1][k2]
196         return out
197
198     def __merge_labels(self, seek):
199         out = {}
200         for label in self.__label:
201             if label[0] in seek:
202                 for key in label[1]:
203                     out[key] = label[1][key]
204         return out
205
206     def _traverse(self, node):
207         node[MAX] = self.__max(node)
208         node[MAX][0] += 1
209         return node[MAX][0]
210
211     def __max(self, grammar):
212         max = None
213         if MAX in grammar:
214             max = grammar[MAX]
215             t = type(max)
216             if t is int:
217                 max = [ 0, max, id(self) ]
218             elif t is list:
219                 if max[2] != id(self):
220                     max[0] = 0
221                     max[2] = id(self)
222             else:
223                 raise SyntaxError("MAX keys must be an integer count, not %s" % t)
224         else:
225             max = [ 0, 9999, id(self) ]
226         grammar[MAX] = max
227         return max
228
229     def __filter_grammar(self, grammar):
230         out = {}
231         for key, branch in grammar.iteritems():
232             if key == ACTION and type(branch) is not dict \
233                 or type(branch) is dict and (
234                     # Logical filters
235                     (IF in branch and self.ctx_call(branch[IF]) or IF not in branch) \
236                     and
237                     (UNLESS in branch and not self.ctx_call(branch[UNLESS]) or UNLESS not in branch) \
238                     and
239                     ((IF_VAR in branch and self.has_vars(branch[IF_VAR])) or IF_VAR not in branch) \
240                     and
241                     ((UNLESS_VAR in branch and not self.has_vars(branch[UNLESS_VAR])) or UNLESS_VAR not in branch) \
242                     and
243                     (IF_LABEL in branch and self.has_labels(branch[IF_LABEL]) or IF_LABEL not in branch) \
244                     and
245                     (UNLESS_LABEL in branch and not branch[UNLESS_LABEL] in self or UNLESS_LABEL not in branch) \
246                     and
247                     # If MAX is exceeded...
248                     (self.__max(branch)[0] < self.__max(branch)[1])
249                 ):
250                 out[key] = branch
251         return out
252
253     def __sort_grammar_keys(self, keys, grammar):
254         def key_sort(a, b):
255             l = [0, 0, a]
256             r = [0, 0, b]
257             if type(grammar[a]) is dict:
258                 if GROUP in grammar[a]:
259                     if type(grammar[a][GROUP]) is tuple:
260                         l[0] = grammar[a][GROUP][0]
261                     elif type(grammar[a][GROUP]) is int:
262                         l[0] = grammar[a][GROUP]
263                 if ORDER in grammar[a]:
264                     l[0] = grammar[a][ORDER]
265
266             if type(grammar[b]) is dict:
267                 if GROUP in grammar[b]:
268                     if type(grammar[b][GROUP]) is tuple:
269                         r[0] = grammar[b][GROUP][0]
270                     elif type(grammar[b][GROUP]) is int:
271                         r[0] = grammar[b][GROUP]
272                 if ORDER in grammar[b]:
273                     r[0] = grammar[b][ORDER]
274             return cmp(l, r)
275
276         return sorted(keys.keys(), key_sort)
277
278     def __grammar_branches(self, grammar):
279         done = {}
280         for i in range(0, 3):
281             if MERGE in grammar and MERGE not in done:
282                 done[MERGE] = 1
283                 merge = tolist(grammar[MERGE])
284                 self.add_mark(grammar)
285                 for m in flatten(merge):
286                     mtype = type(m)
287                     while callable(m):
288                         m = self.ctx_call(m, grammar)
289                         mtype = type(m)
290                     m = flatten(m)
291                     for g in m:
292                         if type(g) is not dict:
293                             raise SyntaxError("MERGE target must be a dictionary (or callable that returns a list of dictionaries)")
294                         grammar.update(g)
295             if JUMP in grammar and JUMP not in done:
296                 done[JUMP] = 1
297                 label = grammar[JUMP]
298                 if label == RETURN:
299                     grammar.update(self.__mark[-1])
300                 else:
301                     if not self.has_labels(label):
302                         raise UndefinedSymbol("no such label '%s' referenced in JUMP" % grammar[JUMP])
303                     grammar.update(self.__merge_labels_shallow(label))
304             if JUMP_TO in grammar and JUMP_TO not in done:
305                 done[JUMP_TO] = 1
306                 label = grammar[JUMP_TO]
307                 if label == RETURN:
308                     grammar.update(self.__mark[-1])
309                 else:
310                     if not self.has_labels(label):
311                         raise UndefinedSymbol("no such label '%s' referenced in JUMP_TO" % grammar[JUMP_TO])
312                     grammar.update(self.__merge_labels(label))
313         return grammar
314
315     def __parse_help_node(self, key, node):
316         if type(node) is dict and HELP in node:
317             help, htype = node[HELP], type(node[HELP])
318             if htype is dict:
319                 # Delete MAX from help tree - useless anyway
320                 if MAX in help: del(help[MAX])
321                 return help
322             elif htype is tuple or htype is list:
323                 if len(help):
324                     if type(help[0]) is list or type(help[0]) is tuple:
325                         out = {}
326                         for k, v in help:
327                             out[k] = v
328                         return out
329                     return { help[0] : help[1] }
330                 else:
331                     return {}
332             elif htype is str:
333                 if type(key) is list or type(key) is tuple:
334                     out = {}
335                     for k in key:
336                         out[k] = help % {'key' : k, 'titlekey' : k.title()}
337                     return out
338                 return { key : help }
339             elif callable(help):
340                 return self.__parse_help_node(key, {'HELP' : self.ctx_call(help) })
341             else:
342                 raise SyntaxError("expected dict, tuple or str for HELP branch, got %s" % htype)
343         return None
344
345     def extract_help(self, grammar):
346         """ Extract help from the given grammar node. Returns a tuple of
347         (header, help, footer) where header and footer are either None or the
348         help header/footer, and help is a dictionary where the keys are tuples
349         of (order, group) and the values are an array of (command, help) pairs. """
350         header = None
351         footer = None
352         out = {(0, None) : []}
353         grammar = self.__copy_grammar_shallow(grammar)
354         if HELP_HEADER in grammar:
355             if callable(grammar[HELP_HEADER]):
356                 header = self.ctx_call(grammar[HELP_HEADER])
357             else:
358                 header = grammar[HELP_HEADER]
359             assert(type(header) is str)
360         if HELP_FOOTER in grammar:
361             if callable(grammar[HELP_FOOTER]):
362                 footer = self.ctx_call(grammar[HELP_FOOTER])
363             else:
364                 footer = grammar[HELP_FOOTER]
365             assert(type(footer) is str)
366         for k in self.__sort_grammar_keys(self.__filter_grammar(grammar), grammar):
367             groupid = 0
368             if k == ACTION: groupid = 10
369             group = (groupid, None)
370             v = grammar[k]
371             if type(v) is dict and GROUP in v:
372                 group = v[GROUP]
373                 if type(group) is int:
374                     groupid = group
375                     group = (group, None)
376                 elif type(group) is not tuple:
377                     group = (groupid, group)
378             if group not in out: out[group] = []
379             help = None
380             if _is_command(k):
381                 if k == ACTION:
382                     help = self.__parse_help_node(ACTION, grammar[k])
383                     if help is not None:
384                         help['<eol>'] = help[ACTION]
385                         del(help[ACTION])
386                     else:
387                         help = { '<eol>' : 'Execute command.' }
388                     del(out[group])
389                     group = (2147483648, None)
390                     out[group] = []
391                 else:
392                     continue
393             else:
394                 help = self.__parse_help_node(k, v)
395             if help is None:
396                 if callable(k):
397                     out[group].append([ k, "No HELP for this grammar node." ])
398             else:
399                 keys = help.keys()
400                 for k in keys:
401                     out[group].append([k, help[k]])
402         # TODO: Do a real sort?
403         for k in out:
404             if type(out[k]) is list:
405                 out[k].sort()
406         if (0, None) in out and not out[(0, None)]:
407             del(out[(0, None)])
408         return (header, out, footer)
409
410     def completion_candidates(self, grammar, prefix = ''):
411         """ Return the candidates for completion based on the given grammar and
412             prefix. """
413         out = []
414         for key, branch in grammar.iteritems():
415             if type(branch) is dict and COMPLETE in branch:
416                 words = self.ctx_call(branch[COMPLETE], prefix)
417                 words = [word for word in words if word.startswith(prefix)]
418                 out.extend(words)
419         return out
420
421     def candidates(self, grammar, prefix = ''):
422         """ The flattened list of command candidates for grammar. Excludes
423         help headers and footers and generic <type> commands. """
424         out = {}
425         for group in self.extract_help(grammar)[1].values():
426             if type(group) is list:
427                 out.update(dict.fromkeys([x[0] + ' ' for x in group
428                     if x[0][0] != '<' and x[0].startswith(prefix)]))
429         out.update(dict.fromkeys(self.completion_candidates(grammar, prefix)))
430         return out.keys()
431
432     def __call__(self, grammar):
433         self.execute(grammar)
434
435     def ctx_call(self, func, *argl, **argd):
436         """ Call the given callable with Context as the first argument iff the
437         first parameter of the callable is "ctx" or "context". """
438         ctx_index = 0
439         # No way to inspect builtin functions, hope for the best it.
440         if inspect.isbuiltin(func):
441             return func(*argl, **argd)
442         # Function object
443         if inspect.ismethod(func):
444             ctx_index = 1
445         elif type(func) is types.InstanceType:
446             func = func.__call__
447             ctx_index = 1
448         args = inspect.getargspec(func)[0]
449         if args and len(args) > ctx_index and args[ctx_index] in ('ctx', 'context'):
450             return func(self, *argl, **argd)
451         return func(*argl, **argd)
452
453     def execute(self, grammar):
454         if ACTION in grammar:
455             args = self.__var
456             if callable(grammar[ACTION]):
457                 return self.ctx_call(grammar[ACTION], **args)
458             elif type(grammar[ACTION]) is dict:
459                 if ACTION in grammar[ACTION]:
460     &n