1 """Generate ast module from specification
2 
3 This script generates the ast module from a simple specification,
4 which makes it easy to accommodate changes in the grammar.  This
5 approach would be quite reasonable if the grammar changed often.
6 Instead, it is rather complex to generate the appropriate code.  And
7 the Node interface has changed more often than the grammar.
8 """
9 
10 import fileinput
11 import re
12 import sys
13 from StringIO import StringIO
14 
# Separator used when joining argument and value lists in generated code.
COMMA = ", "
# Name of the node specification file that main() hands to parse_spec().
SPEC = "ast.txt"
17 
def load_boilerplate(file):
    """Return the (prologue, epilogue) boilerplate text found in *file*.

    The file is searched for the PROLOGUE and EPILOGUE marker comments.
    The prologue is the text between the two markers and the epilogue is
    everything after the second one; both are returned with surrounding
    whitespace stripped.

    Raises ValueError if either marker is missing, instead of slicing
    the file at a bogus offset.
    """
    # Each marker is spelled as two adjacent literals so that this
    # function's own source never contains a marker verbatim: find()
    # returns the first match, and it must be the real marker line even
    # when the file being read is this script itself.
    pro_marker = '### ''PROLOGUE'
    epi_marker = '### ''EPILOGUE'

    f = open(file)
    try:
        buf = f.read()
    finally:
        # Release the handle even if read() fails.
        f.close()

    i = buf.find(pro_marker)
    j = buf.find(epi_marker)
    if i == -1 or j == -1:
        raise ValueError("%s: PROLOGUE/EPILOGUE marker not found" % (file,))

    pro = buf[i + len(pro_marker):j].strip()
    epi = buf[j + len(epi_marker):].strip()
    return pro, epi
27 
def strip_default(arg):
    """Return the argname from an 'arg = default' string"""
    # Split at the first '='; the separator comes back empty when there
    # is no default, in which case the input is returned untouched
    # (it is not stripped).
    name, equals, _default = arg.partition('=')
    if not equals:
        return arg
    return name.strip()
35 
# Argument kinds, assigned from the marker suffix on an argument name:
#   P_NODE   - no suffix
#   P_OTHER  - name ends with '*'
#   P_NESTED - name ends with '!'
#   P_NONE   - name ends with '&'
# The numeric order is significant: the largest kind seen among a node's
# arguments is kept and compared with "<" and ">" when generating code.
P_NODE, P_OTHER, P_NESTED, P_NONE = (1, 2, 3, 4)
40 
class NodeInfo:
    """Each instance describes a specific AST node"""

    def __init__(self, name, args):
        """Record the node name and digest its raw argument string."""
        self.name = name
        # Raw argument text; get_argprops() may later remove the marker
        # characters from it.
        self.args = args.strip()
        # Order matters: get_argprops() edits the list built by
        # get_argnames() and also sets self.hardest_arg.
        self.argnames = self.get_argnames()
        self.argprops = self.get_argprops()
        self.nargs = len(self.argnames)
        # Extra source lines for the generated __init__; filled in by
        # whoever parses "init(...)" sections of the specification.
        self.init = []

    def _emit(self, buf, text=""):
        """Write *text* followed by a newline to *buf*."""
        buf.write(text + "\n")

    def get_argnames(self):
        """Return the list of argument names found in self.args.

        If the argument string contains parentheses, only the text
        between the first '(' and the last ')' is considered.  Defaults
        ("name = value") are dropped; marker suffixes are still attached
        to the names at this point.
        """
        spec = self.args
        if '(' in spec:
            spec = spec[spec.find('(') + 1:spec.rfind(')')]
        names = []
        for piece in spec.split(','):
            if piece:
                names.append(strip_default(piece.strip()))
        return names

    def get_argprops(self):
        """Each argument can have a property like '*' or '!'

        Returns a dict mapping each bare argument name to its P_* kind
        and stores the largest kind seen in self.hardest_arg.

        XXX This method modifies the argnames in place!
        """
        # Checked in this order; only one trailing marker is removed.
        suffix_kinds = (('*', P_OTHER), ('!', P_NESTED), ('&', P_NONE))
        props = {}
        hardest = P_NODE
        for pos, raw in enumerate(self.argnames):
            for suffix, kind in suffix_kinds:
                if raw.endswith(suffix):
                    bare = raw[:-1]
                    self.argnames[pos] = bare
                    props[bare] = kind
                    hardest = max(hardest, kind)
                    break
            else:
                props[raw] = P_NODE
        self.hardest_arg = hardest

        # At least one marker was present: scrub the marker characters
        # from the raw argument string as well.
        if hardest > P_NODE:
            for suffix, _kind in suffix_kinds:
                self.args = self.args.replace(suffix, '')

        return props

    def gen_source(self):
        """Return the complete source text of the generated node class."""
        buf = StringIO()
        self._emit(buf, "class %s(Node):" % self.name)
        self._gen_init(buf)
        # Every remaining method is preceded by one blank line.
        for generate in (self._gen_getChildren,
                         self._gen_getChildNodes,
                         self._gen_repr):
            self._emit(buf)
            generate(buf)
        return buf.getvalue()

    def _gen_init(self, buf):
        """Write the generated class's __init__ method to *buf*."""
        if self.args:
            argtuple = '(' in self.args
            if argtuple:
                # A parenthesised argument list becomes one parameter
                # whose name is all the argument names run together.
                args = ''.join(self.argnames)
            else:
                args = self.args
            self._emit(buf,
                       "    def __init__(self, %s, lineno=None):" % args)
        else:
            self._emit(buf, "    def __init__(self, lineno=None):")
        if self.argnames:
            if argtuple:
                for position, name in enumerate(self.argnames):
                    self._emit(buf, "        self.%s = %s[%s]"
                               % (name, args, position))
            else:
                for name in self.argnames:
                    self._emit(buf, "        self.%s = %s" % (name, name))
        self._emit(buf, "        self.lineno = lineno")
        # Copy the lines in self.init, indented four spaces.  The rstrip()
        # drops the trailing newline and, for an empty line, the four
        # spaces too, so that reindent.py is happy with the output.
        for extra in self.init:
            self._emit(buf, ("    " + extra).rstrip())

    def _gen_getChildren(self, buf):
        """Write the generated class's getChildren method to *buf*."""
        self._emit(buf, "    def getChildren(self):")
        if not self.argnames:
            self._emit(buf, "        return ()")
            return

        if self.hardest_arg < P_NESTED:
            # Nothing needs flattening: return the attributes directly.
            clist = COMMA.join(["self.%s" % attr for attr in self.argnames])
            if self.nargs == 1:
                self._emit(buf, "        return %s," % clist)
            else:
                self._emit(buf, "        return %s" % clist)
            return

        if len(self.argnames) == 1:
            self._emit(buf, "        return tuple(flatten(self.%s))"
                       % self.argnames[0])
            return

        self._emit(buf, "        children = []")
        template = "        children.%s(%sself.%s%s)"
        for name in self.argnames:
            if self.argprops[name] == P_NESTED:
                pieces = ("extend", "flatten(", name, ")")
            else:
                pieces = ("append", "", name, "")
            self._emit(buf, template % pieces)
        self._emit(buf, "        return tuple(children)")

    def _gen_getChildNodes(self, buf):
        """Write the generated class's getChildNodes method to *buf*."""
        self._emit(buf, "    def getChildNodes(self):")
        if not self.argnames:
            self._emit(buf, "        return ()")
            return

        if self.hardest_arg < P_NESTED:
            # Only plain node arguments are reported here.
            clist = []
            for attr in self.argnames:
                if self.argprops[attr] == P_NODE:
                    clist.append("self.%s" % attr)
            if len(clist) == 0:
                self._emit(buf, "        return ()")
            elif len(clist) == 1:
                self._emit(buf, "        return %s," % clist[0])
            else:
                self._emit(buf, "        return %s" % COMMA.join(clist))
            return

        self._emit(buf, "        nodelist = []")
        template = "        nodelist.%s(%sself.%s%s)"
        for name in self.argnames:
            kind = self.argprops[name]
            if kind == P_NONE:
                tmp = ("        if self.%s is not None:\n"
                       "            nodelist.append(self.%s)")
                self._emit(buf, tmp % (name, name))
            elif kind == P_NESTED:
                self._emit(buf, template % ("extend", "flatten_nodes(",
                                            name, ")"))
            elif kind == P_NODE:
                self._emit(buf, template % ("append", "", name, ""))
            # Arguments of any other kind emit nothing.
        self._emit(buf, "        return tuple(nodelist)")

    def _gen_repr(self, buf):
        """Write the generated class's __repr__ method to *buf*."""
        self._emit(buf, "    def __repr__(self):")
        if not self.argnames:
            self._emit(buf, '        return "%s()"' % self.name)
            return

        fmt = COMMA.join(["%s"] * self.nargs)
        if '(' in self.args:
            fmt = '(%s)' % fmt
        vals = COMMA.join(["repr(self.%s)" % name
                           for name in self.argnames])
        if self.nargs == 1:
            # Trailing comma keeps the single value a one-element tuple.
            vals = vals + ","
        self._emit(buf, '        return "%s(%s)" %% (%s)'
                   % (self.name, fmt, vals))
197 
# Matches an "init(Name):" header anywhere in a line; group 1 is the text
# between the parentheses.  Written as a raw string so that "\(" is a
# regex escape rather than an (invalid) string escape.
rx_init = re.compile(r'init\((.*)\):')
199 
def parse_spec(file):
    """Parse the node specification *file* into a list of NodeInfo objects.

    Lines whose first non-blank character is '#' are skipped.  Until the
    first "init(Name):" header is seen, every line that splits into
    exactly two parts at ':' defines a node ("Name: args"); other lines
    are ignored.  After an "init(Name):" header, every following line is
    appended verbatim to that node's init list, until the next header
    switches to another node.

    Returns the NodeInfo objects sorted by node name.  Raises KeyError
    if an init header names a node that has not been defined.
    """
    classes = {}
    cur = None
    lines = fileinput.input(file)
    try:
        for line in lines:
            if line.strip().startswith('#'):
                continue
            mo = rx_init.search(line)
            if mo is not None:
                # Start of extra code for a Node's __init__ method.
                cur = classes[mo.group(1)]
            elif cur is not None:
                # some code for the __init__ method
                cur.init.append(line)
            else:
                # a normal entry
                try:
                    name, args = line.split(':')
                except ValueError:
                    continue
                classes[name] = NodeInfo(name, args)
    finally:
        # fileinput keeps module-global state; close it even when an
        # exception escapes so that a later input() call is not refused.
        fileinput.close()
    return sorted(classes.values(), key=lambda n: n.name)
224 
def main():
    """Write the generated module to standard output.

    The boilerplate is loaded from the file named by the last
    command-line argument.  Output order: prologue, a blank line, the
    source of every node class from SPEC, then the epilogue.
    """
    prologue, epilogue = load_boilerplate(sys.argv[-1])
    out = sys.stdout.write
    out(prologue + "\n")
    out("\n")
    # The spec is parsed only after the prologue has been written.
    for info in parse_spec(SPEC):
        out(info.gen_source() + "\n")
    out(epilogue + "\n")
233 
if __name__ == "__main__":
    main()
    # Leave with status 0 right here, so that execution as a script
    # never continues into the statements that follow in this file.
    raise SystemExit(0)
237 
238 ### PROLOGUE
239 """Python abstract syntax node definitions
240 
241 This file is automatically generated by Tools/compiler/astgen.py
242 """
243 from consts import CO_VARARGS, CO_VARKEYWORDS
244 
def flatten(seq):
    """Return a flat list of the items of *seq*.

    Elements that are exactly of type tuple or list (subclasses do not
    count) are expanded recursively; everything else is kept as is.
    """
    flat = []
    for item in seq:
        kind = type(item)
        if kind is list or kind is tuple:
            flat.extend(flatten(item))
        else:
            flat.append(item)
    return flat
255 
def flatten_nodes(seq):
    """Return the Node instances found in flatten(seq), in order."""
    found = []
    for item in flatten(seq):
        if isinstance(item, Node):
            found.append(item)
    return found
258 
# Registry keyed by lowercased node name; entries are added by code
# further down in this module.
nodes = dict()
260 
class Node:
    """Abstract base class for ast nodes."""

    def getChildren(self):
        """Placeholder implemented by subclasses; returns None here."""
        return None

    def __iter__(self):
        """Yield the items of getChildren() one at a time."""
        for child in self.getChildren():
            yield child

    def asList(self):
        """Return getChildren(); kept for backwards compatibility."""
        return self.getChildren()

    def getChildNodes(self):
        """Placeholder implemented by subclasses; returns None here."""
        return None
272 
class EmptyNode(Node):
    """Node subclass that adds nothing to the base Node interface."""
275 
class Expression(Node):
    """Artificial node class to support "eval"; wraps a single node."""
    # Executed once, when the class body runs: registers the class
    # *name* (a string) under the key "expression".
    nodes["expression"] = "Expression"

    def __init__(self, node):
        """Store the wrapped *node*."""
        self.node = node

    def getChildren(self):
        """Return a one-element tuple holding the wrapped node."""
        return (self.node,)

    def getChildNodes(self):
        """Return a one-element tuple holding the wrapped node."""
        return (self.node,)

    def __repr__(self):
        wrapped = repr(self.node)
        return "Expression(%s)" % wrapped
290 
291 ### EPILOGUE
# Register every module-level class that subclasses Node (Node itself
# included) under its lowercased name.
# Iterate over a snapshot of the globals: this loop binds the
# module-level names "name" and "obj" itself, and adding keys to a
# dictionary while iterating a live view of it is an error on
# interpreters where items() returns a view.
# NOTE(review): Node is declared without a base class; on Python 2 that
# makes it and its subclasses old-style classes, which are not instances
# of `type`, so this test would skip them -- confirm that is intended.
for name, obj in list(globals().items()):
    if isinstance(obj, type) and issubclass(obj, Node):
        nodes[name.lower()] = obj
295