xref: /aosp_15_r20/external/antlr/runtime/Python/tests/t054main.py (revision 16467b971bd3e2009fad32dd79016f2c7e421deb)
1*16467b97STreehugger Robot# -*- coding: utf-8 -*-
2*16467b97STreehugger Robot
3*16467b97STreehugger Robotimport unittest
4*16467b97STreehugger Robotimport textwrap
5*16467b97STreehugger Robotimport antlr3
6*16467b97STreehugger Robotimport antlr3.tree
7*16467b97STreehugger Robotimport testbase
8*16467b97STreehugger Robotimport sys
9*16467b97STreehugger Robotfrom StringIO import StringIO
10*16467b97STreehugger Robot
class T(testbase.ANTLRTest):
    """Tests for the ``main()`` entry point of ANTLR-generated Python modules.

    Each test compiles an inline grammar, calls ``main()`` on the resulting
    module with an argv-style list (plus substitute ``stdin``/``stdout``
    streams where needed) and checks what was written to ``stdout``.
    """

    def setUp(self):
        """Prepend ``self.baseDir`` to ``sys.path``, remembering the old path."""
        # Snapshot the list so tearDown can restore it.
        # NOTE(review): presumably baseDir is where the generated modules are
        # written, so they become importable by name -- confirm in testbase.
        self.oldPath = sys.path[:]
        sys.path.insert(0, self.baseDir)

    def tearDown(self):
        """Restore the ``sys.path`` snapshot taken in ``setUp``."""
        sys.path = self.oldPath

    def testOverrideMain(self):
        # The grammar's @main section supplies its own main() that always
        # raises RuntimeError; calling the module's main() must surface it.
        grammar = textwrap.dedent(
            r"""lexer grammar T3;
            options {
              language = Python;
              }

            @main {
            def main(argv):
                raise RuntimeError("no")
            }

            ID: ('a'..'z' | '\u00c0'..'\u00ff')+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        # Any exception other than RuntimeError still propagates as an error,
        # and a normal return is reported as a failure.
        self.assertRaises(RuntimeError, lexerMod.main, ['lexer.py'])

    def testLexerFromFile(self):
        # The input is passed to main() as a file path on the command line.
        text = "foo bar"
        inputPath = self.writeFile("input.txt", text)

        grammar = textwrap.dedent(
            r"""lexer grammar T1;
            options {
              language = Python;
              }

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        lexerMod.main(
            ['lexer.py', inputPath],
            stdout=stdout
            )

        # NOTE(review): three lines presumably means one per token
        # (ID, WS, ID) -- confirm against the runtime's lexer main().
        self.assertEqual(len(stdout.getvalue().splitlines()), 3)

    def testLexerFromStdIO(self):
        # No file argument: the input is supplied through the stdin stream.
        text = "foo bar"

        grammar = textwrap.dedent(
            r"""lexer grammar T2;
            options {
              language = Python;
              }

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        lexerMod.main(
            ['lexer.py'],
            stdin=StringIO(text),
            stdout=stdout
            )

        self.assertEqual(len(stdout.getvalue().splitlines()), 3)

    def testLexerEncoding(self):
        # Non-ASCII input is handed over as UTF-8 encoded bytes and main()
        # is told the encoding via --encoding.
        data = u"föö bär".encode('utf-8')

        grammar = textwrap.dedent(
            r"""lexer grammar T3;
            options {
              language = Python;
              }

            ID: ('a'..'z' | '\u00c0'..'\u00ff')+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        lexerMod.main(
            ['lexer.py', '--encoding', 'utf-8'],
            stdin=StringIO(data),
            stdout=stdout
            )

        self.assertEqual(len(stdout.getvalue().splitlines()), 3)

    def testCombined(self):
        # Combined lexer/parser grammar: run the parser module's main() on
        # rule 'r' and expect exactly one line of output.
        text = "foo bar"

        grammar = textwrap.dedent(
            r"""grammar T4;
            options {
              language = Python;
              }

            r returns [res]: (ID)+ EOF { $res = $text; };

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        stdout = StringIO()

        # For a combined grammar compileInlineGrammar yields a
        # (lexer module, parser module) pair; only the parser is driven here.
        _, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        parserMod.main(
            ['combined.py', '--rule', 'r'],
            stdin=StringIO(text),
            stdout=stdout
            )

        output = stdout.getvalue()
        # The captured output doubles as the failure message.
        self.assertEqual(len(output.splitlines()), 1, output)

    def testCombinedOutputAST(self):
        # With output=AST the parser's main() is expected to print the tree
        # built for rule 'r'.
        text = "foo + bar"

        grammar = textwrap.dedent(
            r"""grammar T5;
            options {
              language = Python;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN; };
            """)

        stdout = StringIO()

        _, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        parserMod.main(
            ['combined.py', '--rule', 'r'],
            stdin=StringIO(text),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        self.assertEqual(output, "(+ foo bar)")

    def testTreeParser(self):
        # A tree grammar's main() is pointed at the parser and lexer by name
        # and prints the value returned by the walker rule.
        grammar = textwrap.dedent(
            r'''grammar T6;
            options {
              language = Python;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN; };
            ''')

        treeGrammar = textwrap.dedent(
            r'''tree grammar T6Walker;
            options {
            language=Python;
            ASTLabelType=CommonTree;
            tokenVocab=T6;
            }
            r returns [res]: ^(OP a=ID b=ID)
              { $res = "\%s \%s \%s" \% ($a.text, $OP.text, $b.text) }
              ;
            ''')

        # The lexer/parser modules are not used directly: the walker's main()
        # receives them by name through --parser / --lexer below.
        self.compileInlineGrammar(grammar, returnModule=True)
        walkerMod = self.compileInlineGrammar(treeGrammar, returnModule=True)

        stdout = StringIO()
        walkerMod.main(
            ['walker.py', '--rule', 'r', '--parser', 'T6Parser', '--parser-rule', 'r', '--lexer', 'T6Lexer'],
            stdin=StringIO("a+b"),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        # The expected text is the Python 2 repr of a unicode string.
        self.assertEqual(output, "u'a + b'")

    def testTreeParserRewrite(self):
        # Same setup as the plain tree-parser test, but the walker rewrites
        # the tree (output=AST) and the rewritten tree is what gets printed.
        grammar = textwrap.dedent(
            r'''grammar T7;
            options {
              language = Python;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN; };
            ''')

        treeGrammar = textwrap.dedent(
            r'''tree grammar T7Walker;
            options {
              language=Python;
              ASTLabelType=CommonTree;
              tokenVocab=T7;
              output=AST;
            }
            tokens {
              ARG;
            }
            r: ^(OP a=ID b=ID) -> ^(OP ^(ARG ID) ^(ARG ID));
            ''')

        # Compiled for its side effect only; main() finds T7Parser/T7Lexer
        # by the names given on the command line.
        self.compileInlineGrammar(grammar, returnModule=True)
        walkerMod = self.compileInlineGrammar(treeGrammar, returnModule=True)

        stdout = StringIO()
        walkerMod.main(
            ['walker.py', '--rule', 'r', '--parser', 'T7Parser', '--parser-rule', 'r', '--lexer', 'T7Lexer'],
            stdin=StringIO("a+b"),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        self.assertEqual(output, "(+ (ARG a) (ARG b))")

    def testGrammarImport(self):
        # A master grammar imports a slave parser grammar; main() on the
        # master's parser must be able to run a rule that delegates to it.
        slave = textwrap.dedent(
            r'''
            parser grammar T8S;
            options {
              language=Python;
            }

            a : B;
            ''')

        parserName = self.writeInlineGrammar(slave)[0]
        # slave parsers are imported as normal python modules
        # to force reloading current version, purge module from sys.modules
        sys.modules.pop(parserName + 'Parser', None)

        master = textwrap.dedent(
            r'''
            grammar T8M;
            options {
              language=Python;
            }
            import T8S;
            s returns [res]: a { $res = $a.text };
            B : 'b' ; // defines B from inherited token space
            WS : (' '|'\n') {self.skip()} ;
            ''')

        stdout = StringIO()

        _, parserMod = self.compileInlineGrammar(master, returnModule=True)
        parserMod.main(
            ['import.py', '--rule', 's'],
            stdin=StringIO("b"),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        # The expected text is the Python 2 repr of a unicode string.
        self.assertEqual(output, "u'b'")
315*16467b97STreehugger Robot
316*16467b97STreehugger Robot
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
319