# -*- coding: utf-8 -*-

import unittest
import textwrap
import antlr3
import antlr3.tree
import testbase
import sys
from StringIO import StringIO


class T(testbase.ANTLRTest):
    """Exercise the ``main()`` entry point of modules generated from grammars.

    Each test compiles an inline grammar with ``compileInlineGrammar(...,
    returnModule=True)`` and then calls the resulting module's ``main`` with
    an argv-style list plus in-memory ``stdin``/``stdout`` streams.
    """

    def setUp(self):
        """Remember ``sys.path`` and put ``self.baseDir`` at its front."""
        # presumably needed so freshly generated modules can be imported by
        # name (see the --parser/--lexer options below) -- confirm in testbase
        self.oldPath = sys.path[:]
        sys.path.insert(0, self.baseDir)

    def tearDown(self):
        """Restore the ``sys.path`` saved by :meth:`setUp`."""
        sys.path = self.oldPath

    def testOverrideMain(self):
        """A grammar-level ``@main`` action replaces the generated main()."""
        grammar = textwrap.dedent(
            r"""lexer grammar T3;
            options {
              language = Python;
            }

            @main {
            def main(argv):
                raise RuntimeError("no")
            }

            ID: ('a'..'z' | '\u00c0'..'\u00ff')+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)

        # The overriding main() always raises, so reaching the end of the
        # call without a RuntimeError means the override was not used.
        self.assertRaises(RuntimeError, lexerMod.main, ['lexer.py'])

    def testLexerFromFile(self):
        """Lexer main() reads the file named on the command line."""
        text = "foo bar"
        inputPath = self.writeFile("input.txt", text)

        grammar = textwrap.dedent(
            r"""lexer grammar T1;
            options {
              language = Python;
            }

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        lexerMod.main(
            ['lexer.py', inputPath],
            stdout=stdout
            )

        self.assertEqual(len(stdout.getvalue().splitlines()), 3)

    def testLexerFromStdIO(self):
        """Lexer main() reads from ``stdin`` when no file is given."""
        text = "foo bar"

        grammar = textwrap.dedent(
            r"""lexer grammar T2;
            options {
              language = Python;
            }

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        lexerMod.main(
            ['lexer.py'],
            stdin=StringIO(text),
            stdout=stdout
            )

        self.assertEqual(len(stdout.getvalue().splitlines()), 3)

    def testLexerEncoding(self):
        """Lexer main() accepts ``--encoding`` for non-ASCII input."""
        # Feed UTF-8 encoded bytes; the --encoding option below names the
        # codec the generated main() is asked to use.
        text = u"föö bär".encode('utf-8')

        grammar = textwrap.dedent(
            r"""lexer grammar T3;
            options {
              language = Python;
            }

            ID: ('a'..'z' | '\u00c0'..'\u00ff')+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        stdout = StringIO()

        lexerMod = self.compileInlineGrammar(grammar, returnModule=True)
        lexerMod.main(
            ['lexer.py', '--encoding', 'utf-8'],
            stdin=StringIO(text),
            stdout=stdout
            )

        self.assertEqual(len(stdout.getvalue().splitlines()), 3)

    def testCombined(self):
        """Parser main() of a combined grammar runs the rule given by --rule."""
        text = "foo bar"

        grammar = textwrap.dedent(
            r"""grammar T4;
            options {
              language = Python;
            }

            r returns [res]: (ID)+ EOF { $res = $text; };

            ID: 'a'..'z'+;
            WS: ' '+ { $channel = HIDDEN; };
            """)

        stdout = StringIO()

        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        parserMod.main(
            ['combined.py', '--rule', 'r'],
            stdin=StringIO(text),
            stdout=stdout
            )

        output = stdout.getvalue()
        # The captured output doubles as the failure message for diagnosis.
        self.assertEqual(len(output.splitlines()), 1, output)

    def testCombinedOutputAST(self):
        """Parser main() prints the tree when the grammar has output=AST."""
        text = "foo + bar"

        grammar = textwrap.dedent(
            r"""grammar T5;
            options {
              language = Python;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN; };
            """)

        stdout = StringIO()

        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        parserMod.main(
            ['combined.py', '--rule', 'r'],
            stdin=StringIO(text),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        self.assertEqual(output, "(+ foo bar)")

    def testTreeParser(self):
        """Tree-walker main() chains lexer, parser and walker rule."""
        grammar = textwrap.dedent(
            r'''grammar T6;
            options {
              language = Python;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN; };
            ''')

        treeGrammar = textwrap.dedent(
            r'''tree grammar T6Walker;
            options {
            language=Python;
            ASTLabelType=CommonTree;
            tokenVocab=T6;
            }
            r returns [res]: ^(OP a=ID b=ID)
              { $res = "\%s \%s \%s" \% ($a.text, $OP.text, $b.text) }
              ;
            ''')

        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        walkerMod = self.compileInlineGrammar(treeGrammar, returnModule=True)

        stdout = StringIO()
        walkerMod.main(
            ['walker.py', '--rule', 'r', '--parser', 'T6Parser', '--parser-rule', 'r', '--lexer', 'T6Lexer'],
            stdin=StringIO("a+b"),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        # The expected text is the repr of a Python 2 unicode string.
        self.assertEqual(output, "u'a + b'")

    def testTreeParserRewrite(self):
        """Tree-walker main() prints the rewritten tree for output=AST."""
        grammar = textwrap.dedent(
            r'''grammar T7;
            options {
              language = Python;
              output = AST;
            }

            r: ID OP^ ID EOF!;

            ID: 'a'..'z'+;
            OP: '+';
            WS: ' '+ { $channel = HIDDEN; };
            ''')

        treeGrammar = textwrap.dedent(
            r'''tree grammar T7Walker;
            options {
              language=Python;
              ASTLabelType=CommonTree;
              tokenVocab=T7;
              output=AST;
            }
            tokens {
              ARG;
            }
            r: ^(OP a=ID b=ID) -> ^(OP ^(ARG ID) ^(ARG ID));
            ''')

        lexerMod, parserMod = self.compileInlineGrammar(grammar, returnModule=True)
        walkerMod = self.compileInlineGrammar(treeGrammar, returnModule=True)

        stdout = StringIO()
        walkerMod.main(
            ['walker.py', '--rule', 'r', '--parser', 'T7Parser', '--parser-rule', 'r', '--lexer', 'T7Lexer'],
            stdin=StringIO("a+b"),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        self.assertEqual(output, "(+ (ARG a) (ARG b))")

    def testGrammarImport(self):
        """Parser main() works for a grammar that imports a slave grammar."""
        slave = textwrap.dedent(
            r'''
            parser grammar T8S;
            options {
              language=Python;
            }

            a : B;
            ''')

        parserName = self.writeInlineGrammar(slave)[0]
        # slave parsers are imported as normal python modules
        # to force reloading current version, purge module from sys.modules
        try:
            del sys.modules[parserName+'Parser']
        except KeyError:
            pass

        master = textwrap.dedent(
            r'''
            grammar T8M;
            options {
              language=Python;
            }
            import T8S;
            s returns [res]: a { $res = $a.text };
            B : 'b' ; // defines B from inherited token space
            WS : (' '|'\n') {self.skip()} ;
            ''')

        stdout = StringIO()

        lexerMod, parserMod = self.compileInlineGrammar(master, returnModule=True)
        parserMod.main(
            ['import.py', '--rule', 's'],
            stdin=StringIO("b"),
            stdout=stdout
            )

        output = stdout.getvalue().strip()
        # The expected text is the repr of a Python 2 unicode string.
        self.assertEqual(output, "u'b'")


if __name__ == '__main__':
    unittest.main()