import antlr3
import testbase
import unittest
import os
import sys
from cStringIO import StringIO
import difflib
import textwrap


def _consumeAllTokens(lexer, eofType):
    """Pull tokens from *lexer* until a token of type *eofType* is returned.

    Any exception raised by ``lexer.nextToken()`` propagates to the caller.
    """
    while True:
        token = lexer.nextToken()
        if token.type == eofType:
            break


class t012lexerXML(testbase.ANTLRTest):
    """Lexer tests for the grammar compiled from 't012lexerXMLLexer.g'."""

    def setUp(self):
        self.compileGrammar('t012lexerXMLLexer.g')


    def lexerClass(self, base):
        """Return a subclass of *base* that raises on the first lexer error."""
        class TLexer(base):
            def emitErrorMessage(self, msg):
                # report errors to /dev/null
                pass

            def reportError(self, re):
                # no error recovery yet, just crash!
                raise re

        return TLexer


    def _assertLexerError(self, text, excClass, unexpectedType,
                          charPositionInLine, line):
        """Lex *text* and check that it fails with the expected error.

        The test fails if the lexer reaches EOF without raising. Exceptions
        other than *excClass* are not caught here and propagate unchanged.
        """
        stream = antlr3.StringStream(text)
        lexer = self.getLexer(stream)

        try:
            _consumeAllTokens(lexer, antlr3.EOF)
        except excClass as exc:
            # assertEqual (unlike a bare assert) still runs under "python -O"
            self.assertEqual(exc.unexpectedType, unexpectedType)
            self.assertEqual(exc.charPositionInLine, charPositionInLine)
            self.assertEqual(exc.line, line)
        else:
            self.fail(
                "lexer reached EOF without raising %s" % excClass.__name__
                )


    def testValid(self):
        """Lex the '.input' file next to this module and compare the text
        collected in ``lexer.outbuf`` with the '.output' file."""
        basePath = os.path.splitext(__file__)[0]

        # "with" closes the file handle as soon as the content is read
        with open(basePath + '.input') as inputFile:
            stream = antlr3.StringStream(unicode(inputFile.read(), 'utf-8'))
        lexer = self.getLexer(stream)

        _consumeAllTokens(lexer, self.lexerModule.EOF)

        # NOTE(review): outbuf is presumably filled by the grammar's actions
        # while lexing -- it is not defined in this file.
        output = unicode(lexer.outbuf.getvalue(), 'utf-8')

        with open(basePath + '.output') as outputFile:
            testOutput = unicode(outputFile.read(), 'utf-8')

        if output != testOutput:
            # report a line-by-line diff, escaped so it is safe to print
            diff = difflib.Differ().compare(
                output.splitlines(1), testOutput.splitlines(1)
                )
            self.fail(
                ''.join([l.encode('ascii', 'backslashreplace') for l in diff])
                )


    def testMalformedInput1(self):
        """The '>' at line 2, column 11 must raise NoViableAltException."""
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <document d>
        </document>
        """)

        self._assertLexerError(text, antlr3.NoViableAltException, '>', 11, 2)


    def testMalformedInput2(self):
        """The 't' at line 1, column 2 must raise MismatchedSetException."""
        text = textwrap.dedent("""\
        <?tml version='1.0'?>
        <document>
        </document>
        """)

        self._assertLexerError(text, antlr3.MismatchedSetException, 't', 2, 1)


    def testMalformedInput3(self):
        """The 'a' at line 2, column 11 must raise NoViableAltException."""
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <docu ment attr="foo">
        </document>
        """)

        self._assertLexerError(text, antlr3.NoViableAltException, 'a', 11, 2)



if __name__ == '__main__':
    unittest.main()


## # run an infinite loop with randomly mangled input
## while True:
##     print "ping"

##     input = """\
## <?xml version='1.0'?>
## <!DOCTYPE component [
## <!ELEMENT component (PCDATA|sub)*>
## <!ATTLIST component
##           attr CDATA #IMPLIED
##           attr2 CDATA #IMPLIED
## >
## <!ELMENT sub EMPTY>

## ]>
## <component attr="val'ue" attr2='val"ue'>
## <!-- This is a comment -->
## Text
## <![CDATA[huhu]]>
## &
## <
## <?xtal cursor='11'?>
## <sub/>
## <sub></sub>
## </component>
## """

##     import random
##     input = list(input) # make it mutable
##     for _ in range(3):
##         p1 = random.randrange(len(input))
##         p2 = random.randrange(len(input))

##         c1 = input[p1]
##         input[p1] = input[p2]
##         input[p2] = c1
##     input = ''.join(input) # back to string

##     stream = antlr3.StringStream(input)
##     lexer = Lexer(stream)

##     try:
##         while True:
##             token = lexer.nextToken()
##             if token.type == EOF:
##                 break

##     except antlr3.RecognitionException, exc:
##         print exc
##         for l in input.splitlines()[0:exc.line]:
##             print l
##         print ' '*exc.charPositionInLine + '^'

##     except BaseException, exc:
##         print '\n'.join(['%02d: %s' % (idx+1, l) for idx, l in enumerate(input.splitlines())])
##         print "%s at %d:%d" % (exc, stream.line, stream.charPositionInLine)
##         print

##         raise