# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for parser."""

import unittest
from compiler.front_end import lr1
from compiler.front_end import parser
from compiler.front_end import tokenizer
from compiler.util import parser_types


# TODO(bolms): This is repeated in lr1_test.py; separate into test utils?
def _parse_productions(*productions):
    """Returns a grammar: Production.parse applied to each given text line."""
    grammar = []
    for production_text in productions:
        grammar.append(parser_types.Production.parse(production_text))
    return grammar


def _s_token():
    """Returns a newly constructed Token("s", "s", None)."""
    return parser_types.Token("s", "s", None)


# Separators used to assemble error-example text in the tests below.
_EXAMPLE_DIVIDER = "\n" + "=" * 80 + "\n"
_MESSAGE_ERROR_DIVIDER = "\n" + "-" * 80 + "\n"
_ERROR_DIVIDER = "\n---\n"


class ParserGeneratorTest(unittest.TestCase):
    """Tests parser.parse_error_examples and generate_parser."""

    def test_parse_good_error_examples(self):
        """Two messages, three examples: tokens and messages are recovered."""
        example_text = "".join([
            _EXAMPLE_DIVIDER,                            # ======...
            "structure names must be Camel",             # Message.
            _MESSAGE_ERROR_DIVIDER,                      # ------...
            "struct $ERR FOO",                           # First example.
            _ERROR_DIVIDER,                              # ---
            "struct $ERR foo",                           # Second example.
            _EXAMPLE_DIVIDER,                            # ======...
            ' \n struct must be followed by ":" \n\n',   # Second message.
            _MESSAGE_ERROR_DIVIDER,                      # ------...
            "struct Foo $ERR",                           # Example for second message.
        ])
        errors = parser.parse_error_examples(example_text)
        # (source text whose tokens are expected, expected message), in the
        # order the examples appear in example_text.
        expectations = [
            ("struct FOO", "structure names must be Camel"),
            ("struct foo", "structure names must be Camel"),
            ("struct Foo ", 'struct must be followed by ":"'),
        ]
        for index, (source_text, message) in enumerate(expectations):
            self.assertEqual(tokenizer.tokenize(source_text, "")[0],
                             errors[index][0])
            self.assertEqual(message, errors[index][2])

    def test_parse_good_wildcard_example(self):
        """An example ending in $ANY yields lr1.ANY_TOKEN in its token list."""
        example_text = "".join([
            _EXAMPLE_DIVIDER,                            # ======...
            ' \n struct must be followed by ":" \n\n',   # Message.
            _MESSAGE_ERROR_DIVIDER,                      # ------...
            "struct Foo $ERR $ANY",
        ])
        errors = parser.parse_error_examples(example_text)
        expected_tokens = tokenizer.tokenize("struct Foo ", "")[0]
        # The $ANY token should come just before the end-of-line token in the
        # parsed result.
        expected_tokens.insert(-1, lr1.ANY_TOKEN)
        self.assertEqual(expected_tokens, errors[0][0])
        self.assertEqual('struct must be followed by ":"', errors[0][2])

    def test_parse_with_no_error_marker(self):
        """An example without a $ERR marker is rejected."""
        example_text = (
            _EXAMPLE_DIVIDER + "msg" + _MESSAGE_ERROR_DIVIDER + "-- doc")
        with self.assertRaises(parser.ParserGenerationError):
            parser.parse_error_examples(example_text)

    def test_that_no_error_example_fails(self):
        """A message section with no example section is rejected."""
        example_text = (_EXAMPLE_DIVIDER + "msg" + _EXAMPLE_DIVIDER + "msg" +
                        _MESSAGE_ERROR_DIVIDER + "example")
        with self.assertRaises(parser.ParserGenerationError):
            parser.parse_error_examples(example_text)

    def test_that_message_example_divider_must_be_on_its_own_line(self):
        """A dash divider not alone on its line is rejected."""
        dashes = "-" * 80
        # (text directly before the dashes, text directly after the dashes).
        surroundings = [
            ("msg", "example"),
            ("msg\n", "example"),
            ("msg", "\nexample"),
            ("msg\n", " \nexample"),
        ]
        for before, after in surroundings:
            example_text = _EXAMPLE_DIVIDER + before + dashes + after
            with self.assertRaises(parser.ParserGenerationError):
                parser.parse_error_examples(example_text)

    def test_that_example_divider_must_be_on_its_own_line(self):
        """An equals divider not alone on its line is rejected."""
        equals = "=" * 80
        prefix = _EXAMPLE_DIVIDER + "msg" + _MESSAGE_ERROR_DIVIDER
        suffix = _MESSAGE_ERROR_DIVIDER + "example"
        # (text directly before the equals run, text directly after it).
        surroundings = [
            ("example", "msg"),
            ("example\n", "msg"),
            ("example", "\nmsg"),
            ("example\n", " \nmsg"),
        ]
        for before, after in surroundings:
            example_text = prefix + before + equals + after + suffix
            with self.assertRaises(parser.ParserGenerationError):
                parser.parse_error_examples(example_text)

    def test_that_tokenization_failure_results_in_failure(self):
        """The example text "|" makes parse_error_examples raise."""
        example_text = (
            _EXAMPLE_DIVIDER + "message" + _MESSAGE_ERROR_DIVIDER + "|")
        with self.assertRaises(parser.ParserGenerationError):
            parser.parse_error_examples(example_text)

    def test_generate_parser(self):
        """generate_parser returns a truthy result for two small grammars."""
        single_rule = parser.generate_parser(
            "C", _parse_productions("C -> s"), [])
        self.assertTrue(single_rule)
        two_rules = parser.generate_parser(
            "C", _parse_productions("C -> s", "C -> d"), [])
        self.assertTrue(two_rules)

    def test_generated_parser_error(self):
        """A registered error example supplies the code for a matching failure."""
        productions = _parse_productions("C -> s", "C -> d")
        error_examples = [
            ([_s_token(), _s_token()], _s_token(), "double s", "ss"),
        ]
        test_parser = parser.generate_parser("C", productions, error_examples)
        parse_result = test_parser.parse([_s_token(), _s_token()])
        self.assertEqual(None, parse_result.parse_tree)
        self.assertEqual("double s", parse_result.error.code)

    def test_conflict_error(self):
        """generate_parser raises for the grammar C -> S | D, S -> a, D -> a."""
        productions = _parse_productions(
            "C -> S", "C -> D", "S -> a", "D -> a")
        with self.assertRaises(parser.ParserGenerationError):
            parser.generate_parser("C", productions, [])

    def test_bad_mark_error(self):
        """generate_parser raises for each of two error-example lists."""
        # Two examples with the same tokens but different messages.
        productions = _parse_productions("C -> s", "C -> d")
        clashing_examples = [
            ([_s_token(), _s_token()], _s_token(), "double s", "ss"),
            ([_s_token(), _s_token()], _s_token(), "double 's'", "ss"),
        ]
        with self.assertRaises(parser.ParserGenerationError):
            parser.generate_parser("C", productions, clashing_examples)

        # A single-token example "s" for the same grammar.
        productions = _parse_productions("C -> s", "C -> d")
        single_token_example = [
            ([_s_token()], _s_token(), "single s", "s"),
        ]
        with self.assertRaises(parser.ParserGenerationError):
            parser.generate_parser("C", productions, single_token_example)
166*99e0aae7SDavid Rees 167*99e0aae7SDavid Rees 168*99e0aae7SDavid Reesclass ModuleParserTest(unittest.TestCase): 169*99e0aae7SDavid Rees """Tests for parser.parse_module(). 170*99e0aae7SDavid Rees 171*99e0aae7SDavid Rees Correct parses should mostly be checked in conjunction with 172*99e0aae7SDavid Rees module_ir.build_ir, as the exact data structure returned by 173*99e0aae7SDavid Rees parser.parse_module() is determined by the grammar defined in module_ir. 174*99e0aae7SDavid Rees These tests only need to cover errors and sanity checking. 175*99e0aae7SDavid Rees """ 176*99e0aae7SDavid Rees 177*99e0aae7SDavid Rees def test_error_reporting_by_example(self): 178*99e0aae7SDavid Rees parse_result = parser.parse_module( 179*99e0aae7SDavid Rees tokenizer.tokenize("struct LogFileStatus:\n" 180*99e0aae7SDavid Rees " 0 [+4] UInt\n", "")[0]) 181*99e0aae7SDavid Rees self.assertEqual(None, parse_result.parse_tree) 182*99e0aae7SDavid Rees self.assertEqual("A name is required for a struct field.", 183*99e0aae7SDavid Rees parse_result.error.code) 184*99e0aae7SDavid Rees self.assertEqual('"\\n"', parse_result.error.token.symbol) 185*99e0aae7SDavid Rees self.assertEqual(set(['"["', "SnakeWord", '"."', '":"', '"("']), 186*99e0aae7SDavid Rees parse_result.error.expected_tokens) 187*99e0aae7SDavid Rees 188*99e0aae7SDavid Rees def test_error_reporting_without_example(self): 189*99e0aae7SDavid Rees parse_result = parser.parse_module( 190*99e0aae7SDavid Rees tokenizer.tokenize("struct LogFileStatus:\n" 191*99e0aae7SDavid Rees " 0 [+4] UInt foo +\n", "")[0]) 192*99e0aae7SDavid Rees self.assertEqual(None, parse_result.parse_tree) 193*99e0aae7SDavid Rees self.assertEqual(None, parse_result.error.code) 194*99e0aae7SDavid Rees self.assertEqual('"+"', parse_result.error.token.symbol) 195*99e0aae7SDavid Rees self.assertEqual(set(['"("', '"\\n"', '"["', "Documentation", "Comment"]), 196*99e0aae7SDavid Rees parse_result.error.expected_tokens) 197*99e0aae7SDavid Rees 198*99e0aae7SDavid Rees def 
test_ok_parse(self): 199*99e0aae7SDavid Rees parse_result = parser.parse_module( 200*99e0aae7SDavid Rees tokenizer.tokenize("struct LogFileStatus:\n" 201*99e0aae7SDavid Rees " 0 [+4] UInt foo\n", "")[0]) 202*99e0aae7SDavid Rees self.assertTrue(parse_result.parse_tree) 203*99e0aae7SDavid Rees self.assertEqual(None, parse_result.error) 204*99e0aae7SDavid Rees 205*99e0aae7SDavid Rees 206*99e0aae7SDavid Reesif __name__ == "__main__": 207*99e0aae7SDavid Rees unittest.main() 208