xref: /aosp_15_r20/external/emboss/compiler/front_end/parser_test.py (revision 99e0aae7469b87d12f0ad23e61142c2d74c1ef70)
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14*99e0aae7SDavid Rees
15*99e0aae7SDavid Rees"""Tests for parser."""
16*99e0aae7SDavid Rees
17*99e0aae7SDavid Reesimport unittest
18*99e0aae7SDavid Reesfrom compiler.front_end import lr1
19*99e0aae7SDavid Reesfrom compiler.front_end import parser
20*99e0aae7SDavid Reesfrom compiler.front_end import tokenizer
21*99e0aae7SDavid Reesfrom compiler.util import parser_types
22*99e0aae7SDavid Rees
23*99e0aae7SDavid Rees
# TODO(bolms): This is repeated in lr1_test.py; separate into test utils?
def _parse_productions(*productions):
  """Builds a grammar from textual production rules.

  Args:
    productions: Zero or more strings such as "C -> s"; each one is handed to
      parser_types.Production.parse unchanged.

  Returns:
    A list with one parsed production per argument, in argument order.  The
    list is empty when no arguments are given.
  """
  return [parser_types.Production.parse(text) for text in productions]
28*99e0aae7SDavid Rees
29*99e0aae7SDavid Rees
# Dividers used to assemble error-example text for parser.parse_error_examples.
#
# A line of 80 "=" characters introduces each message section.
_EXAMPLE_DIVIDER = "\n" + "=" * 80 + "\n"
# A line of 80 "-" characters separates a message from its example(s).
_MESSAGE_ERROR_DIVIDER = "\n" + "-" * 80 + "\n"
# A line of exactly "---" separates consecutive examples of one message.
_ERROR_DIVIDER = "\n---\n"
33*99e0aae7SDavid Rees
34*99e0aae7SDavid Rees
35*99e0aae7SDavid Reesclass ParserGeneratorTest(unittest.TestCase):
36*99e0aae7SDavid Rees  """Tests parser.parse_error_examples and generate_parser."""
37*99e0aae7SDavid Rees
38*99e0aae7SDavid Rees  def test_parse_good_error_examples(self):
39*99e0aae7SDavid Rees    errors = parser.parse_error_examples(
40*99e0aae7SDavid Rees        _EXAMPLE_DIVIDER +  # ======...
41*99e0aae7SDavid Rees        "structure names must be Camel" +  # Message.
42*99e0aae7SDavid Rees        _MESSAGE_ERROR_DIVIDER +  # ------...
43*99e0aae7SDavid Rees        "struct $ERR FOO" +  # First example.
44*99e0aae7SDavid Rees        _ERROR_DIVIDER +  # ---
45*99e0aae7SDavid Rees        "struct $ERR foo" +  # Second example.
46*99e0aae7SDavid Rees        _EXAMPLE_DIVIDER +  # ======...
47*99e0aae7SDavid Rees        '   \n   struct must be followed by ":"   \n\n' +  # Second message.
48*99e0aae7SDavid Rees        _MESSAGE_ERROR_DIVIDER +  # ------...
49*99e0aae7SDavid Rees        "struct Foo $ERR")  # Example for second message.
50*99e0aae7SDavid Rees    self.assertEqual(tokenizer.tokenize("struct      FOO", "")[0], errors[0][0])
51*99e0aae7SDavid Rees    self.assertEqual("structure names must be Camel", errors[0][2])
52*99e0aae7SDavid Rees    self.assertEqual(tokenizer.tokenize("struct      foo", "")[0], errors[1][0])
53*99e0aae7SDavid Rees    self.assertEqual("structure names must be Camel", errors[1][2])
54*99e0aae7SDavid Rees    self.assertEqual(tokenizer.tokenize("struct Foo     ", "")[0], errors[2][0])
55*99e0aae7SDavid Rees    self.assertEqual('struct must be followed by ":"', errors[2][2])
56*99e0aae7SDavid Rees
57*99e0aae7SDavid Rees  def test_parse_good_wildcard_example(self):
58*99e0aae7SDavid Rees    errors = parser.parse_error_examples(
59*99e0aae7SDavid Rees        _EXAMPLE_DIVIDER +  # ======...
60*99e0aae7SDavid Rees        '   \n   struct must be followed by ":"   \n\n' +  # Second message.
61*99e0aae7SDavid Rees        _MESSAGE_ERROR_DIVIDER +  # ------...
62*99e0aae7SDavid Rees        "struct Foo $ERR $ANY")
63*99e0aae7SDavid Rees    tokens = tokenizer.tokenize("struct Foo          ", "")[0]
64*99e0aae7SDavid Rees    # The $ANY token should come just before the end-of-line token in the parsed
65*99e0aae7SDavid Rees    # result.
66*99e0aae7SDavid Rees    tokens.insert(-1, lr1.ANY_TOKEN)
67*99e0aae7SDavid Rees    self.assertEqual(tokens, errors[0][0])
68*99e0aae7SDavid Rees    self.assertEqual('struct must be followed by ":"', errors[0][2])
69*99e0aae7SDavid Rees
70*99e0aae7SDavid Rees  def test_parse_with_no_error_marker(self):
71*99e0aae7SDavid Rees    self.assertRaises(
72*99e0aae7SDavid Rees        parser.ParserGenerationError,
73*99e0aae7SDavid Rees        parser.parse_error_examples,
74*99e0aae7SDavid Rees        _EXAMPLE_DIVIDER + "msg" + _MESSAGE_ERROR_DIVIDER + "-- doc")
75*99e0aae7SDavid Rees
76*99e0aae7SDavid Rees  def test_that_no_error_example_fails(self):
77*99e0aae7SDavid Rees    self.assertRaises(parser.ParserGenerationError,
78*99e0aae7SDavid Rees                      parser.parse_error_examples,
79*99e0aae7SDavid Rees                      _EXAMPLE_DIVIDER + "msg" + _EXAMPLE_DIVIDER + "msg" +
80*99e0aae7SDavid Rees                      _MESSAGE_ERROR_DIVIDER + "example")
81*99e0aae7SDavid Rees
82*99e0aae7SDavid Rees  def test_that_message_example_divider_must_be_on_its_own_line(self):
83*99e0aae7SDavid Rees    self.assertRaises(parser.ParserGenerationError,
84*99e0aae7SDavid Rees                      parser.parse_error_examples,
85*99e0aae7SDavid Rees                      _EXAMPLE_DIVIDER + "msg" + "-" * 80 + "example")
86*99e0aae7SDavid Rees    self.assertRaises(parser.ParserGenerationError,
87*99e0aae7SDavid Rees                      parser.parse_error_examples,
88*99e0aae7SDavid Rees                      _EXAMPLE_DIVIDER + "msg\n" + "-" * 80 + "example")
89*99e0aae7SDavid Rees    self.assertRaises(parser.ParserGenerationError,
90*99e0aae7SDavid Rees                      parser.parse_error_examples,
91*99e0aae7SDavid Rees                      _EXAMPLE_DIVIDER + "msg" + "-" * 80 + "\nexample")
92*99e0aae7SDavid Rees    self.assertRaises(parser.ParserGenerationError,
93*99e0aae7SDavid Rees                      parser.parse_error_examples,
94*99e0aae7SDavid Rees                      _EXAMPLE_DIVIDER + "msg\n" + "-" * 80 + " \nexample")
95*99e0aae7SDavid Rees
96*99e0aae7SDavid Rees  def test_that_example_divider_must_be_on_its_own_line(self):
97*99e0aae7SDavid Rees    self.assertRaises(
98*99e0aae7SDavid Rees        parser.ParserGenerationError,
99*99e0aae7SDavid Rees        parser.parse_error_examples,
100*99e0aae7SDavid Rees        _EXAMPLE_DIVIDER + "msg" + _MESSAGE_ERROR_DIVIDER + "example" + "=" * 80
101*99e0aae7SDavid Rees        + "msg" + _MESSAGE_ERROR_DIVIDER + "example")
102*99e0aae7SDavid Rees    self.assertRaises(
103*99e0aae7SDavid Rees        parser.ParserGenerationError,
104*99e0aae7SDavid Rees        parser.parse_error_examples,
105*99e0aae7SDavid Rees        _EXAMPLE_DIVIDER + "msg" + _MESSAGE_ERROR_DIVIDER + "example\n" + "=" *
106*99e0aae7SDavid Rees        80 + "msg" + _MESSAGE_ERROR_DIVIDER + "example")
107*99e0aae7SDavid Rees    self.assertRaises(
108*99e0aae7SDavid Rees        parser.ParserGenerationError,
109*99e0aae7SDavid Rees        parser.parse_error_examples,
110*99e0aae7SDavid Rees        _EXAMPLE_DIVIDER + "msg" + _MESSAGE_ERROR_DIVIDER + "example" + "=" * 80
111*99e0aae7SDavid Rees        + "\nmsg" + _MESSAGE_ERROR_DIVIDER + "example")
112*99e0aae7SDavid Rees    self.assertRaises(
113*99e0aae7SDavid Rees        parser.ParserGenerationError,
114*99e0aae7SDavid Rees        parser.parse_error_examples,
115*99e0aae7SDavid Rees        _EXAMPLE_DIVIDER + "msg" + _MESSAGE_ERROR_DIVIDER + "example\n" + "=" *
116*99e0aae7SDavid Rees        80 + " \nmsg" + _MESSAGE_ERROR_DIVIDER + "example")
117*99e0aae7SDavid Rees
118*99e0aae7SDavid Rees  def test_that_tokenization_failure_results_in_failure(self):
119*99e0aae7SDavid Rees    self.assertRaises(
120*99e0aae7SDavid Rees        parser.ParserGenerationError,
121*99e0aae7SDavid Rees        parser.parse_error_examples,
122*99e0aae7SDavid Rees        _EXAMPLE_DIVIDER + "message" + _MESSAGE_ERROR_DIVIDER + "|")
123*99e0aae7SDavid Rees
124*99e0aae7SDavid Rees  def test_generate_parser(self):
125*99e0aae7SDavid Rees    self.assertTrue(parser.generate_parser("C", _parse_productions("C -> s"),
126*99e0aae7SDavid Rees                                           []))
127*99e0aae7SDavid Rees    self.assertTrue(parser.generate_parser(
128*99e0aae7SDavid Rees        "C", _parse_productions("C -> s", "C -> d"), []))
129*99e0aae7SDavid Rees
130*99e0aae7SDavid Rees  def test_generated_parser_error(self):
131*99e0aae7SDavid Rees    test_parser = parser.generate_parser(
132*99e0aae7SDavid Rees        "C", _parse_productions("C -> s", "C -> d"),
133*99e0aae7SDavid Rees        [([parser_types.Token("s", "s", None),
134*99e0aae7SDavid Rees           parser_types.Token("s", "s", None)],
135*99e0aae7SDavid Rees          parser_types.Token("s", "s", None),
136*99e0aae7SDavid Rees          "double s", "ss")])
137*99e0aae7SDavid Rees    parse_result = test_parser.parse([parser_types.Token("s", "s", None),
138*99e0aae7SDavid Rees                                      parser_types.Token("s", "s", None)])
139*99e0aae7SDavid Rees    self.assertEqual(None, parse_result.parse_tree)
140*99e0aae7SDavid Rees    self.assertEqual("double s", parse_result.error.code)
141*99e0aae7SDavid Rees
142*99e0aae7SDavid Rees  def test_conflict_error(self):
143*99e0aae7SDavid Rees    self.assertRaises(
144*99e0aae7SDavid Rees        parser.ParserGenerationError,
145*99e0aae7SDavid Rees        parser.generate_parser,
146*99e0aae7SDavid Rees        "C", _parse_productions("C -> S", "C -> D", "S -> a", "D -> a"), [])
147*99e0aae7SDavid Rees
148*99e0aae7SDavid Rees  def test_bad_mark_error(self):
149*99e0aae7SDavid Rees    self.assertRaises(parser.ParserGenerationError,
150*99e0aae7SDavid Rees                      parser.generate_parser,
151*99e0aae7SDavid Rees                      "C", _parse_productions("C -> s", "C -> d"),
152*99e0aae7SDavid Rees                      [([parser_types.Token("s", "s", None),
153*99e0aae7SDavid Rees                         parser_types.Token("s", "s", None)],
154*99e0aae7SDavid Rees                        parser_types.Token("s", "s", None),
155*99e0aae7SDavid Rees                        "double s", "ss"),
156*99e0aae7SDavid Rees                       ([parser_types.Token("s", "s", None),
157*99e0aae7SDavid Rees                         parser_types.Token("s", "s", None)],
158*99e0aae7SDavid Rees                        parser_types.Token("s", "s", None),
159*99e0aae7SDavid Rees                        "double 's'", "ss")])
160*99e0aae7SDavid Rees    self.assertRaises(parser.ParserGenerationError,
161*99e0aae7SDavid Rees                      parser.generate_parser,
162*99e0aae7SDavid Rees                      "C", _parse_productions("C -> s", "C -> d"),
163*99e0aae7SDavid Rees                      [([parser_types.Token("s", "s", None)],
164*99e0aae7SDavid Rees                        parser_types.Token("s", "s", None),
165*99e0aae7SDavid Rees                        "single s", "s")])
166*99e0aae7SDavid Rees
167*99e0aae7SDavid Rees
168*99e0aae7SDavid Reesclass ModuleParserTest(unittest.TestCase):
169*99e0aae7SDavid Rees  """Tests for parser.parse_module().
170*99e0aae7SDavid Rees
171*99e0aae7SDavid Rees  Correct parses should mostly be checked in conjunction with
172*99e0aae7SDavid Rees  module_ir.build_ir, as the exact data structure returned by
173*99e0aae7SDavid Rees  parser.parse_module() is determined by the grammar defined in module_ir.
174*99e0aae7SDavid Rees  These tests only need to cover errors and sanity checking.
175*99e0aae7SDavid Rees  """
176*99e0aae7SDavid Rees
177*99e0aae7SDavid Rees  def test_error_reporting_by_example(self):
178*99e0aae7SDavid Rees    parse_result = parser.parse_module(
179*99e0aae7SDavid Rees        tokenizer.tokenize("struct LogFileStatus:\n"
180*99e0aae7SDavid Rees                           "  0 [+4]    UInt\n", "")[0])
181*99e0aae7SDavid Rees    self.assertEqual(None, parse_result.parse_tree)
182*99e0aae7SDavid Rees    self.assertEqual("A name is required for a struct field.",
183*99e0aae7SDavid Rees                     parse_result.error.code)
184*99e0aae7SDavid Rees    self.assertEqual('"\\n"', parse_result.error.token.symbol)
185*99e0aae7SDavid Rees    self.assertEqual(set(['"["', "SnakeWord", '"."', '":"', '"("']),
186*99e0aae7SDavid Rees                     parse_result.error.expected_tokens)
187*99e0aae7SDavid Rees
188*99e0aae7SDavid Rees  def test_error_reporting_without_example(self):
189*99e0aae7SDavid Rees    parse_result = parser.parse_module(
190*99e0aae7SDavid Rees        tokenizer.tokenize("struct LogFileStatus:\n"
191*99e0aae7SDavid Rees                           "  0 [+4]    UInt    foo +\n", "")[0])
192*99e0aae7SDavid Rees    self.assertEqual(None, parse_result.parse_tree)
193*99e0aae7SDavid Rees    self.assertEqual(None, parse_result.error.code)
194*99e0aae7SDavid Rees    self.assertEqual('"+"', parse_result.error.token.symbol)
195*99e0aae7SDavid Rees    self.assertEqual(set(['"("', '"\\n"', '"["', "Documentation", "Comment"]),
196*99e0aae7SDavid Rees                     parse_result.error.expected_tokens)
197*99e0aae7SDavid Rees
198*99e0aae7SDavid Rees  def test_ok_parse(self):
199*99e0aae7SDavid Rees    parse_result = parser.parse_module(
200*99e0aae7SDavid Rees        tokenizer.tokenize("struct LogFileStatus:\n"
201*99e0aae7SDavid Rees                           "  0 [+4]    UInt    foo\n", "")[0])
202*99e0aae7SDavid Rees    self.assertTrue(parse_result.parse_tree)
203*99e0aae7SDavid Rees    self.assertEqual(None, parse_result.error)
204*99e0aae7SDavid Rees
205*99e0aae7SDavid Rees
# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
  unittest.main()
208