1import unittest
2
3from test import test_tools
4from typing import Dict, Set
5
6test_tools.skip_if_missing("peg_generator")
7with test_tools.imports_under_tool("peg_generator"):
8    from pegen.grammar_parser import GeneratedParser as GrammarParser
9    from pegen.testutil import parse_string
10    from pegen.first_sets import FirstSetCalculator
11    from pegen.grammar import Grammar
12
13
14class TestFirstSets(unittest.TestCase):
15    def calculate_first_sets(self, grammar_source: str) -> Dict[str, Set[str]]:
16        grammar: Grammar = parse_string(grammar_source, GrammarParser)
17        return FirstSetCalculator(grammar.rules).calculate()
18
19    def test_alternatives(self) -> None:
20        grammar = """
21            start: expr NEWLINE? ENDMARKER
22            expr: A | B
23            A: 'a' | '-'
24            B: 'b' | '+'
25        """
26        self.assertEqual(
27            self.calculate_first_sets(grammar),
28            {
29                "A": {"'a'", "'-'"},
30                "B": {"'+'", "'b'"},
31                "expr": {"'+'", "'a'", "'b'", "'-'"},
32                "start": {"'+'", "'a'", "'b'", "'-'"},
33            },
34        )
35
36    def test_optionals(self) -> None:
37        grammar = """
38            start: expr NEWLINE
39            expr: ['a'] ['b'] 'c'
40        """
41        self.assertEqual(
42            self.calculate_first_sets(grammar),
43            {
44                "expr": {"'c'", "'a'", "'b'"},
45                "start": {"'c'", "'a'", "'b'"},
46            },
47        )
48
49    def test_repeat_with_separator(self) -> None:
50        grammar = """
51        start: ','.thing+ NEWLINE
52        thing: NUMBER
53        """
54        self.assertEqual(
55            self.calculate_first_sets(grammar),
56            {"thing": {"NUMBER"}, "start": {"NUMBER"}},
57        )
58
59    def test_optional_operator(self) -> None:
60        grammar = """
61        start: sum NEWLINE
62        sum: (term)? 'b'
63        term: NUMBER
64        """
65        self.assertEqual(
66            self.calculate_first_sets(grammar),
67            {
68                "term": {"NUMBER"},
69                "sum": {"NUMBER", "'b'"},
70                "start": {"'b'", "NUMBER"},
71            },
72        )
73
74    def test_optional_literal(self) -> None:
75        grammar = """
76        start: sum NEWLINE
77        sum: '+' ? term
78        term: NUMBER
79        """
80        self.assertEqual(
81            self.calculate_first_sets(grammar),
82            {
83                "term": {"NUMBER"},
84                "sum": {"'+'", "NUMBER"},
85                "start": {"'+'", "NUMBER"},
86            },
87        )
88
89    def test_optional_after(self) -> None:
90        grammar = """
91        start: term NEWLINE
92        term: NUMBER ['+']
93        """
94        self.assertEqual(
95            self.calculate_first_sets(grammar),
96            {"term": {"NUMBER"}, "start": {"NUMBER"}},
97        )
98
99    def test_optional_before(self) -> None:
100        grammar = """
101        start: term NEWLINE
102        term: ['+'] NUMBER
103        """
104        self.assertEqual(
105            self.calculate_first_sets(grammar),
106            {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}},
107        )
108
109    def test_repeat_0(self) -> None:
110        grammar = """
111        start: thing* "+" NEWLINE
112        thing: NUMBER
113        """
114        self.assertEqual(
115            self.calculate_first_sets(grammar),
116            {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}},
117        )
118
119    def test_repeat_0_with_group(self) -> None:
120        grammar = """
121        start: ('+' '-')* term NEWLINE
122        term: NUMBER
123        """
124        self.assertEqual(
125            self.calculate_first_sets(grammar),
126            {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}},
127        )
128
129    def test_repeat_1(self) -> None:
130        grammar = """
131        start: thing+ '-' NEWLINE
132        thing: NUMBER
133        """
134        self.assertEqual(
135            self.calculate_first_sets(grammar),
136            {"thing": {"NUMBER"}, "start": {"NUMBER"}},
137        )
138
139    def test_repeat_1_with_group(self) -> None:
140        grammar = """
141        start: ('+' term)+ term NEWLINE
142        term: NUMBER
143        """
144        self.assertEqual(
145            self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}}
146        )
147
148    def test_gather(self) -> None:
149        grammar = """
150        start: ','.thing+ NEWLINE
151        thing: NUMBER
152        """
153        self.assertEqual(
154            self.calculate_first_sets(grammar),
155            {"thing": {"NUMBER"}, "start": {"NUMBER"}},
156        )
157
158    def test_positive_lookahead(self) -> None:
159        grammar = """
160        start: expr NEWLINE
161        expr: &'a' opt
162        opt: 'a' | 'b' | 'c'
163        """
164        self.assertEqual(
165            self.calculate_first_sets(grammar),
166            {
167                "expr": {"'a'"},
168                "start": {"'a'"},
169                "opt": {"'b'", "'c'", "'a'"},
170            },
171        )
172
173    def test_negative_lookahead(self) -> None:
174        grammar = """
175        start: expr NEWLINE
176        expr: !'a' opt
177        opt: 'a' | 'b' | 'c'
178        """
179        self.assertEqual(
180            self.calculate_first_sets(grammar),
181            {
182                "opt": {"'b'", "'a'", "'c'"},
183                "expr": {"'b'", "'c'"},
184                "start": {"'b'", "'c'"},
185            },
186        )
187
188    def test_left_recursion(self) -> None:
189        grammar = """
190        start: expr NEWLINE
191        expr: ('-' term | expr '+' term | term)
192        term: NUMBER
193        foo: 'foo'
194        bar: 'bar'
195        baz: 'baz'
196        """
197        self.assertEqual(
198            self.calculate_first_sets(grammar),
199            {
200                "expr": {"NUMBER", "'-'"},
201                "term": {"NUMBER"},
202                "start": {"NUMBER", "'-'"},
203                "foo": {"'foo'"},
204                "bar": {"'bar'"},
205                "baz": {"'baz'"},
206            },
207        )
208
209    def test_advance_left_recursion(self) -> None:
210        grammar = """
211        start: NUMBER | sign start
212        sign: ['-']
213        """
214        self.assertEqual(
215            self.calculate_first_sets(grammar),
216            {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}},
217        )
218
219    def test_mutual_left_recursion(self) -> None:
220        grammar = """
221        start: foo 'E'
222        foo: bar 'A' | 'B'
223        bar: foo 'C' | 'D'
224        """
225        self.assertEqual(
226            self.calculate_first_sets(grammar),
227            {
228                "foo": {"'D'", "'B'"},
229                "bar": {"'D'"},
230                "start": {"'D'", "'B'"},
231            },
232        )
233
234    def test_nasty_left_recursion(self) -> None:
235        # TODO: Validate this
236        grammar = """
237        start: target '='
238        target: maybe '+' | NAME
239        maybe: maybe '-' | target
240        """
241        self.assertEqual(
242            self.calculate_first_sets(grammar),
243            {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}},
244        )
245
246    def test_nullable_rule(self) -> None:
247        grammar = """
248        start: sign thing $
249        sign: ['-']
250        thing: NUMBER
251        """
252        self.assertEqual(
253            self.calculate_first_sets(grammar),
254            {
255                "sign": {"", "'-'"},
256                "thing": {"NUMBER"},
257                "start": {"NUMBER", "'-'"},
258            },
259        )
260
261    def test_epsilon_production_in_start_rule(self) -> None:
262        grammar = """
263        start: ['-'] $
264        """
265        self.assertEqual(
266            self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}}
267        )
268
269    def test_multiple_nullable_rules(self) -> None:
270        grammar = """
271        start: sign thing other another $
272        sign: ['-']
273        thing: ['+']
274        other: '*'
275        another: '/'
276        """
277        self.assertEqual(
278            self.calculate_first_sets(grammar),
279            {
280                "sign": {"", "'-'"},
281                "thing": {"'+'", ""},
282                "start": {"'+'", "'-'", "'*'"},
283                "other": {"'*'"},
284                "another": {"'/'"},
285            },
286        )
287