1import unittest 2 3from test import test_tools 4from typing import Dict, Set 5 6test_tools.skip_if_missing("peg_generator") 7with test_tools.imports_under_tool("peg_generator"): 8 from pegen.grammar_parser import GeneratedParser as GrammarParser 9 from pegen.testutil import parse_string 10 from pegen.first_sets import FirstSetCalculator 11 from pegen.grammar import Grammar 12 13 14class TestFirstSets(unittest.TestCase): 15 def calculate_first_sets(self, grammar_source: str) -> Dict[str, Set[str]]: 16 grammar: Grammar = parse_string(grammar_source, GrammarParser) 17 return FirstSetCalculator(grammar.rules).calculate() 18 19 def test_alternatives(self) -> None: 20 grammar = """ 21 start: expr NEWLINE? ENDMARKER 22 expr: A | B 23 A: 'a' | '-' 24 B: 'b' | '+' 25 """ 26 self.assertEqual( 27 self.calculate_first_sets(grammar), 28 { 29 "A": {"'a'", "'-'"}, 30 "B": {"'+'", "'b'"}, 31 "expr": {"'+'", "'a'", "'b'", "'-'"}, 32 "start": {"'+'", "'a'", "'b'", "'-'"}, 33 }, 34 ) 35 36 def test_optionals(self) -> None: 37 grammar = """ 38 start: expr NEWLINE 39 expr: ['a'] ['b'] 'c' 40 """ 41 self.assertEqual( 42 self.calculate_first_sets(grammar), 43 { 44 "expr": {"'c'", "'a'", "'b'"}, 45 "start": {"'c'", "'a'", "'b'"}, 46 }, 47 ) 48 49 def test_repeat_with_separator(self) -> None: 50 grammar = """ 51 start: ','.thing+ NEWLINE 52 thing: NUMBER 53 """ 54 self.assertEqual( 55 self.calculate_first_sets(grammar), 56 {"thing": {"NUMBER"}, "start": {"NUMBER"}}, 57 ) 58 59 def test_optional_operator(self) -> None: 60 grammar = """ 61 start: sum NEWLINE 62 sum: (term)? 'b' 63 term: NUMBER 64 """ 65 self.assertEqual( 66 self.calculate_first_sets(grammar), 67 { 68 "term": {"NUMBER"}, 69 "sum": {"NUMBER", "'b'"}, 70 "start": {"'b'", "NUMBER"}, 71 }, 72 ) 73 74 def test_optional_literal(self) -> None: 75 grammar = """ 76 start: sum NEWLINE 77 sum: '+' ? term 78 term: NUMBER 79 """ 80 self.assertEqual( 81 self.calculate_first_sets(grammar), 82 { 83 "term": {"NUMBER"}, 84 "sum": {"'+'", "NUMBER"}, 85 "start": {"'+'", "NUMBER"}, 86 }, 87 ) 88 89 def test_optional_after(self) -> None: 90 grammar = """ 91 start: term NEWLINE 92 term: NUMBER ['+'] 93 """ 94 self.assertEqual( 95 self.calculate_first_sets(grammar), 96 {"term": {"NUMBER"}, "start": {"NUMBER"}}, 97 ) 98 99 def test_optional_before(self) -> None: 100 grammar = """ 101 start: term NEWLINE 102 term: ['+'] NUMBER 103 """ 104 self.assertEqual( 105 self.calculate_first_sets(grammar), 106 {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}}, 107 ) 108 109 def test_repeat_0(self) -> None: 110 grammar = """ 111 start: thing* "+" NEWLINE 112 thing: NUMBER 113 """ 114 self.assertEqual( 115 self.calculate_first_sets(grammar), 116 {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}}, 117 ) 118 119 def test_repeat_0_with_group(self) -> None: 120 grammar = """ 121 start: ('+' '-')* term NEWLINE 122 term: NUMBER 123 """ 124 self.assertEqual( 125 self.calculate_first_sets(grammar), 126 {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}}, 127 ) 128 129 def test_repeat_1(self) -> None: 130 grammar = """ 131 start: thing+ '-' NEWLINE 132 thing: NUMBER 133 """ 134 self.assertEqual( 135 self.calculate_first_sets(grammar), 136 {"thing": {"NUMBER"}, "start": {"NUMBER"}}, 137 ) 138 139 def test_repeat_1_with_group(self) -> None: 140 grammar = """ 141 start: ('+' term)+ term NEWLINE 142 term: NUMBER 143 """ 144 self.assertEqual( 145 self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}} 146 ) 147 148 def test_gather(self) -> None: 149 grammar = """ 150 start: ','.thing+ NEWLINE 151 thing: NUMBER 152 """ 153 self.assertEqual( 154 self.calculate_first_sets(grammar), 155 {"thing": {"NUMBER"}, "start": {"NUMBER"}}, 156 ) 157 158 def test_positive_lookahead(self) -> None: 159 grammar = """ 160 start: expr NEWLINE 161 expr: &'a' opt 162 opt: 'a' | 'b' | 'c' 163 """ 164 self.assertEqual( 165 self.calculate_first_sets(grammar), 166 { 167 "expr": {"'a'"}, 168 "start": {"'a'"}, 169 "opt": {"'b'", "'c'", "'a'"}, 170 }, 171 ) 172 173 def test_negative_lookahead(self) -> None: 174 grammar = """ 175 start: expr NEWLINE 176 expr: !'a' opt 177 opt: 'a' | 'b' | 'c' 178 """ 179 self.assertEqual( 180 self.calculate_first_sets(grammar), 181 { 182 "opt": {"'b'", "'a'", "'c'"}, 183 "expr": {"'b'", "'c'"}, 184 "start": {"'b'", "'c'"}, 185 }, 186 ) 187 188 def test_left_recursion(self) -> None: 189 grammar = """ 190 start: expr NEWLINE 191 expr: ('-' term | expr '+' term | term) 192 term: NUMBER 193 foo: 'foo' 194 bar: 'bar' 195 baz: 'baz' 196 """ 197 self.assertEqual( 198 self.calculate_first_sets(grammar), 199 { 200 "expr": {"NUMBER", "'-'"}, 201 "term": {"NUMBER"}, 202 "start": {"NUMBER", "'-'"}, 203 "foo": {"'foo'"}, 204 "bar": {"'bar'"}, 205 "baz": {"'baz'"}, 206 }, 207 ) 208 209 def test_advance_left_recursion(self) -> None: 210 grammar = """ 211 start: NUMBER | sign start 212 sign: ['-'] 213 """ 214 self.assertEqual( 215 self.calculate_first_sets(grammar), 216 {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}}, 217 ) 218 219 def test_mutual_left_recursion(self) -> None: 220 grammar = """ 221 start: foo 'E' 222 foo: bar 'A' | 'B' 223 bar: foo 'C' | 'D' 224 """ 225 self.assertEqual( 226 self.calculate_first_sets(grammar), 227 { 228 "foo": {"'D'", "'B'"}, 229 "bar": {"'D'"}, 230 "start": {"'D'", "'B'"}, 231 }, 232 ) 233 234 def test_nasty_left_recursion(self) -> None: 235 # TODO: Validate this 236 grammar = """ 237 start: target '=' 238 target: maybe '+' | NAME 239 maybe: maybe '-' | target 240 """ 241 self.assertEqual( 242 self.calculate_first_sets(grammar), 243 {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}}, 244 ) 245 246 def test_nullable_rule(self) -> None: 247 grammar = """ 248 start: sign thing $ 249 sign: ['-'] 250 thing: NUMBER 251 """ 252 self.assertEqual( 253 self.calculate_first_sets(grammar), 254 { 255 "sign": {"", "'-'"}, 256 "thing": {"NUMBER"}, 257 "start": {"NUMBER", "'-'"}, 258 }, 259 ) 260 261 def test_epsilon_production_in_start_rule(self) -> None: 262 grammar = """ 263 start: ['-'] $ 264 """ 265 self.assertEqual( 266 self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}} 267 ) 268 269 def test_multiple_nullable_rules(self) -> None: 270 grammar = """ 271 start: sign thing other another $ 272 sign: ['-'] 273 thing: ['+'] 274 other: '*' 275 another: '/' 276 """ 277 self.assertEqual( 278 self.calculate_first_sets(grammar), 279 { 280 "sign": {"", "'-'"}, 281 "thing": {"'+'", ""}, 282 "start": {"'+'", "'-'", "'*'"}, 283 "other": {"'*'"}, 284 "another": {"'/'"}, 285 }, 286 ) 287