xref: /aosp_15_r20/external/emboss/compiler/front_end/tokenizer_test.py (revision 99e0aae7469b87d12f0ad23e61142c2d74c1ef70)
# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for tokenizer."""

import unittest
from compiler.front_end import tokenizer
from compiler.util import error
from compiler.util import parser_types

22*99e0aae7SDavid Rees
def _token_symbols(token_list):
  """Returns the symbol name of each token in token_list, in order."""
  return list(map(lambda token: token.symbol, token_list))
26*99e0aae7SDavid Rees
27*99e0aae7SDavid Rees
class TokenizerTest(unittest.TestCase):
  """Tests for the tokenizer.tokenize function."""

  def _assert_single_error(self, text, file_name, location, message):
    """Tokenizes text and asserts exactly one error and no tokens."""
    tokens, errors = tokenizer.tokenize(text, file_name)
    self.assertFalse(tokens)
    expected_errors = [[error.error(file_name, location, message)]]
    self.assertEqual(expected_errors, errors)

  def test_bad_indent_tab_versus_space(self):
    # A bad indent is one that doesn't match a previous unmatched indent.
    self._assert_single_error(
        " a\n\tb", "file",
        parser_types.make_location((2, 1), (2, 2)), "Bad indentation")

  def test_bad_indent_tab_versus_eight_spaces(self):
    self._assert_single_error(
        "        a\n\tb", "file",
        parser_types.make_location((2, 1), (2, 2)), "Bad indentation")

  def test_bad_indent_tab_versus_four_spaces(self):
    self._assert_single_error(
        "    a\n\tb", "file",
        parser_types.make_location((2, 1), (2, 2)), "Bad indentation")

  def test_bad_indent_two_spaces_versus_one_space(self):
    self._assert_single_error(
        "  a\n b", "file",
        parser_types.make_location((2, 1), (2, 2)), "Bad indentation")

  def test_bad_indent_matches_closed_indent(self):
    # An indent that matches an indent level that has already been dedented
    # past is still bad.
    self._assert_single_error(
        " a\nb\n  c\n d", "file",
        parser_types.make_location((4, 1), (4, 2)), "Bad indentation")

  def test_bad_string_after_string_with_escaped_backslash_at_end(self):
    # The trailing '"' after the complete string r'"\\"' has no match.
    self._assert_single_error(
        r'"\\""', "name",
        parser_types.make_location((1, 5), (1, 6)), "Unrecognized token")
67*99e0aae7SDavid Rees
68*99e0aae7SDavid Rees
def _make_short_token_match_tests():
  """Makes tests for short, simple tokenization cases.

  For each entry in `cases`, a test method is attached to TokenizerTest that
  tokenizes the key text and asserts that the resulting token symbols exactly
  match the value list, with no errors.
  """
  # Symbol name of the end-of-line token, which the tokenizer appends to any
  # non-empty input.
  eol = '"\\n"'
  # Maps input text to the expected list of token symbol names.
  cases = {
      # Word classification: CamelWord / SnakeWord / ShoutyWord / BadWord.
      "Cam": ["CamelWord", eol],
      "Ca9": ["CamelWord", eol],
      "CanB": ["CamelWord", eol],
      "CanBee": ["CamelWord", eol],
      "CBa": ["CamelWord", eol],
      "cam": ["SnakeWord", eol],
      "ca9": ["SnakeWord", eol],
      "can_b": ["SnakeWord", eol],
      "can_bee": ["SnakeWord", eol],
      "c_ba": ["SnakeWord", eol],
      "cba_": ["SnakeWord", eol],
      "c_b_a_": ["SnakeWord", eol],
      "CAM": ["ShoutyWord", eol],
      "CA9": ["ShoutyWord", eol],
      "CAN_B": ["ShoutyWord", eol],
      "CAN_BEE": ["ShoutyWord", eol],
      "C_BA": ["ShoutyWord", eol],
      "C": ["BadWord", eol],
      "C1": ["BadWord", eol],
      "c": ["SnakeWord", eol],
      "$": ["BadWord", eol],
      "_": ["BadWord", eol],
      "_a": ["BadWord", eol],
      "_A": ["BadWord", eol],
      "Cb_A": ["BadWord", eol],
      "aCb": ["BadWord", eol],
      # Whitespace between tokens is skipped.
      "a  b": ["SnakeWord", "SnakeWord", eol],
      "a\tb": ["SnakeWord", "SnakeWord", eol],
      "a \t b ": ["SnakeWord", "SnakeWord", eol],
      " \t ": [eol],
      # Comments run to end of line.
      "a #b": ["SnakeWord", "Comment", eol],
      "a#": ["SnakeWord", "Comment", eol],
      "# b": ["Comment", eol],
      "    # b": ["Comment", eol],
      "    #": ["Comment", eol],
      # Empty input produces no tokens at all; a bare newline produces eol.
      "": [],
      "\n": [eol],
      "\na": [eol, "SnakeWord", eol],
      # Documentation markers: exactly "--" followed by a space (or end of
      # line) is Documentation; malformed variants are BadDocumentation.
      "a--example": ["SnakeWord", "BadDocumentation", eol],
      "a ---- example": ["SnakeWord", "BadDocumentation", eol],
      "a --- example": ["SnakeWord", "BadDocumentation", eol],
      "a-- example": ["SnakeWord", "Documentation", eol],
      "a --    -- example": ["SnakeWord", "Documentation", eol],
      "a -- - example": ["SnakeWord", "Documentation", eol],
      "--": ["Documentation", eol],
      "-- ": ["Documentation", eol],
      "--  ": ["Documentation", eol],
      # Keywords tokenize as themselves (quoted symbol names); near-misses
      # fall back to BadWord or SnakeWord.
      "$default": ['"$default"', eol],
      "$defaultx": ["BadWord", eol],
      "$def": ["BadWord", eol],
      "x$default": ["BadWord", eol],
      "9$default": ["BadWord", eol],
      "struct": ['"struct"', eol],
      "external": ['"external"', eol],
      "bits": ['"bits"', eol],
      "enum": ['"enum"', eol],
      "as": ['"as"', eol],
      "import": ['"import"', eol],
      "true": ["BooleanConstant", eol],
      "false": ["BooleanConstant", eol],
      "truex": ["SnakeWord", eol],
      "falsex": ["SnakeWord", eol],
      "structx": ["SnakeWord", eol],
      "bitsx": ["SnakeWord", eol],
      "enumx": ["SnakeWord", eol],
      # Binary/hex numbers: underscore group separators must fall on
      # consistent 4- or 8-digit boundaries; anything else is BadNumber.
      "0b": ["BadNumber", eol],
      "0x": ["BadNumber", eol],
      "0b011101": ["Number", eol],
      "0b0": ["Number", eol],
      "0b0111_1111_0000": ["Number", eol],
      "0b00_000_00": ["BadNumber", eol],
      "0b0_0_0": ["BadNumber", eol],
      "0b0111012": ["BadNumber", eol],
      "0b011101x": ["BadWord", eol],
      "0b011101b": ["BadNumber", eol],
      "0B0": ["BadNumber", eol],
      "0X0": ["BadNumber", eol],
      "0b_": ["BadNumber", eol],
      "0x_": ["BadNumber", eol],
      "0b__": ["BadNumber", eol],
      "0x__": ["BadNumber", eol],
      "0b_0000": ["Number", eol],
      "0b0000_": ["BadNumber", eol],
      "0b00_____00": ["BadNumber", eol],
      "0x00_000_00": ["BadNumber", eol],
      "0x0_0_0": ["BadNumber", eol],
      "0b____0____": ["BadNumber", eol],
      "0b00000000000000000000": ["Number", eol],
      "0b_00000000": ["Number", eol],
      "0b0000_0000_0000": ["Number", eol],
      "0b000_0000_0000": ["Number", eol],
      "0b00_0000_0000": ["Number", eol],
      "0b0_0000_0000": ["Number", eol],
      "0b_0000_0000_0000": ["Number", eol],
      "0b_000_0000_0000": ["Number", eol],
      "0b_00_0000_0000": ["Number", eol],
      "0b_0_0000_0000": ["Number", eol],
      "0b00000000_00000000_00000000": ["Number", eol],
      "0b0000000_00000000_00000000": ["Number", eol],
      "0b000000_00000000_00000000": ["Number", eol],
      "0b00000_00000000_00000000": ["Number", eol],
      "0b0000_00000000_00000000": ["Number", eol],
      "0b000_00000000_00000000": ["Number", eol],
      "0b00_00000000_00000000": ["Number", eol],
      "0b0_00000000_00000000": ["Number", eol],
      "0b_00000000_00000000_00000000": ["Number", eol],
      "0b_0000000_00000000_00000000": ["Number", eol],
      "0b_000000_00000000_00000000": ["Number", eol],
      "0b_00000_00000000_00000000": ["Number", eol],
      "0b_0000_00000000_00000000": ["Number", eol],
      "0b_000_00000000_00000000": ["Number", eol],
      "0b_00_00000000_00000000": ["Number", eol],
      "0b_0_00000000_00000000": ["Number", eol],
      "0x0": ["Number", eol],
      "0x00000000000000000000": ["Number", eol],
      "0x_0000": ["Number", eol],
      "0x_00000000": ["Number", eol],
      "0x0000_0000_0000": ["Number", eol],
      "0x000_0000_0000": ["Number", eol],
      "0x00_0000_0000": ["Number", eol],
      "0x0_0000_0000": ["Number", eol],
      "0x_0000_0000_0000": ["Number", eol],
      "0x_000_0000_0000": ["Number", eol],
      "0x_00_0000_0000": ["Number", eol],
      "0x_0_0000_0000": ["Number", eol],
      "0x00000000_00000000_00000000": ["Number", eol],
      "0x0000000_00000000_00000000": ["Number", eol],
      "0x000000_00000000_00000000": ["Number", eol],
      "0x00000_00000000_00000000": ["Number", eol],
      "0x0000_00000000_00000000": ["Number", eol],
      "0x000_00000000_00000000": ["Number", eol],
      "0x00_00000000_00000000": ["Number", eol],
      "0x0_00000000_00000000": ["Number", eol],
      "0x_00000000_00000000_00000000": ["Number", eol],
      "0x_0000000_00000000_00000000": ["Number", eol],
      "0x_000000_00000000_00000000": ["Number", eol],
      "0x_00000_00000000_00000000": ["Number", eol],
      "0x_0000_00000000_00000000": ["Number", eol],
      "0x_000_00000000_00000000": ["Number", eol],
      "0x_00_00000000_00000000": ["Number", eol],
      "0x_0_00000000_00000000": ["Number", eol],
      "0x__00000000_00000000": ["BadNumber", eol],
      "0x00000000_00000000_0000": ["BadNumber", eol],
      "0x00000000_0000_0000": ["BadNumber", eol],
      "0x_00000000000000000000": ["BadNumber", eol],
      "0b_00000000000000000000": ["BadNumber", eol],
      "0b00000000_00000000_0000": ["BadNumber", eol],
      "0b00000000_0000_0000": ["BadNumber", eol],
      "0x0000_": ["BadNumber", eol],
      "0x00_____00": ["BadNumber", eol],
      "0x____0____": ["BadNumber", eol],
      # Names reserved for the Emboss implementation, in any word style.
      "EmbossReserved": ["BadWord", eol],
      "EmbossReservedA": ["BadWord", eol],
      "EmbossReserved_": ["BadWord", eol],
      "EMBOSS_RESERVED": ["BadWord", eol],
      "EMBOSS_RESERVED_": ["BadWord", eol],
      "EMBOSS_RESERVEDA": ["BadWord", eol],
      "emboss_reserved": ["BadWord", eol],
      "emboss_reserved_": ["BadWord", eol],
      "emboss_reserveda": ["BadWord", eol],
      "0x0123456789abcdefABCDEF": ["Number", eol],
      # Decimal numbers: underscores must separate standard thousands groups.
      "0": ["Number", eol],
      "1": ["Number", eol],
      "1a": ["BadNumber", eol],
      "1g": ["BadWord", eol],
      "1234567890": ["Number", eol],
      "1_234_567_890": ["Number", eol],
      "234_567_890": ["Number", eol],
      "34_567_890": ["Number", eol],
      "4_567_890": ["Number", eol],
      "1_2_3_4_5_6_7_8_9_0": ["BadNumber", eol],
      "1234567890_": ["BadNumber", eol],
      "1__234567890": ["BadNumber", eol],
      "_1234567890": ["BadWord", eol],
      # Punctuation tokenizes one character (or operator) at a time.
      "[]": ['"["', '"]"', eol],
      "()": ['"("', '")"', eol],
      "..": ['"."', '"."', eol],
      "...": ['"."', '"."', '"."', eol],
      "....": ['"."', '"."', '"."', '"."', eol],
      # String literals with escape sequences.
      '"abc"': ["String", eol],
      '""': ["String", eol],
      r'"\\"': ["String", eol],
      r'"\""': ["String", eol],
      r'"\n"': ["String", eol],
      r'"\\n"': ["String", eol],
      r'"\\xyz"': ["String", eol],
      r'"\\\\"': ["String", eol],
  }
  # Every operator and keyword tokenizes as itself; the symbol name is the
  # token text wrapped in double quotes.
  for c in ("[ ] ( ) ? : = + - * . == != < <= > >= && || , $max $present "
            "$upper_bound $lower_bound $size_in_bits $size_in_bytes "
            "$max_size_in_bits $max_size_in_bytes $min_size_in_bits "
            "$min_size_in_bytes "
            "$default struct bits enum external import as if let").split():
    cases[c] = ['"' + c + '"', eol]

  def make_test_case(case):
    # Separate factory function so that each generated test closes over its
    # own `case` value rather than the loop variable.

    def test_case(self):
      tokens, errors = tokenizer.tokenize(case, "name")
      symbols = _token_symbols(tokens)
      self.assertFalse(errors)
      self.assertEqual(symbols, cases[case])

    return test_case

  for c in cases:
    # {!r} keeps the test name unambiguous for inputs containing whitespace
    # or escapes.
    setattr(TokenizerTest, "testShortTokenMatch{!r}".format(c),
            make_test_case(c))
281*99e0aae7SDavid Rees
282*99e0aae7SDavid Rees
def _make_bad_char_tests():
  """Makes tests that an error is returned for bad characters."""

  def make_test_case(char):
    # Bind `char` per test so each generated test sees its own character.

    def test_case(self):
      bad_location = parser_types.make_location((1, 1), (1, 2))
      expected_errors = [[error.error("name", bad_location,
                                      "Unrecognized token")]]
      tokens, errors = tokenizer.tokenize(char, "name")
      self.assertFalse(tokens)
      self.assertEqual(expected_errors, errors)

    return test_case

  for bad_char in "~`!@%^&\\|;'\"/{}":
    setattr(TokenizerTest, "testBadChar{!r}".format(bad_char),
            make_test_case(bad_char))
298*99e0aae7SDavid Rees
299*99e0aae7SDavid Rees
def _make_bad_string_tests():
  """Makes tests that an error is returned for bad strings."""
  # Unterminated strings and strings with invalid escape sequences.
  bad_strings = (r'"\"', '"\\\n"', r'"\\\"', r'"', r'"\q"', r'"\\\q"')

  def make_test_case(bad_string):
    # Bind `bad_string` per test so each generated test sees its own input.

    def test_case(self):
      tokens, errors = tokenizer.tokenize(bad_string, "name")
      self.assertFalse(tokens)
      expected_errors = [[error.error(
          "name", parser_types.make_location((1, 1), (1, 2)),
          "Unrecognized token")]]
      self.assertEqual(expected_errors, errors)

    return test_case

  for s in bad_strings:
    setattr(TokenizerTest, "testBadString{!r}".format(s),
            make_test_case(s))
316*99e0aae7SDavid Rees
317*99e0aae7SDavid Rees
def _make_multiline_tests():
  """Makes tests for indent/dedent insertion and eol insertion."""

  # Short aliases keep the expectation lists readable.
  cmt = "Comment"
  eol = '"\\n"'
  sw = "SnakeWord"
  ind = "Indent"
  ded = "Dedent"
  # Maps multiline input text to the expected token symbol sequence.
  cases = {
      "a\nb\n": [sw, eol, sw, eol],
      "a\n\nb\n": [sw, eol, eol, sw, eol],
      "a\n#foo\nb\n": [sw, eol, cmt, eol, sw, eol],
      "a\n   #foo\nb\n": [sw, eol, cmt, eol, sw, eol],
      "a\n b\n": [sw, eol, ind, sw, eol, ded],
      "a\n b\n\n": [sw, eol, ind, sw, eol, eol, ded],
      "a\n b\n  c\n": [sw, eol, ind, sw, eol, ind, sw, eol, ded, ded],
      "a\n b\n c\n": [sw, eol, ind, sw, eol, sw, eol, ded],
      "a\n b\n\n c\n": [sw, eol, ind, sw, eol, eol, sw, eol, ded],
      "a\n b\n    #\n c\n": [sw, eol, ind, sw, eol, cmt, eol, sw, eol, ded],
      "a\n\tb\n    #\n\tc\n": [sw, eol, ind, sw, eol, cmt, eol, sw, eol, ded],
      " a\n  b\n   c\n d\n": [ind, sw, eol, ind, sw, eol, ind, sw, eol, ded,
                              ded, sw, eol, ded],
  }

  def make_test_case(case):
    # Bind `case` per test so each generated test sees its own input.

    def test_case(self):
      tokens, errors = tokenizer.tokenize(case, "file")
      self.assertFalse(errors)
      self.assertEqual(_token_symbols(tokens), cases[case])

    return test_case

  for case_text in cases:
    setattr(TokenizerTest, "testMultiline{!r}".format(case_text),
            make_test_case(case_text))
353*99e0aae7SDavid Rees
354*99e0aae7SDavid Rees
def _make_offset_tests():
  """Makes tests that the tokenizer fills in correct source locations."""
  # Maps input text to the expected "line:col-line:col" location of each
  # token, in order (including synthetic eol/Indent/Dedent tokens).
  cases = {
      "a+": ["1:1-1:2", "1:2-1:3", "1:3-1:3"],
      "a   +   ": ["1:1-1:2", "1:5-1:6", "1:9-1:9"],
      "a\n\nb": ["1:1-1:2", "1:2-1:2", "2:1-2:1", "3:1-3:2", "3:2-3:2"],
      "a\n  b": ["1:1-1:2", "1:2-1:2", "2:1-2:3", "2:3-2:4", "2:4-2:4",
                 "3:1-3:1"],
      "a\n  b\nc": ["1:1-1:2", "1:2-1:2", "2:1-2:3", "2:3-2:4", "2:4-2:4",
                    "3:1-3:1", "3:1-3:2", "3:2-3:2"],
      "a\n b\n  c": ["1:1-1:2", "1:2-1:2", "2:1-2:2", "2:2-2:3", "2:3-2:3",
                     "3:2-3:3", "3:3-3:4", "3:4-3:4", "4:1-4:1", "4:1-4:1"],
  }

  def make_test_case(case):
    # Bind `case` per test so each generated test sees its own input.

    def test_case(self):
      tokens = tokenizer.tokenize(case, "file")[0]
      actual_locations = [parser_types.format_location(token.source_location)
                          for token in tokens]
      self.assertEqual(actual_locations, cases[case])

    return test_case

  for case_text in cases:
    setattr(TokenizerTest, "testOffset{!r}".format(case_text),
            make_test_case(case_text))
380*99e0aae7SDavid Rees
# Attach all dynamically generated test methods to TokenizerTest at import
# time, so unittest discovery sees them.
for _test_maker in (_make_short_token_match_tests,
                    _make_bad_char_tests,
                    _make_bad_string_tests,
                    _make_multiline_tests,
                    _make_offset_tests):
  _test_maker()

if __name__ == "__main__":
  unittest.main()
389