# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for tokenizer."""

import unittest
from compiler.front_end import tokenizer
from compiler.util import error
from compiler.util import parser_types


def _token_symbols(token_list):
  """Given a list of tokens, returns a list of their symbol names."""
  return [token.symbol for token in token_list]


class TokenizerTest(unittest.TestCase):
  """Tests for the tokenizer.tokenize function."""

  def _assert_bad_indent(self, text, file_name, line):
    """Asserts tokenize(text) fails with "Bad indentation" on `line`.

    Checks that no tokens are returned and that exactly one error group is
    produced, pointing at columns 1-2 of `line`.
    """
    tokens, errors = tokenizer.tokenize(text, file_name)
    self.assertFalse(tokens)
    self.assertEqual([[error.error(file_name, parser_types.make_location(
        (line, 1), (line, 2)), "Bad indentation")]], errors)

  def test_bad_indent_tab_versus_space(self):
    # A bad indent is one that doesn't match a previous unmatched indent.
    self._assert_bad_indent(" a\n\tb", "file", 2)

  def test_bad_indent_tab_versus_eight_spaces(self):
    # NOTE(review): leading-space counts were collapsed in the corrupted
    # dump; eight spaces reconstructed from the test name — confirm.
    self._assert_bad_indent("        a\n\tb", "file", 2)

  def test_bad_indent_tab_versus_four_spaces(self):
    self._assert_bad_indent("    a\n\tb", "file", 2)

  def test_bad_indent_two_spaces_versus_one_space(self):
    self._assert_bad_indent("  a\n b", "file", 2)

  def test_bad_indent_matches_closed_indent(self):
    # Line 4's one-space indent matches line 1's indent, but that indent
    # was already closed by line 2, so it must be rejected.
    self._assert_bad_indent(" a\nb\n  c\n d", "file", 4)

  def test_bad_string_after_string_with_escaped_backslash_at_end(self):
    # r'"\\"' is a complete string; the trailing lone '"' is unrecognized.
    tokens, errors = tokenizer.tokenize(r'"\\""', "name")
    self.assertFalse(tokens)
    self.assertEqual([[error.error("name", parser_types.make_location(
        (1, 5), (1, 6)), "Unrecognized token")]], errors)


def _make_short_token_match_tests():
  """Makes tests for short, simple tokenization cases."""
  eol = '"\\n"'
  cases = {
      "Cam": ["CamelWord", eol],
      "Ca9": ["CamelWord", eol],
      "CanB": ["CamelWord", eol],
      "CanBee": ["CamelWord", eol],
      "CBa": ["CamelWord", eol],
      "cam": ["SnakeWord", eol],
      "ca9": ["SnakeWord", eol],
      "can_b": ["SnakeWord", eol],
      "can_bee": ["SnakeWord", eol],
      "c_ba": ["SnakeWord", eol],
      "cba_": ["SnakeWord", eol],
      "c_b_a_": ["SnakeWord", eol],
      "CAM": ["ShoutyWord", eol],
      "CA9": ["ShoutyWord", eol],
      "CAN_B": ["ShoutyWord", eol],
      "CAN_BEE": ["ShoutyWord", eol],
      "C_BA": ["ShoutyWord", eol],
      "C": ["BadWord", eol],
      "C1": ["BadWord", eol],
      "c": ["SnakeWord", eol],
      "$": ["BadWord", eol],
      "_": ["BadWord", eol],
      "_a": ["BadWord", eol],
      "_A": ["BadWord", eol],
      "Cb_A": ["BadWord", eol],
      "aCb": ["BadWord", eol],
      "a b": ["SnakeWord", "SnakeWord", eol],
      "a\tb": ["SnakeWord", "SnakeWord", eol],
      "a \t b ": ["SnakeWord", "SnakeWord", eol],
      " \t ": [eol],
      "a #b": ["SnakeWord", "Comment", eol],
      "a#": ["SnakeWord", "Comment", eol],
      "# b": ["Comment", eol],
      "  # b": ["Comment", eol],
      "  #": ["Comment", eol],
      "": [],
      "\n": [eol],
      "\na": [eol, "SnakeWord", eol],
      "a--example": ["SnakeWord", "BadDocumentation", eol],
      "a ---- example": ["SnakeWord", "BadDocumentation", eol],
      "a --- example": ["SnakeWord", "BadDocumentation", eol],
      "a-- example": ["SnakeWord", "Documentation", eol],
      "a -- -- example": ["SnakeWord", "Documentation", eol],
      "a -- - example": ["SnakeWord", "Documentation", eol],
      "--": ["Documentation", eol],
      "-- ": ["Documentation", eol],
      # NOTE(review): this key must differ from '-- ' above; the corrupted
      # dump collapsed trailing spaces — two trailing spaces assumed.
      "--  ": ["Documentation", eol],
      "$default": ['"$default"', eol],
      "$defaultx": ["BadWord", eol],
      "$def": ["BadWord", eol],
      "x$default": ["BadWord", eol],
      "9$default": ["BadWord", eol],
      "struct": ['"struct"', eol],
      "external": ['"external"', eol],
      "bits": ['"bits"', eol],
      "enum": ['"enum"', eol],
      "as": ['"as"', eol],
      "import": ['"import"', eol],
      "true": ["BooleanConstant", eol],
      "false": ["BooleanConstant", eol],
      "truex": ["SnakeWord", eol],
      "falsex": ["SnakeWord", eol],
      "structx": ["SnakeWord", eol],
      "bitsx": ["SnakeWord", eol],
      "enumx": ["SnakeWord", eol],
      "0b": ["BadNumber", eol],
      "0x": ["BadNumber", eol],
      "0b011101": ["Number", eol],
      "0b0": ["Number", eol],
      "0b0111_1111_0000": ["Number", eol],
      "0b00_000_00": ["BadNumber", eol],
      "0b0_0_0": ["BadNumber", eol],
      "0b0111012": ["BadNumber", eol],
      "0b011101x": ["BadWord", eol],
      "0b011101b": ["BadNumber", eol],
      "0B0": ["BadNumber", eol],
      "0X0": ["BadNumber", eol],
      "0b_": ["BadNumber", eol],
      "0x_": ["BadNumber", eol],
      "0b__": ["BadNumber", eol],
      "0x__": ["BadNumber", eol],
      "0b_0000": ["Number", eol],
      "0b0000_": ["BadNumber", eol],
      "0b00_____00": ["BadNumber", eol],
      "0x00_000_00": ["BadNumber", eol],
      "0x0_0_0": ["BadNumber", eol],
      "0b____0____": ["BadNumber", eol],
      "0b00000000000000000000": ["Number", eol],
      "0b_00000000": ["Number", eol],
      "0b0000_0000_0000": ["Number", eol],
      "0b000_0000_0000": ["Number", eol],
      "0b00_0000_0000": ["Number", eol],
      "0b0_0000_0000": ["Number", eol],
      "0b_0000_0000_0000": ["Number", eol],
      "0b_000_0000_0000": ["Number", eol],
      "0b_00_0000_0000": ["Number", eol],
      "0b_0_0000_0000": ["Number", eol],
      "0b00000000_00000000_00000000": ["Number", eol],
      "0b0000000_00000000_00000000": ["Number", eol],
      "0b000000_00000000_00000000": ["Number", eol],
      "0b00000_00000000_00000000": ["Number", eol],
      "0b0000_00000000_00000000": ["Number", eol],
      "0b000_00000000_00000000": ["Number", eol],
      "0b00_00000000_00000000": ["Number", eol],
      "0b0_00000000_00000000": ["Number", eol],
      "0b_00000000_00000000_00000000": ["Number", eol],
      "0b_0000000_00000000_00000000": ["Number", eol],
      "0b_000000_00000000_00000000": ["Number", eol],
      "0b_00000_00000000_00000000": ["Number", eol],
      "0b_0000_00000000_00000000": ["Number", eol],
      "0b_000_00000000_00000000": ["Number", eol],
      "0b_00_00000000_00000000": ["Number", eol],
      "0b_0_00000000_00000000": ["Number", eol],
      "0x0": ["Number", eol],
      "0x00000000000000000000": ["Number", eol],
      "0x_0000": ["Number", eol],
      "0x_00000000": ["Number", eol],
      "0x0000_0000_0000": ["Number", eol],
      "0x000_0000_0000": ["Number", eol],
      "0x00_0000_0000": ["Number", eol],
      "0x0_0000_0000": ["Number", eol],
      "0x_0000_0000_0000": ["Number", eol],
      "0x_000_0000_0000": ["Number", eol],
      "0x_00_0000_0000": ["Number", eol],
      "0x_0_0000_0000": ["Number", eol],
      "0x00000000_00000000_00000000": ["Number", eol],
      "0x0000000_00000000_00000000": ["Number", eol],
      "0x000000_00000000_00000000": ["Number", eol],
      "0x00000_00000000_00000000": ["Number", eol],
      "0x0000_00000000_00000000": ["Number", eol],
      "0x000_00000000_00000000": ["Number", eol],
      "0x00_00000000_00000000": ["Number", eol],
      "0x0_00000000_00000000": ["Number", eol],
      "0x_00000000_00000000_00000000": ["Number", eol],
      "0x_0000000_00000000_00000000": ["Number", eol],
      "0x_000000_00000000_00000000": ["Number", eol],
      "0x_00000_00000000_00000000": ["Number", eol],
      "0x_0000_00000000_00000000": ["Number", eol],
      "0x_000_00000000_00000000": ["Number", eol],
      "0x_00_00000000_00000000": ["Number", eol],
      "0x_0_00000000_00000000": ["Number", eol],
      "0x__00000000_00000000": ["BadNumber", eol],
      "0x00000000_00000000_0000": ["BadNumber", eol],
      "0x00000000_0000_0000": ["BadNumber", eol],
      "0x_00000000000000000000": ["BadNumber", eol],
      "0b_00000000000000000000": ["BadNumber", eol],
      "0b00000000_00000000_0000": ["BadNumber", eol],
      "0b00000000_0000_0000": ["BadNumber", eol],
      "0x0000_": ["BadNumber", eol],
      "0x00_____00": ["BadNumber", eol],
      "0x____0____": ["BadNumber", eol],
      "EmbossReserved": ["BadWord", eol],
      "EmbossReservedA": ["BadWord", eol],
      "EmbossReserved_": ["BadWord", eol],
      "EMBOSS_RESERVED": ["BadWord", eol],
      "EMBOSS_RESERVED_": ["BadWord", eol],
      "EMBOSS_RESERVEDA": ["BadWord", eol],
      "emboss_reserved": ["BadWord", eol],
      "emboss_reserved_": ["BadWord", eol],
      "emboss_reserveda": ["BadWord", eol],
      "0x0123456789abcdefABCDEF": ["Number", eol],
      "0": ["Number", eol],
      "1": ["Number", eol],
      "1a": ["BadNumber", eol],
      "1g": ["BadWord", eol],
      "1234567890": ["Number", eol],
      "1_234_567_890": ["Number", eol],
      "234_567_890": ["Number", eol],
      "34_567_890": ["Number", eol],
      "4_567_890": ["Number", eol],
      "1_2_3_4_5_6_7_8_9_0": ["BadNumber", eol],
      "1234567890_": ["BadNumber", eol],
      "1__234567890": ["BadNumber", eol],
      "_1234567890": ["BadWord", eol],
      "[]": ['"["', '"]"', eol],
      "()": ['"("', '")"', eol],
      "..": ['"."', '"."', eol],
      "...": ['"."', '"."', '"."', eol],
      "....": ['"."', '"."', '"."', '"."', eol],
      '"abc"': ["String", eol],
      '""': ["String", eol],
      r'"\\"': ["String", eol],
      r'"\""': ["String", eol],
      r'"\n"': ["String", eol],
      r'"\\n"': ["String", eol],
      r'"\\xyz"': ["String", eol],
      r'"\\\\"': ["String", eol],
  }
  # Punctuation and keywords tokenize as themselves (symbol == quoted text).
  for c in ("[ ] ( ) ? : = + - * . == != < <= > >= && || , $max $present "
            "$upper_bound $lower_bound $size_in_bits $size_in_bytes "
            "$max_size_in_bits $max_size_in_bytes $min_size_in_bits "
            "$min_size_in_bytes "
            "$default struct bits enum external import as if let").split():
    cases[c] = ['"' + c + '"', eol]

  def make_test_case(case):
    # Factory so each generated test closes over its own `case` value
    # (avoids the late-binding-closure pitfall in the loop below).

    def test_case(self):
      tokens, errors = tokenizer.tokenize(case, "name")
      symbols = _token_symbols(tokens)
      self.assertFalse(errors)
      self.assertEqual(symbols, cases[case])

    return test_case

  for c in cases:
    setattr(TokenizerTest, "testShortTokenMatch{!r}".format(c),
            make_test_case(c))


def _make_bad_char_tests():
  """Makes tests that an error is returned for bad characters."""

  def make_test_case(case):

    def test_case(self):
      tokens, errors = tokenizer.tokenize(case, "name")
      self.assertFalse(tokens)
      self.assertEqual([[error.error("name", parser_types.make_location(
          (1, 1), (1, 2)), "Unrecognized token")]], errors)

    return test_case

  for c in "~`!@%^&\\|;'\"/{}":
    setattr(TokenizerTest, "testBadChar{!r}".format(c), make_test_case(c))


def _make_bad_string_tests():
  """Makes tests that an error is returned for bad strings."""
  bad_strings = (r'"\"', '"\\\n"', r'"\\\"', r'"', r'"\q"', r'"\\\q"')

  def make_test_case(string):

    def test_case(self):
      tokens, errors = tokenizer.tokenize(string, "name")
      self.assertFalse(tokens)
      self.assertEqual([[error.error("name", parser_types.make_location(
          (1, 1), (1, 2)), "Unrecognized token")]], errors)

    return test_case

  for s in bad_strings:
    setattr(TokenizerTest, "testBadString{!r}".format(s), make_test_case(s))


def _make_multiline_tests():
  """Makes tests for indent/dedent insertion and eol insertion."""

  c = "Comment"
  eol = '"\\n"'
  sw = "SnakeWord"
  ind = "Indent"
  ded = "Dedent"
  cases = {
      "a\nb\n": [sw, eol, sw, eol],
      "a\n\nb\n": [sw, eol, eol, sw, eol],
      "a\n#foo\nb\n": [sw, eol, c, eol, sw, eol],
      "a\n #foo\nb\n": [sw, eol, c, eol, sw, eol],
      "a\n b\n": [sw, eol, ind, sw, eol, ded],
      "a\n b\n\n": [sw, eol, ind, sw, eol, eol, ded],
      # NOTE(review): indentation inside these keys was collapsed in the
      # corrupted dump; depths below are reconstructed from the expected
      # Indent/Dedent sequences — confirm against the tokenizer.
      "a\n b\n  c\n": [sw, eol, ind, sw, eol, ind, sw, eol, ded, ded],
      "a\n b\n c\n": [sw, eol, ind, sw, eol, sw, eol, ded],
      "a\n b\n\n c\n": [sw, eol, ind, sw, eol, eol, sw, eol, ded],
      "a\n b\n #\n c\n": [sw, eol, ind, sw, eol, c, eol, sw, eol, ded],
      "a\n\tb\n #\n\tc\n": [sw, eol, ind, sw, eol, c, eol, sw, eol, ded],
      " a\n  b\n   c\n d\n": [ind, sw, eol, ind, sw, eol, ind, sw, eol, ded,
                              ded, sw, eol, ded],
  }

  def make_test_case(case):

    def test_case(self):
      tokens, errors = tokenizer.tokenize(case, "file")
      self.assertFalse(errors)
      self.assertEqual(_token_symbols(tokens), cases[case])

    return test_case

  for c in cases:
    setattr(TokenizerTest, "testMultiline{!r}".format(c), make_test_case(c))


def _make_offset_tests():
  """Makes tests that the tokenizer fills in correct source locations."""
  # NOTE(review): runs of spaces in these keys were collapsed in the
  # corrupted dump; widths are reconstructed so that each key's actual
  # token locations match the expected location list — confirm.
  cases = {
      "a+": ["1:1-1:2", "1:2-1:3", "1:3-1:3"],
      "a   +   ": ["1:1-1:2", "1:5-1:6", "1:9-1:9"],
      "a\n\nb": ["1:1-1:2", "1:2-1:2", "2:1-2:1", "3:1-3:2", "3:2-3:2"],
      "a\n  b": ["1:1-1:2", "1:2-1:2", "2:1-2:3", "2:3-2:4", "2:4-2:4",
                 "3:1-3:1"],
      "a\n  b\nc": ["1:1-1:2", "1:2-1:2", "2:1-2:3", "2:3-2:4", "2:4-2:4",
                    "3:1-3:1", "3:1-3:2", "3:2-3:2"],
      "a\n b\n  c": ["1:1-1:2", "1:2-1:2", "2:1-2:2", "2:2-2:3", "2:3-2:3",
                     "3:2-3:3", "3:3-3:4", "3:4-3:4", "4:1-4:1", "4:1-4:1"],
  }

  def make_test_case(case):

    def test_case(self):
      self.assertEqual([parser_types.format_location(l.source_location)
                        for l in tokenizer.tokenize(case, "file")[0]],
                       cases[case])

    return test_case

  for c in cases:
    setattr(TokenizerTest, "testOffset{!r}".format(c), make_test_case(c))


# Generate the parameterized test methods at import time so that unittest
# discovery sees them on TokenizerTest.
_make_short_token_match_tests()
_make_bad_char_tests()
_make_bad_string_tests()
_make_multiline_tests()
_make_offset_tests()

if __name__ == "__main__":
  unittest.main()