# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Routines to generate a shift-reduce parser from the module_ir module."""

from compiler.front_end import lr1
from compiler.front_end import module_ir
from compiler.front_end import tokenizer
from compiler.util import resources
from compiler.util import simple_memoizer


class ParserGenerationError(Exception):
    """An error occurred during parser generation."""


def parse_error_examples(error_example_text):
    """Parses error examples from error_example_text.

    The text is split into sections on lines of 80 "=" characters.  Each
    section holds an error message and an example block, separated by a line
    of 80 "-" characters; individual examples inside the block are separated
    by "---" lines.  Within an example, the word "$ANY" is replaced by
    lr1.ANY_TOKEN, and the word "$ERR" marks the token that follows it as the
    error token (the marker itself is removed from the token list).

    Arguments:
      error_example_text: The text of an error example file.

    Returns:
      A list of (tokens, error_token, message, example_text) tuples, suitable
      for passing into generate_parser.

    Raises:
      ParserGenerationError: There is a problem parsing the error examples.
    """
    error_examples = error_example_text.split("\n" + "=" * 80 + "\n")
    result = []
    # Everything before the first "======" line is explanatory text: ignore it.
    for error_example in error_examples[1:]:
        message_and_examples = error_example.split("\n" + "-" * 80 + "\n")
        if len(message_and_examples) != 2:
            raise ParserGenerationError(
                "Expected one error message and one example section in:\n" +
                error_example)
        message, example_text = message_and_examples
        for example in example_text.split("\n---\n"):
            # TODO(bolms): feed a line number into tokenize, so that
            # tokenization failures refer to the correct line within
            # error_example_text.
            tokens, errors = tokenizer.tokenize(example, "")
            if errors:
                raise ParserGenerationError(str(errors))

            # Swap every "$ANY" placeholder for the wildcard token, in place.
            for i, token in enumerate(tokens):
                if token.symbol == "BadWord" and token.text == "$ANY":
                    tokens[i] = lr1.ANY_TOKEN

            error_token = None
            for i, token in enumerate(tokens):
                if token.symbol == "BadWord" and token.text == "$ERR":
                    if i + 1 >= len(tokens):
                        # Without this check a trailing "$ERR" would surface
                        # as a bare IndexError instead of the documented
                        # ParserGenerationError.
                        raise ParserGenerationError(
                            "Error token marker '$ERR' is not followed by a "
                            "token in:\n" + error_example)
                    error_token = tokens[i + 1]
                    # Drop the marker so only real tokens reach the parser.
                    # Only the first marker is honored; we stop iterating
                    # immediately after mutating the list.
                    del tokens[i]
                    break
            else:
                raise ParserGenerationError(
                    "No error token marker '$ERR' in:\n" + error_example)

            result.append((tokens, error_token, message.strip(), example))
    return result


def generate_parser(start_symbol, productions, error_examples):
    """Generates a parser from grammar, and applies error_examples.

    Arguments:
      start_symbol: the start symbol of the grammar (a string)
      productions: a list of parser_types.Production in the grammar
      error_examples: A list of (source tokens, error token, error message,
          source text) tuples, as returned by parse_error_examples.

    Returns:
      A parser.

    Raises:
      ParserGenerationError: There is a problem generating the parser: the
          grammar has conflicts, or an error example could not be marked.
    """
    parser = lr1.Grammar(start_symbol, productions).parser()
    if parser.conflicts:
        raise ParserGenerationError(
            "\n".join(str(c) for c in parser.conflicts))
    for example in error_examples:
        # A truthy result from mark_error is treated as a failure description.
        mark_result = parser.mark_error(example[0], example[1], example[2])
        if mark_result:
            raise ParserGenerationError(
                "error marking example: {}\nExample:\n{}".format(
                    mark_result, example[3]))
    return parser


@simple_memoizer.memoize
def _load_module_parser():
    """Builds the module parser, with the bundled error examples applied."""
    error_examples = parse_error_examples(
        resources.load("compiler.front_end", "error_examples"))
    return generate_parser(module_ir.START_SYMBOL, module_ir.PRODUCTIONS,
                           error_examples)


@simple_memoizer.memoize
def _load_expression_parser():
    """Builds the expression parser; no error examples are applied."""
    return generate_parser(module_ir.EXPRESSION_START_SYMBOL,
                           module_ir.PRODUCTIONS, [])


def parse_module(tokens):
    """Parses the provided Emboss token list into an Emboss module parse tree."""
    return _load_module_parser().parse(tokens)


def parse_expression(tokens):
    """Parses the provided Emboss token list into an expression parse tree."""
    return _load_expression_parser().parse(tokens)