xref: /aosp_15_r20/external/emboss/compiler/front_end/parser.py (revision 99e0aae7469b87d12f0ad23e61142c2d74c1ef70)
1*99e0aae7SDavid Rees# Copyright 2019 Google LLC
2*99e0aae7SDavid Rees#
3*99e0aae7SDavid Rees# Licensed under the Apache License, Version 2.0 (the "License");
4*99e0aae7SDavid Rees# you may not use this file except in compliance with the License.
5*99e0aae7SDavid Rees# You may obtain a copy of the License at
6*99e0aae7SDavid Rees#
7*99e0aae7SDavid Rees#     https://www.apache.org/licenses/LICENSE-2.0
8*99e0aae7SDavid Rees#
9*99e0aae7SDavid Rees# Unless required by applicable law or agreed to in writing, software
10*99e0aae7SDavid Rees# distributed under the License is distributed on an "AS IS" BASIS,
11*99e0aae7SDavid Rees# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*99e0aae7SDavid Rees# See the License for the specific language governing permissions and
13*99e0aae7SDavid Rees# limitations under the License.
14*99e0aae7SDavid Rees
15*99e0aae7SDavid Rees"""Routines to generate a shift-reduce parser from the module_ir module."""
16*99e0aae7SDavid Rees
17*99e0aae7SDavid Reesfrom compiler.front_end import lr1
18*99e0aae7SDavid Reesfrom compiler.front_end import module_ir
19*99e0aae7SDavid Reesfrom compiler.front_end import tokenizer
20*99e0aae7SDavid Reesfrom compiler.util import resources
21*99e0aae7SDavid Reesfrom compiler.util import simple_memoizer
22*99e0aae7SDavid Rees
23*99e0aae7SDavid Rees
class ParserGenerationError(Exception):
  """Raised when a parser cannot be generated.

  Used in this module for malformed error example text, grammar conflicts,
  and error examples that cannot be marked in the generated parser.
  """
27*99e0aae7SDavid Rees
28*99e0aae7SDavid Rees
def parse_error_examples(error_example_text):
  """Parses error examples from error_example_text.

  The text is split into sections on lines made of 80 "=" characters;
  anything before the first such line is treated as explanatory text and
  ignored.  Each section must contain exactly one line of 80 "-" characters,
  separating the error message from the examples.  Individual examples are
  separated from each other by "---" lines.

  Within each tokenized example, every `$ANY` word is replaced by
  lr1.ANY_TOKEN, and the first `$ERR` word is removed; the token that
  immediately followed `$ERR` is recorded as the error token.

  Arguments:
    error_example_text: The text of an error example file.

  Returns:
    A list of (tokens, error_token, message, example_text) tuples, suitable
    for passing into generate_parser.

  Raises:
    ParserGenerationError: There is a problem parsing the error examples: a
        section does not have exactly one message and one example block, an
        example fails to tokenize, or an example has no `$ERR` marker (or
        has one with no token after it).
  """
  section_separator = "\n" + "=" * 80 + "\n"
  message_separator = "\n" + "-" * 80 + "\n"
  result = []
  # Everything before the first "======" line is explanatory text: ignore it.
  for error_example in error_example_text.split(section_separator)[1:]:
    message_and_examples = error_example.split(message_separator)
    if len(message_and_examples) != 2:
      raise ParserGenerationError(
          "Expected one error message and one example section in:\n" +
          error_example)
    message, example_text = message_and_examples
    for example in example_text.split("\n---\n"):
      # TODO(bolms): feed a line number into tokenize, so that tokenization
      # failures refer to the correct line within error_example_text.
      tokens, errors = tokenizer.tokenize(example, "")
      if errors:
        raise ParserGenerationError(str(errors))

      # Swap each "$ANY" placeholder for the wildcard token.
      tokens = [
          lr1.ANY_TOKEN
          if token.symbol == "BadWord" and token.text == "$ANY" else token
          for token in tokens
      ]

      error_token = None
      for index, token in enumerate(tokens):
        if token.symbol == "BadWord" and token.text == "$ERR":
          # The marker must be followed by the token it points at; report a
          # dangling marker as a generation error rather than an IndexError.
          if index + 1 >= len(tokens):
            raise ParserGenerationError(
                "Error token marker '$ERR' is not followed by a token in:\n" +
                error_example)
          error_token = tokens[index + 1]
          # Drop the marker itself so only real source tokens remain.
          del tokens[index]
          break
      else:
        raise ParserGenerationError(
            "No error token marker '$ERR' in:\n" + error_example)

      result.append((tokens, error_token, message.strip(), example))
  return result
75*99e0aae7SDavid Rees
76*99e0aae7SDavid Rees
def generate_parser(start_symbol, productions, error_examples):
  """Generates a parser from grammar, and applies error_examples.

  Arguments:
      start_symbol: the start symbol of the grammar (a string)
      productions: a list of parser_types.Production in the grammar
      error_examples: A list of (source tokens, error token, error message,
          source text) tuples, as returned by parse_error_examples.  The
          first three elements are passed to the parser's mark_error; the
          source text is only used when reporting a failure to mark.

  Returns:
      A parser.

  Raises:
      ParserGenerationError: There is a problem generating the parser: the
          grammar has conflicts, or an error example could not be marked.
  """
  parser = lr1.Grammar(start_symbol, productions).parser()
  if parser.conflicts:
    # Report every conflict at once, one per line.
    raise ParserGenerationError(
        "\n".join(str(conflict) for conflict in parser.conflicts))
  for example in error_examples:
    # A truthy result from mark_error describes why marking failed.
    mark_result = parser.mark_error(example[0], example[1], example[2])
    if mark_result:
      raise ParserGenerationError(
          "error marking example: {}\nExample:\n{}".format(
              mark_result, example[3]))
  return parser
102*99e0aae7SDavid Rees
103*99e0aae7SDavid Rees
@simple_memoizer.memoize
def _load_module_parser():
  """Builds the parser for full modules, with error examples applied.

  The error examples are read from the "error_examples" resource of
  compiler.front_end and parsed with parse_error_examples.
  """
  example_text = resources.load("compiler.front_end", "error_examples")
  parsed_examples = parse_error_examples(example_text)
  return generate_parser(
      module_ir.START_SYMBOL, module_ir.PRODUCTIONS, parsed_examples)
110*99e0aae7SDavid Rees
111*99e0aae7SDavid Rees
@simple_memoizer.memoize
def _load_expression_parser():
  """Builds the parser for standalone expressions, with no error examples."""
  no_error_examples = []
  return generate_parser(
      module_ir.EXPRESSION_START_SYMBOL, module_ir.PRODUCTIONS,
      no_error_examples)
116*99e0aae7SDavid Rees
117*99e0aae7SDavid Rees
def parse_module(tokens):
  """Parses an Emboss token list into an Emboss module parse tree.

  Arguments:
    tokens: The token list to parse.

  Returns:
    The result of running the module parser's parse() on tokens.
  """
  module_parser = _load_module_parser()
  return module_parser.parse(tokens)
121*99e0aae7SDavid Rees
122*99e0aae7SDavid Rees
def parse_expression(tokens):
  """Parses an Emboss token list into an expression parse tree.

  Arguments:
    tokens: The token list to parse.

  Returns:
    The result of running the expression parser's parse() on tokens.
  """
  expression_parser = _load_expression_parser()
  return expression_parser.parse(tokens)
126