# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Intermediate representation (IR) for Emboss.

This is limited to purely data and type annotations.
"""

import dataclasses
import enum
import sys
from typing import ClassVar, Optional

from compiler.util import ir_data_fields


@dataclasses.dataclass
class Message:
  """Base class for IR data objects.

  Historically protocol buffers were used for serializing this data which has
  led to some legacy naming conventions and references. In particular this
  class is named `Message` in the sense of a protocol buffer message,
  indicating that it is intended to just be data that is used by other higher
  level services.

  There are some other legacy idioms leftover from the protocol buffer-based
  definition such as support for "oneof" and optional fields.

  A field is considered "set" when its value is not None (see `HasField`).
  """

  # Class-level marker object shared by every IR dataclass.
  # NOTE(review): presumably used elsewhere to recognize IR dataclasses --
  # confirm against ir_data_fields.
  IR_DATACLASS: ClassVar[object] = object()
  # Per-class field metadata.  It is only declared here, never assigned in
  # this class body, so it must be attached before instances are created.
  # NOTE(review): presumably done by the `cache_message_specs` call at the
  # bottom of this file -- confirm.
  field_specs: ClassVar[ir_data_fields.FilteredIrFieldSpecs]

  def __post_init__(self):
    """Called by dataclass subclasses after init.

    Post-processes any lists passed in to use our custom list type.
    """
    # Convert any lists passed in to CopyValuesList
    for spec in self.field_specs.sequence_field_specs:
      cur_val = getattr(self, spec.name)
      if isinstance(cur_val, ir_data_fields.TemporaryCopyValuesList):
        # The temporary wrapper already carries the list to use; unwrap it.
        copy_val = cur_val.temp_list
      else:
        copy_val = ir_data_fields.CopyValuesList(spec.data_type)
        if cur_val:
          copy_val.shallow_copy(cur_val)
      setattr(self, spec.name, copy_val)

  # This hook adds a 15% overhead to end-to-end code generation in some cases
  # so we guard it in a `__debug__` block. Users can opt-out of this check by
  # running python with the `-O` flag, ie: `python3 -O ./embossc`.
  if __debug__:
    def __setattr__(self, name: str, value) -> None:
      """Debug-only hook that adds basic type checking for ir_data fields.

      Args:
        name: The attribute being assigned.  Names that are not IR fields
          (i.e. not present in `field_specs.all_field_specs`) are assigned
          without any checking.
        value: The value to assign.

      Raises:
        AttributeError: If `name` is an IR field and `value` is not an
          acceptable value for it.
      """
      if spec := self.field_specs.all_field_specs.get(name):
        if not (
            # Check if it's the expected type
            isinstance(value, spec.data_type) or
            # Oneof fields are a special case
            spec.is_oneof or
            # Optional fields can be set to None
            (spec.container is ir_data_fields.FieldContainer.OPTIONAL and
                 value is None) or
            # Sequences can be a few variants of lists
            (spec.is_sequence and
                 isinstance(value, (
                    list, ir_data_fields.TemporaryCopyValuesList,
                    ir_data_fields.CopyValuesList))) or
            # An enum value can be an int
            (spec.is_enum and isinstance(value, int))):
          # Both halves must be f-strings: implicit concatenation does not
          # propagate the `f` prefix, so a plain second literal would print
          # the text "{spec.data_type}" verbatim.
          raise AttributeError(
              f"Cannot set {value} (type {value.__class__}) for type "
              f"{spec.data_type}")
      object.__setattr__(self, name, value)

  # Non-PEP8 name to mimic the Google Protobuf interface.
  def HasField(self, name):  # pylint:disable=invalid-name
    """Indicates if this class has the given field defined and it is set.

    Args:
      name: The field name to look up.

    Returns:
      True if the attribute exists and is not None; False otherwise
      (including when no such attribute exists).
    """
    return getattr(self, name, None) is not None

  # Non-PEP8 name to mimic the Google Protobuf interface.
  def WhichOneof(self, oneof_name):  # pylint:disable=invalid-name
    """Indicates which field has been set for the oneof value.

    Args:
      oneof_name: The name of the oneof group to inspect.

    Returns:
      The name of the first field in the group (in `oneof_mappings` order)
      that is set, or None if no field has been set.
    """
    for field_name, oneof in self.field_specs.oneof_mappings:
      if oneof == oneof_name and self.HasField(field_name):
        return field_name
    return None


################################################################################
# From here to the end of the file are actual structure definitions.


@dataclasses.dataclass
class Position(Message):
  """A zero-width position within a source file."""

  # NOTE(review): both fields default to 0, which is outside the documented
  # 1-based range; presumably 0 means "not set" -- confirm.
  line: int = 0
  """Line (starts from 1)."""
  column: int = 0
  """Column (starts from 1)."""


@dataclasses.dataclass
class Location(Message):
  """A half-open start:end range within a source file.

  Every field defaults to None, which `Message.HasField` reports as unset.
  """

  # `start` is inclusive and `end` is exclusive, per the half-open convention
  # named in the class docstring.
  start: Optional[Position] = None
  """Beginning of the range"""
  end: Optional[Position] = None
  """One column past the end of the range."""

  is_disjoint_from_parent: Optional[bool] = None
  """True if this Location is outside of the parent object's Location."""

  is_synthetic: Optional[bool] = None
  """True if this Location's parent was synthesized, and does not directly
  appear in the source file.

  The Emboss front end uses this field to cull
  irrelevant error messages.
  """


@dataclasses.dataclass
class Word(Message):
  """IR for a bare word in the source file.

  This is used in NameDefinitions and References.  Within this file it is
  also the type of `Function.function_name`, `Attribute.name`,
  `Attribute.back_end`, `Field.abbreviation` and `Import.local_name`.
  """

  # The text of the word.
  text: Optional[str] = None
  # The source range associated with this word.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class String(Message):
  """IR for a string in the source file.

  Within this file it is the type of `Import.file_name` and
  `AttributeValue.string_constant`.
  """

  # The string's text.
  # NOTE(review): whether this includes the surrounding quotes or escape
  # sequences is not visible here -- confirm against the front end.
  text: Optional[str] = None
  # The source range associated with this string.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class Documentation(Message):
  """IR for a piece of documentation text.

  Lists of Documentation are attached to Module, TypeDefinition, Field and
  EnumValue via their `documentation` fields.
  """

  # The documentation text.
  # NOTE(review): presumably one entry per documentation line in the .emb
  # source -- confirm against the front end.
  text: Optional[str] = None
  # The source range associated with this documentation.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class BooleanConstant(Message):
  """IR for a boolean constant.

  This is the type of the `boolean_constant` alternative of Expression.
  """

  # The constant's value; None means the field is unset.
  value: Optional[bool] = None
  # The source range associated with this constant.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class Empty(Message):
  """Placeholder message for automatic element counts for arrays.

  This is the type of `ArrayType.automatic`; it carries no data other than
  a source location.
  """

  # The source range associated with this placeholder.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class NumericConstant(Message):
  """IR for any numeric constant.

  This is the type of the `constant` alternative of Expression.  Note that
  `value` is a decimal *string*, not an int (see the comment below).
  """

  # Numeric constants are stored as decimal strings; this is the simplest way
  # to store the full -2**63..+2**64 range.
  #
  # TODO(bolms): switch back to int, and just use strings during
  # serialization, now that we're free of proto.
  value: Optional[str] = None
  # The source range associated with this constant.
  source_location: Optional[Location] = None


class FunctionMapping(int, enum.Enum):
  """Enum of supported function types.

  This identifies the function or operator of a `Function` IR node.  The
  class mixes in `int`, so every member is also an int equal to its value.
  """

  # NOTE(review): presumably the placeholder for "no function assigned" --
  # confirm.
  UNKNOWN = 0
  ADDITION = 1
  """`+`"""
  SUBTRACTION = 2
  """`-`"""
  MULTIPLICATION = 3
  """`*`"""
  EQUALITY = 4
  """`==`"""
  INEQUALITY = 5
  """`!=`"""
  AND = 6
  """`&&`"""
  OR = 7
  """`||`"""
  LESS = 8
  """`<`"""
  LESS_OR_EQUAL = 9
  """`<=`"""
  GREATER = 10
  """`>`"""
  GREATER_OR_EQUAL = 11
  """`>=`"""
  CHOICE = 12
  """`?:`"""
  MAXIMUM = 13
  """`$max()`"""
  PRESENCE = 14
  """`$present()`"""
  UPPER_BOUND = 15
  """`$upper_bound()`"""
  LOWER_BOUND = 16
  """`$lower_bound()`"""


@dataclasses.dataclass
class Function(Message):
  """IR for a single function (+, -, *, ==, $max, etc.) in an expression.

  This is the type of the `function` alternative of Expression, and its
  `args` are themselves Expressions, which is what makes Expression
  recursive.
  """

  # Which function or operator this node applies.
  function: Optional[FunctionMapping] = None
  # The arguments.  `Expression` is defined later in this file, so it is
  # named via a string annotation and handed to `list_field` inside a lambda
  # rather than referenced directly.
  args: list["Expression"] = ir_data_fields.list_field(lambda: Expression)
  # NOTE(review): presumably the function/operator name as written in the
  # source -- confirm.
  function_name: Optional[Word] = None
  # The source range associated with this function.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class CanonicalName(Message):
  """CanonicalName is the unique, absolute name for some object.

  A CanonicalName is the unique, absolute name for some object (Type, field,
  etc.) in the IR.  It is used both in the definitions of objects ("struct
  Foo"), and in references to objects (a field of type "Foo").
  """

  # Unlike most fields in this file, these two are not Optional: their
  # defaults come from `ir_data_fields.str_field()` / `list_field(str)`.
  # NOTE(review): the actual default values are defined in ir_data_fields
  # and are not visible here.
  module_file: str = ir_data_fields.str_field()
  """The module_file is the Module.source_file_name of the Module in which this
  object's definition appears.

  Note that the Prelude always has a Module.source_file_name of "", and thus
  references to Prelude names will have module_file == "".
  """

  object_path: list[str] = ir_data_fields.list_field(str)
  """The object_path is the canonical path to the object definition within its
  module file.

  For example, the field "bar" would have an object path of
  ["Foo", "bar"]:

  struct Foo:
    0:3  UInt  bar


  The enumerated name "BOB" would have an object path of ["Baz", "Qux",
  "BOB"]:

  struct Baz:
    0:3  Qux   qux

    enum Qux:
      BOB = 0
  """


@dataclasses.dataclass
class NameDefinition(Message):
  """NameDefinition is IR for the name of an object, within the object.

  That is, a TypeDefinition or Field will hold a NameDefinition as its
  name.  Within this file, EnumValue and RuntimeParameter also hold a
  NameDefinition in their `name` fields.
  """

  # Every field below defaults to None, which `Message.HasField` reports as
  # unset.
  name: Optional[Word] = None
  """The name, as directly generated from the source text.

  name.text will match the last element of canonical_name.object_path. Note
  that in some cases, the exact string in name.text may not appear in the
  source text.
  """

  canonical_name: Optional[CanonicalName] = None
  """The CanonicalName that will appear in References.
  This field is technically redundant: canonical_name.module_file should always
  match the source_file_name of the enclosing Module, and
  canonical_name.object_path should always match the names of parent nodes.
  """

  is_anonymous: Optional[bool] = None
  """If true, indicates that this is an automatically-generated name, which
  should not be visible outside of its immediate namespace.
  """

  source_location: Optional[Location] = None
  """The location of this NameDefinition in source code."""


@dataclasses.dataclass
class Reference(Message):
  """A Reference holds the canonical name of something defined elsewhere.

  For example, take this fragment:

   struct Foo:
    0:3  UInt    size (s)
    4:s  Int:8[] payload

  "Foo", "size", and "payload" will become NameDefinitions in their
  corresponding Field and Message IR objects, while "UInt", the second "s",
  and "Int" are References.  Note that the second "s" will have a
  canonical_name.object_path of ["Foo", "size"], not ["Foo", "s"]: the
  Reference always holds the single "true" name of the object, regardless of
  what appears in the .emb.
  """

  canonical_name: Optional[CanonicalName] = None
  """The canonical name of the object being referred to.

  This name should be used to find the object in the IR.
  """

  # NOTE(review): presumably one Word per dotted component of the name as
  # written -- confirm against the front end.
  source_name: list[Word] = ir_data_fields.list_field(Word)
  """The source_name is the name the user entered in the source file.

  The source_name could be either relative or absolute, and may be an alias
  (and thus not match any part of the canonical_name).  Back ends should use
  canonical_name for name lookup, and reserve source_name for error messages.
  """

  is_local_name: Optional[bool] = None
  """If true, then symbol resolution should only look at local names when
  resolving source_name.

  This is used so that the names of inline types aren't "ambiguous" if there
  happens to be another type with the same name at a parent scope.
  """

  # TODO(bolms): Allow absolute paths starting with ".".

  source_location: Optional[Location] = None
  """Note that this is the source_location of the *Reference*, not of the
  object to which it refers.
  """


@dataclasses.dataclass
class FieldReference(Message):
  """IR for a "field" or "field.sub.subsub" reference in an expression.

  The first element of "path" is the "base" field, which should be directly
  readable in the (runtime) context of the expression.  For example:

    struct Foo:
     0:1  UInt      header_size (h)
     0:h  UInt:8[]  header_bytes

  The "h" will translate to ["Foo", "header_size"], which will be the first
  (and in this case only) element of "path".

  Subsequent path elements should be treated as subfields.  For example, in:

    struct Foo:
     struct Sizes:
      0:1  UInt  header_size
      1:2  UInt  body_size
     0                 [+2]                  Sizes     sizes
     0                 [+sizes.header_size]  UInt:8[]  header
     sizes.header_size [+sizes.body_size]    UInt:8[]  body

  The references to "sizes.header_size" will have a path of [["Foo",
  "sizes"], ["Foo", "Sizes", "header_size"]].  Note that each path element is
  a fully-qualified reference; some back ends (C++, Python) may only use the
  last element, while others (C) may use the complete path.

  This representation is a bit awkward, and is fundamentally limited to a
  dotted list of static field names.  It does not allow an expression like
  `array[n]` on the left side of a `.`.  At this point, it is an artifact of
  the era during which I (bolms@) thought I could get away with skipping
  compiler-y things.
  """

  # TODO(bolms): Add composite types to the expression type system, and
  # replace FieldReference with a "member access" Expression kind.  Further,
  # move the symbol resolution for FieldReferences that is currently in
  # symbol_resolver.py into type_check.py.

  # TODO(bolms): Make the above change before declaring the IR to be "stable".

  # Ordered list of fully-qualified References: the base field first, then
  # each successive subfield (see the class docstring).
  path: list[Reference] = ir_data_fields.list_field(Reference)
  # The source range associated with this field reference.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class OpaqueType(Message):
  """Type of an opaque expression.

  This is the type of the `opaque` alternative of ExpressionType.  It
  declares no fields of its own.
  """

  pass


@dataclasses.dataclass
class IntegerType(Message):
  """Type of an integer expression.

  All four fields are stored as strings (or None when unset), which lets
  them hold either a decimal integer or one of the "infinity" /
  "-infinity" sentinels described below.
  """

  # For optimization, the modular congruence of an integer expression is
  # tracked.  This consists of a modulus and a modular_value, such that for
  # all possible values of expression, expression MOD modulus ==
  # modular_value.
  #
  # The modulus may be the special value "infinity" to indicate that the
  # expression's value is exactly modular_value; otherwise, it should be a
  # positive integer.
  #
  # A modulus of 1 places no constraints on the value.
  #
  # The modular_value should always be a nonnegative integer that is smaller
  # than the modulus.
  #
  # Note that this is specifically the *modulus*, which is not equivalent to
  # the value from C's '%' operator when the dividend is negative: in C, -7 %
  # 4 == -3, but the modular_value here would be 1.  Python uses modulus: in
  # Python, -7 % 4 == 1.
  modulus: Optional[str] = None
  """The modulus portion of the modular congruence of an integer expression.

  The modulus may be the special value "infinity" to indicate that the
  expression's value is exactly modular_value; otherwise, it should be a
  positive integer.

  A modulus of 1 places no constraints on the value.
  """
  modular_value: Optional[str] = None
  """ The modular_value portion of the modular congruence of an integer expression.

  The modular_value should always be a nonnegative integer that is smaller
  than the modulus.
  """

  # The minimum and maximum values of an integer are tracked and checked so
  # that Emboss can implement reliable arithmetic with no operations
  # overflowing either 64-bit unsigned or 64-bit signed 2's-complement
  # integers.
  #
  # Note that constant subexpressions are allowed to overflow, as long as the
  # final, computed constant value of the subexpression fits in a 64-bit
  # value.
  #
  # The minimum_value may take the value "-infinity", and the maximum_value
  # may take the value "infinity".  These sentinel values indicate that
  # Emboss has no bound information for the Expression, and therefore the
  # Expression may only be evaluated during compilation; the back end should
  # never need to compile such an expression into the target language (e.g.,
  # C++).
  minimum_value: Optional[str] = None
  maximum_value: Optional[str] = None


@dataclasses.dataclass
class BooleanType(Message):
  """Type of a boolean expression.

  This is the type of the `boolean` alternative of ExpressionType.
  """

  # NOTE(review): presumably the expression's value when it is known at
  # compile time, and None otherwise -- confirm.
  value: Optional[bool] = None


@dataclasses.dataclass
class EnumType(Message):
  """Type of an enumeration-valued expression.

  This is the type of the `enumeration` alternative of ExpressionType.
  """

  # NOTE(review): presumably a reference to the enum's type definition --
  # confirm.
  name: Optional[Reference] = None
  # Stored as a string, like NumericConstant.value.
  # NOTE(review): presumably the numeric value when it is known at compile
  # time -- confirm.
  value: Optional[str] = None


@dataclasses.dataclass
class ExpressionType(Message):
  """IR for the type of an expression.

  The four fields are the alternatives of the "type" oneof;
  `WhichOneof("type")` returns the name of the one that is set, or None.
  This class is the type of `Expression.type` and `RuntimeParameter.type`.
  """

  opaque: Optional[OpaqueType] = ir_data_fields.oneof_field("type")
  integer: Optional[IntegerType] = ir_data_fields.oneof_field("type")
  boolean: Optional[BooleanType] = ir_data_fields.oneof_field("type")
  enumeration: Optional[EnumType] = ir_data_fields.oneof_field("type")


@dataclasses.dataclass
class Expression(Message):
  """IR for an expression.

  An Expression is a potentially-recursive data structure.  It can either
  represent a leaf node (constant or reference) or an operation combining
  other Expressions (function).
  """

  # The next six fields are the alternatives of the "expression" oneof;
  # `WhichOneof("expression")` returns the name of the one that is set.
  constant: Optional[NumericConstant] = ir_data_fields.oneof_field("expression")
  constant_reference: Optional[Reference] = ir_data_fields.oneof_field(
      "expression"
  )
  function: Optional[Function] = ir_data_fields.oneof_field("expression")
  field_reference: Optional[FieldReference] = ir_data_fields.oneof_field(
      "expression"
  )
  boolean_constant: Optional[BooleanConstant] = ir_data_fields.oneof_field(
      "expression"
  )
  builtin_reference: Optional[Reference] = ir_data_fields.oneof_field(
      "expression"
  )

  # These two are ordinary optional fields, not part of the oneof.
  # The type of this expression.
  type: Optional[ExpressionType] = None
  # The source range associated with this expression.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class ArrayType(Message):
  """IR for an array type ("Int:8[12]" or "Message[2]" or "UInt[3][2]")."""

  # The element type.  `Type` is defined later in this file, hence the
  # string annotation.  The element type may itself be an array type, as in
  # the "UInt[3][2]" example above.
  base_type: Optional["Type"] = None

  # The two alternatives of the "size" oneof: an explicit element count
  # expression, or the `Empty` placeholder for an automatic element count.
  element_count: Optional[Expression] = ir_data_fields.oneof_field("size")
  automatic: Optional[Empty] = ir_data_fields.oneof_field("size")

  # The source range associated with this array type.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class AtomicType(Message):
  """IR for a non-array type ("UInt" or "Foo(Version.SIX)")."""

  # Reference to the named type ("UInt" or "Foo" in the examples above).
  reference: Optional[Reference] = None
  # NOTE(review): presumably the argument expressions supplied for the
  # type's runtime parameters ("Version.SIX" in the example above) --
  # confirm.
  runtime_parameter: list[Expression] = ir_data_fields.list_field(Expression)
  # The source range associated with this type.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class Type(Message):
  """IR for a type reference ("UInt", "Int:8[12]", etc.)."""

  # The two alternatives of the "type" oneof: a non-array type or an array
  # type.
  atomic_type: Optional[AtomicType] = ir_data_fields.oneof_field("type")
  array_type: Optional[ArrayType] = ir_data_fields.oneof_field("type")

  # NOTE(review): presumably the explicit size given after the colon in
  # forms like "Int:8" -- confirm.
  size_in_bits: Optional[Expression] = None
  # The source range associated with this type reference.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class AttributeValue(Message):
  """IR for an attribute value.

  The value is either an expression or a string constant; these are the two
  alternatives of the "value" oneof.
  """

  # TODO(bolms): Make String a type of Expression, and replace
  # AttributeValue with Expression.
  expression: Optional[Expression] = ir_data_fields.oneof_field("value")
  string_constant: Optional[String] = ir_data_fields.oneof_field("value")

  # The source range associated with this value.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class Attribute(Message):
  """IR for a [name = value] attribute.

  Lists of Attribute are attached to Module, TypeDefinition, Field and
  EnumValue via their `attribute` fields.
  """

  # The `name` part of `[name = value]`.
  name: Optional[Word] = None
  # The `value` part of `[name = value]`.
  value: Optional[AttributeValue] = None
  # NOTE(review): presumably the back end that the attribute is scoped to,
  # if any -- confirm.
  back_end: Optional[Word] = None
  # NOTE(review): presumably true for attributes declared as defaults for
  # the enclosing scope -- confirm.
  is_default: Optional[bool] = None
  # The source range associated with this attribute.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class WriteTransform(Message):
  """IR which defines an expression-based virtual field write scheme.

  E.g., for a virtual field like `x_plus_one`:

    struct Foo:
     0 [+1]  UInt  x
     let x_plus_one = x + 1

  ... the `WriteMethod` would be `transform`, with `$logical_value - 1` for
  `function_body` and `x` for `destination`.
  """

  # The expression that computes the value to write
  # (`$logical_value - 1` in the example above).
  function_body: Optional[Expression] = None
  # The field that receives the computed value (`x` in the example above).
  destination: Optional[FieldReference] = None


@dataclasses.dataclass
class WriteMethod(Message):
  """IR which defines the method used for writing to a virtual field.

  The four fields are the alternatives of the "method" oneof;
  `WhichOneof("method")` returns the name of the one that is set, or None.
  """

  physical: Optional[bool] = ir_data_fields.oneof_field("method")
  """A physical Field can be written directly."""

  read_only: Optional[bool] = ir_data_fields.oneof_field("method")
  """A read_only Field cannot be written."""

  alias: Optional[FieldReference] = ir_data_fields.oneof_field("method")
  """An alias is a direct, untransformed forward of another field; it can be
  implemented by directly returning a reference to the aliased field.

  Aliases are the only kind of virtual field that may have an opaque type.
  """

  transform: Optional[WriteTransform] = ir_data_fields.oneof_field("method")
  """A transform is a way of turning a logical value into a value which should
  be written to another field.

  A virtual field like `let y = x + 1` would
  have a transform WriteMethod to subtract 1 from the new `y` value, and
  write that to `x`.
  """


@dataclasses.dataclass
class FieldLocation(Message):
  """IR for a field location.

  This is the type of `Field.location`, the physical location of a field.
  """

  # Expression for where the field starts.
  # NOTE(review): presumably an offset in the enclosing structure's
  # addressable units -- confirm.
  start: Optional[Expression] = None
  # Expression for the field's size.
  size: Optional[Expression] = None
  # The source range associated with this location.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class Field(Message):  # pylint:disable=too-many-instance-attributes
  """IR for a field in a struct definition.

  There are two kinds of Field: physical fields have location and (physical)
  type; virtual fields have read_transform.  Although there are differences,
  in many situations physical and virtual fields are treated the same way,
  and they can be freely intermingled in the source file.
  """

  # Physical-field members (see the class docstring).
  location: Optional[FieldLocation] = None
  """The physical location of the field."""
  type: Optional[Type] = None
  """The physical type of the field."""

  # Virtual-field member (see the class docstring).
  read_transform: Optional[Expression] = None
  """The value of a virtual field."""

  write_method: Optional[WriteMethod] = None
  """How this virtual field should be written."""

  name: Optional[NameDefinition] = None
  """The name of the field."""
  abbreviation: Optional[Word] = None
  """An optional short name for the field, only visible inside the enclosing bits/struct."""
  attribute: list[Attribute] = ir_data_fields.list_field(Attribute)
  """Field-specific attributes."""
  documentation: list[Documentation] = ir_data_fields.list_field(Documentation)
  """Field-specific documentation."""

  # TODO(bolms): Document conditional fields better, and replace some of this
  # explanation with a reference to the documentation.
  existence_condition: Optional[Expression] = None
  """The field only exists when existence_condition evaluates to true.

  For example:
  ```
  struct Message:
    0 [+4]  UInt         length
    4 [+8]  MessageType  message_type
    if message_type == MessageType.FOO:
      8 [+length]  Foo   foo
    if message_type == MessageType.BAR:
      8 [+length]  Bar   bar
    8+length [+4]  UInt  crc
  ```
  For `length`, `message_type`, and `crc`, existence_condition will be
  `boolean_constant { value: true }`

  For `foo`, existence_condition will be:
  ```
      function { function: EQUALITY
                 args: [reference to message_type]
                 args: { [reference to MessageType.FOO] } }
  ```

  The `bar` field will have a similar existence_condition to `foo`:
  ```
      function { function: EQUALITY
                 args: [reference to message_type]
                 args: { [reference to MessageType.BAR] } }
  ```

  When `message_type` is `MessageType.BAR`, the `Message` struct does not contain
  field `foo`, and vice versa for `message_type == MessageType.FOO` and field
  `bar`: those fields only conditionally exist in the structure.
  """

  # The source range associated with this field.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class Structure(Message):
  """IR for a bits or struct definition.

  This is the type of the `structure` alternative of TypeDefinition.
  """

  # The fields of the structure, in the order they appear in the original
  # .emb (see the note on `fields_in_dependency_order` below).
  field: list[Field] = ir_data_fields.list_field(Field)

  # Each entry is an index into `field`.
  fields_in_dependency_order: list[int] = ir_data_fields.list_field(int)
  """The fields in `field` are listed in the order they appear in the original
  .emb.

  For text format output, this can lead to poor results.  Take the following
  struct:
  ```
      struct Foo:
        b [+4]  UInt  a
        0 [+4]  UInt  b
  ```
  Here, the location of `a` depends on the current value of `b`.  Because of
  this, if someone calls
  ```
      emboss::UpdateFromText(foo_view, "{ a: 10, b: 4 }");
  ```
  then foo_view will not be updated the way one would expect: if `b`'s value
  was something other than 4 to start with, then `UpdateFromText` will write
  the 10 to some other location, then update `b` to 4.

  To avoid surprises, `emboss::DumpAsText` should return `"{ b: 4, a: 10
  }"`.

  The `fields_in_dependency_order` field provides a permutation of `field`
  such that each field appears after all of its dependencies.  For example,
  `struct Foo`, above, would have `{ 1, 0 }` in
  `fields_in_dependency_order`.

  The exact ordering of `fields_in_dependency_order` is not guaranteed, but
  some effort is made to keep the order close to the order fields are listed
  in the original `.emb` file.  In particular, if the ordering 0, 1, 2, 3,
  ... satisfies dependency ordering, then `fields_in_dependency_order` will
  be `{ 0, 1, 2, 3, ... }`.
  """

  # The source range associated with this structure.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class External(Message):
  """IR for an external type declaration.

  This is the type of the `external` alternative of TypeDefinition.
  """

  # Externals have no values other than name and attribute list, which are
  # common to all type definitions.

  # The source range associated with this declaration.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class EnumValue(Message):
  """IR for a single value within an enumerated type.

  A list of EnumValue makes up `Enum.value`.
  """

  name: Optional[NameDefinition] = None
  """The name of the enum value."""
  # Note that the value is held as an Expression, not as a plain integer.
  value: Optional[Expression] = None
  """The value of the enum value."""
  documentation: list[Documentation] = ir_data_fields.list_field(Documentation)
  """Value-specific documentation."""
  attribute: list[Attribute] = ir_data_fields.list_field(Attribute)
  """Value-specific attributes."""

  # The source range associated with this enum value.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class Enum(Message):
  """IR for an enumerated type definition.

  This is the type of the `enumeration` alternative of TypeDefinition.  It
  is an IR dataclass and is unrelated to Python's `enum.Enum`.
  """

  # The values declared in the enumerated type.
  value: list[EnumValue] = ir_data_fields.list_field(EnumValue)
  # The source range associated with this definition.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class Import(Message):
  """IR for an import statement in a module.

  A list of Import makes up `Module.foreign_import`.
  """

  file_name: Optional[String] = None
  """The file to import."""
  local_name: Optional[Word] = None
  """The name to use within this module."""
  # The source range associated with this import statement.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class RuntimeParameter(Message):
  """IR for a runtime parameter definition.

  A list of RuntimeParameter makes up `TypeDefinition.runtime_parameter`.
  """

  name: Optional[NameDefinition] = None
  """The name of the parameter."""
  type: Optional[ExpressionType] = None
  """The type of the parameter."""

  # TODO(bolms): Actually implement the set builder type notation.
  physical_type_alias: Optional[Type] = None
  """For convenience and readability, physical types may be used in the .emb
  source instead of a full expression type.

  That way, users can write
  something like:
  ```
      struct Foo(version :: UInt:8):
  ```
  instead of:
  ```
      struct Foo(version :: {$int x |: 0 <= x <= 255}):
  ```
  In these cases, physical_type_alias holds the user-supplied type, and type
  is filled in after initial parsing is finished.
  """

  # The source range associated with this parameter definition.
  source_location: Optional[Location] = None


class AddressableUnit(int, enum.Enum):
  """The size of the smallest unit readable from a type's backing store.

  The "addressable unit" is the size of the smallest unit that can be read
  from the backing store that this type expects.  For `struct`s, this is
  BYTE; for `enum`s and `bits`, this is BIT, and for `external`s it depends
  on the specific type.

  The class mixes in `int`, so every member is also an int equal to its
  value.
  """

  # NOTE(review): the values look like the unit size in bits (BIT = 1,
  # BYTE = 8), with NONE = 0 presumably meaning "no addressable unit" --
  # confirm.
  NONE = 0
  BIT = 1
  BYTE = 8


@dataclasses.dataclass
class TypeDefinition(Message):
  """Container IR for a type definition (external, enum, bits, or struct).

  The kind of type is selected by the "type" oneof (`external`,
  `enumeration` or `structure`); the remaining fields are common to every
  kind of type definition.
  """

  external: Optional[External] = ir_data_fields.oneof_field("type")
  enumeration: Optional[Enum] = ir_data_fields.oneof_field("type")
  structure: Optional[Structure] = ir_data_fields.oneof_field("type")

  name: Optional[NameDefinition] = None
  """The name of the type."""
  attribute: list[Attribute] = ir_data_fields.list_field(Attribute)
  """All attributes attached to the type."""
  documentation: list[Documentation] = ir_data_fields.list_field(Documentation)
  """Docs for the type."""
  # The class refers to itself here, so it is named via a string annotation
  # and handed to `list_field` inside a lambda; the pylint directive silences
  # the resulting undefined-variable warning.
  # pylint:disable=undefined-variable
  subtype: list["TypeDefinition"] = ir_data_fields.list_field(
      lambda: TypeDefinition
  )
  """Subtypes of this type."""
  # See AddressableUnit for the meaning of the possible values.
  addressable_unit: Optional[AddressableUnit] = None

  runtime_parameter: list[RuntimeParameter] = ir_data_fields.list_field(
      RuntimeParameter
  )
  """If the type requires parameters at runtime, these are its parameters.

  These are currently only allowed on structures, but in the future they
  should be allowed on externals.
  """
  # The source range associated with this type definition.
  source_location: Optional[Location] = None


@dataclasses.dataclass
class Module(Message):
  """The IR for an individual Emboss module (file).

  A list of Module makes up `EmbossIr.module`.
  """

  attribute: list[Attribute] = ir_data_fields.list_field(Attribute)
  """Module-level attributes."""
  type: list[TypeDefinition] = ir_data_fields.list_field(TypeDefinition)
  """Module-level type definitions."""
  documentation: list[Documentation] = ir_data_fields.list_field(Documentation)
  """Module-level docs."""
  foreign_import: list[Import] = ir_data_fields.list_field(Import)
  """Other modules imported."""
  source_text: Optional[str] = None
  """The original source code."""
  source_location: Optional[Location] = None
  """Source code covered by this IR."""
  # CanonicalName.module_file refers to this value; per the CanonicalName
  # documentation, the Prelude always has a source_file_name of "".
  source_file_name: Optional[str] = None
  """Name of the source file."""


@dataclasses.dataclass
class EmbossIr(Message):
  """The top-level IR for an Emboss module and all of its dependencies."""

  # The order of this list is significant; see the attribute documentation
  # below.
  module: list[Module] = ir_data_fields.list_field(Module)
  """All modules.

  The first entry will be the main module; back ends should
  generate code corresponding to that module.  The second entry will be the
  prelude module.
  """


# Post-process the dataclasses to add cached fields.
#
# This runs once at import time, after all of the class definitions above,
# and passes this module object plus the `Message` base class to
# ir_data_fields.
# NOTE(review): presumably this is what attaches `field_specs` to `Message`
# and its subclasses -- confirm in ir_data_fields.cache_message_specs.
ir_data_fields.cache_message_specs(sys.modules[Message.__module__], Message)