1  // pest. The Elegant Parser
2  // Copyright (c) 2018 DragoČ™ Tiselice
3  //
4  // Licensed under the Apache License, Version 2.0
5  // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6  // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7  // option. All files in the project carrying such notice may not be copied,
8  // modified, or distributed except according to those terms.
9  
10  //! Different optimizations for pest's ASTs.
11  
12  use crate::ast::*;
13  use std::collections::HashMap;
14  
15  #[cfg(test)]
16  macro_rules! box_tree {
17      ( $node:ident( $(                      $child:ident( $($args:tt)* )     ),+ ) ) => (
18        $node      ( $( Box::new( box_tree!( $child      ( $($args   )* ) ) ) ),+ )
19      );
20      ($expr:expr) => ($expr);
21  }
22  
23  mod concatenator;
24  mod factorizer;
25  mod lister;
26  mod restorer;
27  mod rotater;
28  mod skipper;
29  mod unroller;
30  
31  /// Takes pest's ASTs and optimizes them
optimize(rules: Vec<Rule>) -> Vec<OptimizedRule>32  pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> {
33      let optimized: Vec<OptimizedRule> = rules
34          .into_iter()
35          .map(rotater::rotate)
36          .map(skipper::skip)
37          .map(unroller::unroll)
38          .map(concatenator::concatenate)
39          .map(factorizer::factor)
40          .map(lister::list)
41          .map(rule_to_optimized_rule)
42          .collect();
43  
44      let rules = to_hash_map(&optimized);
45      optimized
46          .into_iter()
47          .map(|rule| restorer::restore_on_err(rule, &rules))
48          .collect()
49  }
50  
rule_to_optimized_rule(rule: Rule) -> OptimizedRule51  fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule {
52      fn to_optimized(expr: Expr) -> OptimizedExpr {
53          match expr {
54              Expr::Str(string) => OptimizedExpr::Str(string),
55              Expr::Insens(string) => OptimizedExpr::Insens(string),
56              Expr::Range(start, end) => OptimizedExpr::Range(start, end),
57              Expr::Ident(ident) => OptimizedExpr::Ident(ident),
58              Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end),
59              Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))),
60              Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))),
61              Expr::Seq(lhs, rhs) => {
62                  OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
63              }
64              Expr::Choice(lhs, rhs) => {
65                  OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs)))
66              }
67              Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))),
68              Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))),
69              Expr::Skip(strings) => OptimizedExpr::Skip(strings),
70              Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))),
71              #[cfg(feature = "grammar-extras")]
72              Expr::NodeTag(expr, tag) => OptimizedExpr::NodeTag(Box::new(to_optimized(*expr)), tag),
73              #[cfg(feature = "grammar-extras")]
74              Expr::RepOnce(expr) => OptimizedExpr::RepOnce(Box::new(to_optimized(*expr))),
75              #[cfg(not(feature = "grammar-extras"))]
76              Expr::RepOnce(_) => unreachable!("No valid transformation to OptimizedRule"),
77              Expr::RepExact(..) | Expr::RepMin(..) | Expr::RepMax(..) | Expr::RepMinMax(..) => {
78                  unreachable!("No valid transformation to OptimizedRule")
79              }
80          }
81      }
82  
83      OptimizedRule {
84          name: rule.name,
85          ty: rule.ty,
86          expr: to_optimized(rule.expr),
87      }
88  }
89  
to_hash_map(rules: &[OptimizedRule]) -> HashMap<String, OptimizedExpr>90  fn to_hash_map(rules: &[OptimizedRule]) -> HashMap<String, OptimizedExpr> {
91      rules
92          .iter()
93          .map(|r| (r.name.clone(), r.expr.clone()))
94          .collect()
95  }
96  
97  /// The optimized version of the pest AST's `Rule`.
98  #[derive(Clone, Debug, Eq, PartialEq)]
99  pub struct OptimizedRule {
100      /// The name of the rule.
101      pub name: String,
102      /// The type of the rule.
103      pub ty: RuleType,
104      /// The optimized expression of the rule.
105      pub expr: OptimizedExpr,
106  }
107  
108  /// The optimized version of the pest AST's `Expr`.
109  ///
110  /// # Warning: Semantic Versioning
111  /// There may be non-breaking changes to the meta-grammar
112  /// between minor versions. Those non-breaking changes, however,
113  /// may translate into semver-breaking changes due to the additional variants
114  /// propaged from the `Rule` enum. This is a known issue and will be fixed in the
115  /// future (e.g. by increasing MSRV and non_exhaustive annotations).
116  #[derive(Clone, Debug, Eq, PartialEq)]
117  pub enum OptimizedExpr {
118      /// Matches an exact string, e.g. `"a"`
119      Str(String),
120      /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
121      Insens(String),
122      /// Matches one character in the range, e.g. `'a'..'z'`
123      Range(String, String),
124      /// Matches the rule with the given name, e.g. `a`
125      Ident(String),
126      /// Matches a custom part of the stack, e.g. `PEEK[..]`
127      PeekSlice(i32, Option<i32>),
128      /// Positive lookahead; matches expression without making progress, e.g. `&e`
129      PosPred(Box<OptimizedExpr>),
130      /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
131      NegPred(Box<OptimizedExpr>),
132      /// Matches a sequence of two expressions, e.g. `e1 ~ e2`
133      Seq(Box<OptimizedExpr>, Box<OptimizedExpr>),
134      /// Matches either of two expressions, e.g. `e1 | e2`
135      Choice(Box<OptimizedExpr>, Box<OptimizedExpr>),
136      /// Optionally matches an expression, e.g. `e?`
137      Opt(Box<OptimizedExpr>),
138      /// Matches an expression zero or more times, e.g. `e*`
139      Rep(Box<OptimizedExpr>),
140      /// Matches an expression one or more times, e.g. `e+`
141      #[cfg(feature = "grammar-extras")]
142      RepOnce(Box<OptimizedExpr>),
143      /// Continues to match expressions until one of the strings in the `Vec` is found
144      Skip(Vec<String>),
145      /// Matches an expression and pushes it to the stack, e.g. `push(e)`
146      Push(Box<OptimizedExpr>),
147      /// Matches an expression and assigns a label to it, e.g. #label = exp
148      #[cfg(feature = "grammar-extras")]
149      NodeTag(Box<OptimizedExpr>, String),
150      /// Restores an expression's checkpoint
151      RestoreOnErr(Box<OptimizedExpr>),
152  }
153  
154  impl OptimizedExpr {
155      /// Returns a top-down iterator over the `OptimizedExpr`.
iter_top_down(&self) -> OptimizedExprTopDownIterator156      pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator {
157          OptimizedExprTopDownIterator::new(self)
158      }
159  
160      /// Applies `f` to the `OptimizedExpr` top-down.
map_top_down<F>(self, mut f: F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr,161      pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr
162      where
163          F: FnMut(OptimizedExpr) -> OptimizedExpr,
164      {
165          fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
166          where
167              F: FnMut(OptimizedExpr) -> OptimizedExpr,
168          {
169              let expr = f(expr);
170  
171              match expr {
172                  OptimizedExpr::PosPred(expr) => {
173                      let mapped = Box::new(map_internal(*expr, f));
174                      OptimizedExpr::PosPred(mapped)
175                  }
176                  OptimizedExpr::NegPred(expr) => {
177                      let mapped = Box::new(map_internal(*expr, f));
178                      OptimizedExpr::NegPred(mapped)
179                  }
180                  OptimizedExpr::Seq(lhs, rhs) => {
181                      let mapped_lhs = Box::new(map_internal(*lhs, f));
182                      let mapped_rhs = Box::new(map_internal(*rhs, f));
183                      OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
184                  }
185                  OptimizedExpr::Choice(lhs, rhs) => {
186                      let mapped_lhs = Box::new(map_internal(*lhs, f));
187                      let mapped_rhs = Box::new(map_internal(*rhs, f));
188                      OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
189                  }
190                  OptimizedExpr::Rep(expr) => {
191                      let mapped = Box::new(map_internal(*expr, f));
192                      OptimizedExpr::Rep(mapped)
193                  }
194                  OptimizedExpr::Opt(expr) => {
195                      let mapped = Box::new(map_internal(*expr, f));
196                      OptimizedExpr::Opt(mapped)
197                  }
198                  OptimizedExpr::Push(expr) => {
199                      let mapped = Box::new(map_internal(*expr, f));
200                      OptimizedExpr::Push(mapped)
201                  }
202                  expr => expr,
203              }
204          }
205  
206          map_internal(self, &mut f)
207      }
208  
209      /// Applies `f` to the `OptimizedExpr` bottom-up.
map_bottom_up<F>(self, mut f: F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr,210      pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr
211      where
212          F: FnMut(OptimizedExpr) -> OptimizedExpr,
213      {
214          fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr
215          where
216              F: FnMut(OptimizedExpr) -> OptimizedExpr,
217          {
218              let mapped = match expr {
219                  OptimizedExpr::PosPred(expr) => {
220                      let mapped = Box::new(map_internal(*expr, f));
221                      OptimizedExpr::PosPred(mapped)
222                  }
223                  OptimizedExpr::NegPred(expr) => {
224                      let mapped = Box::new(map_internal(*expr, f));
225                      OptimizedExpr::NegPred(mapped)
226                  }
227                  OptimizedExpr::Seq(lhs, rhs) => {
228                      let mapped_lhs = Box::new(map_internal(*lhs, f));
229                      let mapped_rhs = Box::new(map_internal(*rhs, f));
230                      OptimizedExpr::Seq(mapped_lhs, mapped_rhs)
231                  }
232                  OptimizedExpr::Choice(lhs, rhs) => {
233                      let mapped_lhs = Box::new(map_internal(*lhs, f));
234                      let mapped_rhs = Box::new(map_internal(*rhs, f));
235                      OptimizedExpr::Choice(mapped_lhs, mapped_rhs)
236                  }
237                  OptimizedExpr::Rep(expr) => {
238                      let mapped = Box::new(map_internal(*expr, f));
239                      OptimizedExpr::Rep(mapped)
240                  }
241                  OptimizedExpr::Opt(expr) => {
242                      let mapped = Box::new(map_internal(*expr, f));
243                      OptimizedExpr::Opt(mapped)
244                  }
245                  OptimizedExpr::Push(expr) => {
246                      let mapped = Box::new(map_internal(*expr, f));
247                      OptimizedExpr::Push(mapped)
248                  }
249                  expr => expr,
250              };
251  
252              f(mapped)
253          }
254  
255          map_internal(self, &mut f)
256      }
257  }
258  
259  impl core::fmt::Display for OptimizedExpr {
fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result260      fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
261          match self {
262              OptimizedExpr::Str(s) => write!(f, "{:?}", s),
263              OptimizedExpr::Insens(s) => write!(f, "^{:?}", s),
264              OptimizedExpr::Range(start, end) => {
265                  let start = start.chars().next().expect("Empty range start.");
266                  let end = end.chars().next().expect("Empty range end.");
267                  write!(f, "({:?}..{:?})", start, end)
268              }
269              OptimizedExpr::Ident(id) => write!(f, "{}", id),
270              OptimizedExpr::PeekSlice(start, end) => match end {
271                  Some(end) => write!(f, "PEEK[{}..{}]", start, end),
272                  None => write!(f, "PEEK[{}..]", start),
273              },
274              OptimizedExpr::PosPred(expr) => write!(f, "&{}", expr.as_ref()),
275              OptimizedExpr::NegPred(expr) => write!(f, "!{}", expr.as_ref()),
276              OptimizedExpr::Seq(lhs, rhs) => {
277                  let mut nodes = Vec::new();
278                  nodes.push(lhs);
279                  let mut current = rhs;
280                  while let OptimizedExpr::Seq(lhs, rhs) = current.as_ref() {
281                      nodes.push(lhs);
282                      current = rhs;
283                  }
284                  nodes.push(current);
285                  let sequence = nodes
286                      .iter()
287                      .map(|node| format!("{}", node))
288                      .collect::<Vec<_>>()
289                      .join(" ~ ");
290                  write!(f, "({})", sequence)
291              }
292              OptimizedExpr::Choice(lhs, rhs) => {
293                  let mut nodes = Vec::new();
294                  nodes.push(lhs);
295                  let mut current = rhs;
296                  while let OptimizedExpr::Choice(lhs, rhs) = current.as_ref() {
297                      nodes.push(lhs);
298                      current = rhs;
299                  }
300                  nodes.push(current);
301                  let sequence = nodes
302                      .iter()
303                      .map(|node| format!("{}", node))
304                      .collect::<Vec<_>>()
305                      .join(" | ");
306                  write!(f, "({})", sequence)
307              }
308              OptimizedExpr::Opt(expr) => write!(f, "{}?", expr),
309              OptimizedExpr::Rep(expr) => write!(f, "{}*", expr),
310              #[cfg(feature = "grammar-extras")]
311              OptimizedExpr::RepOnce(expr) => write!(f, "{}+", expr),
312              OptimizedExpr::Skip(strings) => {
313                  let strings = strings
314                      .iter()
315                      .map(|s| format!("{:?}", s))
316                      .collect::<Vec<_>>()
317                      .join(" | ");
318                  write!(f, "(!({}) ~ ANY)*", strings)
319              }
320              OptimizedExpr::Push(expr) => write!(f, "PUSH({})", expr),
321              #[cfg(feature = "grammar-extras")]
322              OptimizedExpr::NodeTag(expr, tag) => {
323                  write!(f, "(#{} = {})", tag, expr)
324              }
325              OptimizedExpr::RestoreOnErr(expr) => core::fmt::Display::fmt(expr.as_ref(), f),
326          }
327      }
328  }
329  
330  /// A top-down iterator over an `OptimizedExpr`.
331  pub struct OptimizedExprTopDownIterator {
332      current: Option<OptimizedExpr>,
333      next: Option<OptimizedExpr>,
334      right_branches: Vec<OptimizedExpr>,
335  }
336  
337  impl OptimizedExprTopDownIterator {
338      /// Creates a new top down iterator from an `OptimizedExpr`.
new(expr: &OptimizedExpr) -> Self339      pub fn new(expr: &OptimizedExpr) -> Self {
340          let mut iter = OptimizedExprTopDownIterator {
341              current: None,
342              next: None,
343              right_branches: vec![],
344          };
345          iter.iterate_expr(expr.clone());
346          iter
347      }
348  
iterate_expr(&mut self, expr: OptimizedExpr)349      fn iterate_expr(&mut self, expr: OptimizedExpr) {
350          self.current = Some(expr.clone());
351          match expr {
352              OptimizedExpr::Seq(lhs, rhs) => {
353                  self.right_branches.push(*rhs);
354                  self.next = Some(*lhs);
355              }
356              OptimizedExpr::Choice(lhs, rhs) => {
357                  self.right_branches.push(*rhs);
358                  self.next = Some(*lhs);
359              }
360              OptimizedExpr::PosPred(expr)
361              | OptimizedExpr::NegPred(expr)
362              | OptimizedExpr::Rep(expr)
363              | OptimizedExpr::Opt(expr)
364              | OptimizedExpr::Push(expr) => {
365                  self.next = Some(*expr);
366              }
367              _ => {
368                  self.next = None;
369              }
370          }
371      }
372  }
373  
374  impl Iterator for OptimizedExprTopDownIterator {
375      type Item = OptimizedExpr;
376  
next(&mut self) -> Option<Self::Item>377      fn next(&mut self) -> Option<Self::Item> {
378          let result = self.current.take();
379  
380          if let Some(expr) = self.next.take() {
381              self.iterate_expr(expr);
382          } else if let Some(expr) = self.right_branches.pop() {
383              self.iterate_expr(expr);
384          }
385  
386          result
387      }
388  }
389  
390  #[cfg(test)]
391  mod tests {
392      use super::*;
393  
394      #[test]
rotate()395      fn rotate() {
396          let rules = {
397              use crate::ast::Expr::*;
398              vec![Rule {
399                  name: "rule".to_owned(),
400                  ty: RuleType::Normal,
401                  expr: box_tree!(Choice(
402                      Choice(
403                          Choice(Str(String::from("a")), Str(String::from("b"))),
404                          Str(String::from("c"))
405                      ),
406                      Str(String::from("d"))
407                  )),
408              }]
409          };
410          let rotated = {
411              use crate::optimizer::OptimizedExpr::*;
412              vec![OptimizedRule {
413                  name: "rule".to_owned(),
414                  ty: RuleType::Normal,
415                  expr: box_tree!(Choice(
416                      Str(String::from("a")),
417                      Choice(
418                          Str(String::from("b")),
419                          Choice(Str(String::from("c")), Str(String::from("d")))
420                      )
421                  )),
422              }]
423          };
424  
425          assert_eq!(optimize(rules), rotated);
426      }
427  
428      #[test]
skip()429      fn skip() {
430          let rules = {
431              use crate::ast::Expr::*;
432              vec![Rule {
433                  name: "rule".to_owned(),
434                  ty: RuleType::Atomic,
435                  expr: box_tree!(Rep(Seq(
436                      NegPred(Choice(Str(String::from("a")), Str(String::from("b")))),
437                      Ident(String::from("ANY"))
438                  ))),
439              }]
440          };
441          let skipped = vec![OptimizedRule {
442              name: "rule".to_owned(),
443              ty: RuleType::Atomic,
444              expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]),
445          }];
446  
447          assert_eq!(optimize(rules), skipped);
448      }
449  
450      #[test]
concat_strings()451      fn concat_strings() {
452          let rules = {
453              use crate::ast::Expr::*;
454              vec![Rule {
455                  name: "rule".to_owned(),
456                  ty: RuleType::Atomic,
457                  expr: box_tree!(Seq(
458                      Seq(Str(String::from("a")), Str(String::from("b"))),
459                      Seq(Str(String::from("c")), Str(String::from("d")))
460                  )),
461              }]
462          };
463          let concatenated = vec![OptimizedRule {
464              name: "rule".to_owned(),
465              ty: RuleType::Atomic,
466              expr: OptimizedExpr::Str(String::from("abcd")),
467          }];
468  
469          assert_eq!(optimize(rules), concatenated);
470      }
471  
472      #[test]
unroll_loop_exact()473      fn unroll_loop_exact() {
474          let rules = vec![Rule {
475              name: "rule".to_owned(),
476              ty: RuleType::Atomic,
477              expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3),
478          }];
479          let unrolled = {
480              use crate::optimizer::OptimizedExpr::*;
481              vec![OptimizedRule {
482                  name: "rule".to_owned(),
483                  ty: RuleType::Atomic,
484                  expr: box_tree!(Seq(
485                      Ident(String::from("a")),
486                      Seq(Ident(String::from("a")), Ident(String::from("a")))
487                  )),
488              }]
489          };
490  
491          assert_eq!(optimize(rules), unrolled);
492      }
493  
494      #[test]
unroll_loop_max()495      fn unroll_loop_max() {
496          let rules = vec![Rule {
497              name: "rule".to_owned(),
498              ty: RuleType::Atomic,
499              expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3),
500          }];
501          let unrolled = {
502              use crate::optimizer::OptimizedExpr::*;
503              vec![OptimizedRule {
504                  name: "rule".to_owned(),
505                  ty: RuleType::Atomic,
506                  expr: box_tree!(Seq(
507                      Opt(Str(String::from("a"))),
508                      Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a"))))
509                  )),
510              }]
511          };
512  
513          assert_eq!(optimize(rules), unrolled);
514      }
515  
516      #[test]
unroll_loop_min()517      fn unroll_loop_min() {
518          let rules = vec![Rule {
519              name: "rule".to_owned(),
520              ty: RuleType::Atomic,
521              expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2),
522          }];
523          let unrolled = {
524              use crate::optimizer::OptimizedExpr::*;
525              vec![OptimizedRule {
526                  name: "rule".to_owned(),
527                  ty: RuleType::Atomic,
528                  expr: box_tree!(Seq(
529                      Str(String::from("a")),
530                      Seq(Str(String::from("a")), Rep(Str(String::from("a"))))
531                  )),
532              }]
533          };
534  
535          assert_eq!(optimize(rules), unrolled);
536      }
537  
538      #[test]
unroll_loop_min_max()539      fn unroll_loop_min_max() {
540          let rules = vec![Rule {
541              name: "rule".to_owned(),
542              ty: RuleType::Atomic,
543              expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3),
544          }];
545          let unrolled = {
546              use crate::optimizer::OptimizedExpr::*;
547              vec![OptimizedRule {
548                  name: "rule".to_owned(),
549                  ty: RuleType::Atomic,
550                  /* TODO possible room for improvement here:
551                   * if the sequences were rolled out in the opposite
552                   * order, we could further optimize the strings
553                   * in cases like this.
554                  Str(String::from(("aa")),
555                  Opt(Str(String::from("a")))
556                  */
557                  expr: box_tree!(Seq(
558                      Str(String::from("a")),
559                      Seq(Str(String::from("a")), Opt(Str(String::from("a"))))
560                  )),
561              }]
562          };
563  
564          assert_eq!(optimize(rules), unrolled);
565      }
566  
567      #[test]
concat_insensitive_strings()568      fn concat_insensitive_strings() {
569          let rules = {
570              use crate::ast::Expr::*;
571              vec![Rule {
572                  name: "rule".to_owned(),
573                  ty: RuleType::Atomic,
574                  expr: box_tree!(Seq(
575                      Seq(Insens(String::from("a")), Insens(String::from("b"))),
576                      Seq(Insens(String::from("c")), Insens(String::from("d")))
577                  )),
578              }]
579          };
580          let concatenated = vec![OptimizedRule {
581              name: "rule".to_owned(),
582              ty: RuleType::Atomic,
583              expr: OptimizedExpr::Insens(String::from("abcd")),
584          }];
585  
586          assert_eq!(optimize(rules), concatenated);
587      }
588  
589      #[test]
long_common_sequence()590      fn long_common_sequence() {
591          let rules = {
592              use crate::ast::Expr::*;
593              vec![Rule {
594                  name: "rule".to_owned(),
595                  ty: RuleType::Silent,
596                  expr: box_tree!(Choice(
597                      Seq(
598                          Ident(String::from("a")),
599                          Seq(Ident(String::from("b")), Ident(String::from("c")))
600                      ),
601                      Seq(
602                          Seq(Ident(String::from("a")), Ident(String::from("b"))),
603                          Ident(String::from("d"))
604                      )
605                  )),
606              }]
607          };
608          let optimized = {
609              use crate::optimizer::OptimizedExpr::*;
610              vec![OptimizedRule {
611                  name: "rule".to_owned(),
612                  ty: RuleType::Silent,
613                  expr: box_tree!(Seq(
614                      Ident(String::from("a")),
615                      Seq(
616                          Ident(String::from("b")),
617                          Choice(Ident(String::from("c")), Ident(String::from("d")))
618                      )
619                  )),
620              }]
621          };
622  
623          assert_eq!(optimize(rules), optimized);
624      }
625  
626      #[test]
short_common_sequence()627      fn short_common_sequence() {
628          let rules = {
629              use crate::ast::Expr::*;
630              vec![Rule {
631                  name: "rule".to_owned(),
632                  ty: RuleType::Atomic,
633                  expr: box_tree!(Choice(
634                      Seq(Ident(String::from("a")), Ident(String::from("b"))),
635                      Ident(String::from("a"))
636                  )),
637              }]
638          };
639          let optimized = {
640              use crate::optimizer::OptimizedExpr::*;
641              vec![OptimizedRule {
642                  name: "rule".to_owned(),
643                  ty: RuleType::Atomic,
644                  expr: box_tree!(Seq(Ident(String::from("a")), Opt(Ident(String::from("b"))))),
645              }]
646          };
647  
648          assert_eq!(optimize(rules), optimized);
649      }
650  
651      #[test]
impossible_common_sequence()652      fn impossible_common_sequence() {
653          let rules = {
654              use crate::ast::Expr::*;
655              vec![Rule {
656                  name: "rule".to_owned(),
657                  ty: RuleType::Silent,
658                  expr: box_tree!(Choice(
659                      Ident(String::from("a")),
660                      Seq(Ident(String::from("a")), Ident(String::from("b")))
661                  )),
662              }]
663          };
664          let optimized = {
665              use crate::optimizer::OptimizedExpr::*;
666              vec![OptimizedRule {
667                  name: "rule".to_owned(),
668                  ty: RuleType::Silent,
669                  expr: box_tree!(Ident(String::from("a"))),
670              }]
671          };
672  
673          assert_eq!(optimize(rules), optimized);
674      }
675  
676      #[test]
lister()677      fn lister() {
678          let rules = {
679              use crate::ast::Expr::*;
680              vec![Rule {
681                  name: "rule".to_owned(),
682                  ty: RuleType::Silent,
683                  expr: box_tree!(Seq(
684                      Rep(Seq(Ident(String::from("a")), Ident(String::from("b")))),
685                      Ident(String::from("a"))
686                  )),
687              }]
688          };
689          let optimized = {
690              use crate::optimizer::OptimizedExpr::*;
691              vec![OptimizedRule {
692                  name: "rule".to_owned(),
693                  ty: RuleType::Silent,
694                  expr: box_tree!(Seq(
695                      Ident(String::from("a")),
696                      Rep(Seq(Ident(String::from("b")), Ident(String::from("a"))))
697                  )),
698              }]
699          };
700  
701          assert_eq!(optimize(rules), optimized);
702      }
703  
704      mod display {
705          use super::super::*;
706          /// In previous implementation of Display for OptimizedExpr
707          /// in commit 48e0a8bd3d43a17c1c78f099610b745d18ec0c5f (actually committed by me),
708          /// Str("\n") will be displayed as
709          /// "
710          /// "
711          ///
712          /// It will not break the compilation in normal use.
713          ///
714          /// But when I use it in automatically generating documents,
715          /// it will quite confusing and we'll be unable to distinguish \n and \r.
716          ///
717          /// And `cargo expand` will emit codes that can't be compiled,
718          /// for it expand `#[doc("...")]` to `/// ...`,
719          /// and when the document comment breaks the line,
720          /// it will be expanded into wrong codes.
721          #[test]
control_character()722          fn control_character() {
723              assert_eq!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\\n\"");
724              assert_eq!(
725                  OptimizedExpr::Insens("\n".to_owned()).to_string(),
726                  "^\"\\n\"",
727              );
728              assert_eq!(
729                  OptimizedExpr::Range("\n".to_owned(), "\r".to_owned()).to_string(),
730                  "('\\n'..'\\r')",
731              );
732              assert_eq!(
733                  OptimizedExpr::Skip(vec![
734                      "\n".to_owned(),
735                      "\r".to_owned(),
736                      "\n\r".to_owned(),
737                      "\0".to_owned(),
738                  ])
739                  .to_string(),
740                  r#"(!("\n" | "\r" | "\n\r" | "\0") ~ ANY)*"#,
741              );
742  
743              assert_ne!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\n\"");
744          }
745  
746          #[test]
str()747          fn str() {
748              assert_eq!(OptimizedExpr::Str("a".to_owned()).to_string(), r#""a""#);
749          }
750  
751          #[test]
insens()752          fn insens() {
753              assert_eq!(OptimizedExpr::Insens("a".to_owned()).to_string(), r#"^"a""#);
754          }
755  
756          #[test]
range()757          fn range() {
758              assert_eq!(
759                  OptimizedExpr::Range("a".to_owned(), "z".to_owned()).to_string(),
760                  r#"('a'..'z')"#,
761              );
762          }
763  
764          #[test]
ident()765          fn ident() {
766              assert_eq!(OptimizedExpr::Ident("a".to_owned()).to_string(), r#"a"#);
767          }
768  
769          #[test]
peek_slice()770          fn peek_slice() {
771              assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]");
772              assert_eq!(
773                  OptimizedExpr::PeekSlice(0, Some(-1)).to_string(),
774                  "PEEK[0..-1]",
775              );
776              assert_eq!(
777                  OptimizedExpr::PeekSlice(2, Some(3)).to_string(),
778                  "PEEK[2..3]",
779              );
780              assert_eq!(
781                  OptimizedExpr::PeekSlice(2, Some(-1)).to_string(),
782                  "PEEK[2..-1]",
783              );
784              assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]");
785          }
786  
787          #[test]
pos_pred()788          fn pos_pred() {
789              assert_eq!(
790                  OptimizedExpr::PosPred(Box::new(OptimizedExpr::NegPred(Box::new(
791                      OptimizedExpr::Ident("a".to_owned()),
792                  ))))
793                  .to_string(),
794                  "&!a",
795              );
796              assert_eq!(
797                  OptimizedExpr::PosPred(Box::new(OptimizedExpr::Choice(
798                      Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident(
799                          "a".to_owned(),
800                      )))),
801                      Box::new(OptimizedExpr::Str("a".to_owned())),
802                  )))
803                  .to_string(),
804                  r#"&(a* | "a")"#,
805              );
806              assert_eq!(
807                  OptimizedExpr::PosPred(Box::new(OptimizedExpr::RestoreOnErr(Box::new(
808                      OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("a".to_owned()))),
809                  ))))
810                  .to_string(),
811                  "&!a",
812              );
813          }
814  
815          #[test]
neg_pred()816          fn neg_pred() {
817              assert_eq!(
818                  OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
819                  r#"!e"#,
820              );
821              assert_eq!(
822                  OptimizedExpr::NegPred(Box::new(OptimizedExpr::Choice(
823                      Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident(
824                          "a".to_owned(),
825                      )))),
826                      Box::new(OptimizedExpr::Str("a".to_owned())),
827                  )))
828                  .to_string(),
829                  r#"!(PUSH(a) | "a")"#,
830              );
831          }
832  
833          #[test]
seq()834          fn seq() {
835              assert_eq!(
836                  OptimizedExpr::Seq(
837                      Box::new(OptimizedExpr::Ident("e1".to_owned())),
838                      Box::new(OptimizedExpr::Ident("e2".to_owned())),
839                  )
840                  .to_string(),
841                  r#"(e1 ~ e2)"#,
842              );
843              assert_eq!(
844                  Expr::Seq(
845                      Box::new(Expr::Ident("e1".to_owned())),
846                      Box::new(Expr::Seq(
847                          Box::new(Expr::Ident("e2".to_owned())),
848                          Box::new(Expr::Ident("e3".to_owned())),
849                      )),
850                  )
851                  .to_string(),
852                  "(e1 ~ e2 ~ e3)",
853              );
854              assert_eq!(
855                  Expr::Seq(
856                      Box::new(Expr::Ident("e1".to_owned())),
857                      Box::new(Expr::Seq(
858                          Box::new(Expr::Ident("e2".to_owned())),
859                          Box::new(Expr::Seq(
860                              Box::new(Expr::Ident("e3".to_owned())),
861                              Box::new(Expr::Ident("e4".to_owned())),
862                          )),
863                      )),
864                  )
865                  .to_string(),
866                  "(e1 ~ e2 ~ e3 ~ e4)",
867              );
868              assert_eq!(
869                  Expr::Seq(
870                      Box::new(Expr::Ident("e1".to_owned())),
871                      Box::new(Expr::Choice(
872                          Box::new(Expr::Ident("e2".to_owned())),
873                          Box::new(Expr::Seq(
874                              Box::new(Expr::Ident("e3".to_owned())),
875                              Box::new(Expr::Ident("e4".to_owned())),
876                          )),
877                      )),
878                  )
879                  .to_string(),
880                  "(e1 ~ (e2 | (e3 ~ e4)))",
881              );
882              assert_eq!(
883                  Expr::Seq(
884                      Box::new(Expr::Ident("e1".to_owned())),
885                      Box::new(Expr::Seq(
886                          Box::new(Expr::Ident("e2".to_owned())),
887                          Box::new(Expr::Choice(
888                              Box::new(Expr::Ident("e3".to_owned())),
889                              Box::new(Expr::Ident("e4".to_owned())),
890                          )),
891                      )),
892                  )
893                  .to_string(),
894                  "(e1 ~ e2 ~ (e3 | e4))",
895              );
896              assert_eq!(
897                  OptimizedExpr::Seq(
898                      Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Str(
899                          "a".to_owned(),
900                      )))),
901                      Box::new(OptimizedExpr::Seq(
902                          Box::new(OptimizedExpr::Ident("b".to_owned())),
903                          Box::new(OptimizedExpr::Insens("c".to_owned())),
904                      )),
905                  )
906                  .to_string(),
907                  r#"("a"* ~ b ~ ^"c")"#,
908              );
909              assert_eq!(
910                  OptimizedExpr::Seq(
911                      Box::new(OptimizedExpr::PosPred(Box::new(OptimizedExpr::Range(
912                          "a".to_owned(),
913                          "z".to_owned(),
914                      )))),
915                      Box::new(OptimizedExpr::NegPred(Box::new(OptimizedExpr::Opt(
916                          Box::new(OptimizedExpr::Range("A".to_owned(), "Z".to_owned())),
917                      )))),
918                  )
919                  .to_string(),
920                  "(&('a'..'z') ~ !('A'..'Z')?)",
921              );
922          }
923  
924          #[test]
choice()925          fn choice() {
926              assert_eq!(
927                  OptimizedExpr::Choice(
928                      Box::new(OptimizedExpr::Ident("e1".to_owned())),
929                      Box::new(OptimizedExpr::Ident("e2".to_owned())),
930                  )
931                  .to_string(),
932                  r#"(e1 | e2)"#,
933              );
934              assert_eq!(
935                  Expr::Choice(
936                      Box::new(Expr::Ident("e1".to_owned())),
937                      Box::new(Expr::Choice(
938                          Box::new(Expr::Ident("e2".to_owned())),
939                          Box::new(Expr::Ident("e3".to_owned())),
940                      )),
941                  )
942                  .to_string(),
943                  "(e1 | e2 | e3)",
944              );
945              assert_eq!(
946                  Expr::Choice(
947                      Box::new(Expr::Ident("e1".to_owned())),
948                      Box::new(Expr::Choice(
949                          Box::new(Expr::Ident("e2".to_owned())),
950                          Box::new(Expr::Choice(
951                              Box::new(Expr::Ident("e3".to_owned())),
952                              Box::new(Expr::Ident("e4".to_owned())),
953                          )),
954                      )),
955                  )
956                  .to_string(),
957                  "(e1 | e2 | e3 | e4)",
958              );
959              assert_eq!(
960                  Expr::Choice(
961                      Box::new(Expr::Ident("e1".to_owned())),
962                      Box::new(Expr::Seq(
963                          Box::new(Expr::Ident("e2".to_owned())),
964                          Box::new(Expr::Choice(
965                              Box::new(Expr::Ident("e3".to_owned())),
966                              Box::new(Expr::Ident("e4".to_owned())),
967                          )),
968                      )),
969                  )
970                  .to_string(),
971                  "(e1 | (e2 ~ (e3 | e4)))",
972              );
973              assert_eq!(
974                  OptimizedExpr::Choice(
975                      Box::new(OptimizedExpr::Str("a".to_owned())),
976                      Box::new(OptimizedExpr::Choice(
977                          Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident(
978                              "b".to_owned(),
979                          )))),
980                          Box::new(OptimizedExpr::Insens("c".to_owned())),
981                      )),
982                  )
983                  .to_string(),
984                  r#"("a" | PUSH(b) | ^"c")"#,
985              );
986          }
987  
988          #[test]
opt()989          fn opt() {
990              assert_eq!(
991                  OptimizedExpr::Opt(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
992                  "e?",
993              );
994          }
995  
996          #[test]
rep()997          fn rep() {
998              assert_eq!(
999                  OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident("x".to_owned()))).to_string(),
1000                  "x*",
1001              );
1002              assert_eq!(
1003                  OptimizedExpr::Rep(Box::new(OptimizedExpr::Range(
1004                      "0".to_owned(),
1005                      "9".to_owned(),
1006                  )))
1007                  .to_string(),
1008                  "('0'..'9')*",
1009              );
1010          }
1011  
1012          #[test]
1013          #[cfg(feature = "grammar-extras")]
rep_once()1014          fn rep_once() {
1015              assert_eq!(
1016                  OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1017                  "e+",
1018              );
1019              assert_eq!(
1020                  OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Range(
1021                      "0".to_owned(),
1022                      "9".to_owned(),
1023                  )))
1024                  .to_string(),
1025                  "('0'..'9')+",
1026              );
1027          }
1028  
1029          #[test]
skip()1030          fn skip() {
1031              assert_eq!(
1032                  OptimizedExpr::Skip(
1033                      ["a", "bc"]
1034                          .into_iter()
1035                          .map(|s| s.to_owned())
1036                          .collect::<Vec<_>>(),
1037                  )
1038                  .to_string(),
1039                  r#"(!("a" | "bc") ~ ANY)*"#,
1040              );
1041          }
1042  
1043          #[test]
push()1044          fn push() {
1045              assert_eq!(
1046                  OptimizedExpr::Push(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(),
1047                  "PUSH(e)",
1048              );
1049          }
1050  
1051          #[test]
1052          #[cfg(feature = "grammar-extras")]
node_tag()1053          fn node_tag() {
1054              assert_eq!(
1055                  OptimizedExpr::NodeTag(
1056                      Box::new(OptimizedExpr::Ident("expr".to_owned())),
1057                      "label".to_owned(),
1058                  )
1059                  .to_string(),
1060                  r#"(#label = expr)"#,
1061              );
1062              assert_eq!(
1063                  OptimizedExpr::NodeTag(
1064                      Box::new(OptimizedExpr::Ident("x".to_owned())),
1065                      "X".to_owned(),
1066                  )
1067                  .to_string(),
1068                  r#"(#X = x)"#,
1069              );
1070              assert_eq!(
1071                  OptimizedExpr::NodeTag(
1072                      Box::new(OptimizedExpr::Seq(
1073                          Box::new(OptimizedExpr::Ident("x".to_owned())),
1074                          Box::new(OptimizedExpr::Str("y".to_owned())),
1075                      )),
1076                      "X".to_owned(),
1077                  )
1078                  .to_string(),
1079                  r#"(#X = (x ~ "y"))"#,
1080              );
1081          }
1082  
1083          #[test]
restore_on_err()1084          fn restore_on_err() {
1085              assert_eq!(
1086                  OptimizedExpr::RestoreOnErr(Box::new(OptimizedExpr::Ident("e".to_owned())))
1087                      .to_string(),
1088                  "e",
1089              );
1090          }
1091      }
1092  }
1093