1 // pest. The Elegant Parser 2 // Copyright (c) 2018 DragoČ™ Tiselice 3 // 4 // Licensed under the Apache License, Version 2.0 5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT 6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your 7 // option. All files in the project carrying such notice may not be copied, 8 // modified, or distributed except according to those terms. 9 10 //! Different optimizations for pest's ASTs. 11 12 use crate::ast::*; 13 use std::collections::HashMap; 14 15 #[cfg(test)] 16 macro_rules! box_tree { 17 ( $node:ident( $( $child:ident( $($args:tt)* ) ),+ ) ) => ( 18 $node ( $( Box::new( box_tree!( $child ( $($args )* ) ) ) ),+ ) 19 ); 20 ($expr:expr) => ($expr); 21 } 22 23 mod concatenator; 24 mod factorizer; 25 mod lister; 26 mod restorer; 27 mod rotater; 28 mod skipper; 29 mod unroller; 30 31 /// Takes pest's ASTs and optimizes them optimize(rules: Vec<Rule>) -> Vec<OptimizedRule>32 pub fn optimize(rules: Vec<Rule>) -> Vec<OptimizedRule> { 33 let optimized: Vec<OptimizedRule> = rules 34 .into_iter() 35 .map(rotater::rotate) 36 .map(skipper::skip) 37 .map(unroller::unroll) 38 .map(concatenator::concatenate) 39 .map(factorizer::factor) 40 .map(lister::list) 41 .map(rule_to_optimized_rule) 42 .collect(); 43 44 let rules = to_hash_map(&optimized); 45 optimized 46 .into_iter() 47 .map(|rule| restorer::restore_on_err(rule, &rules)) 48 .collect() 49 } 50 rule_to_optimized_rule(rule: Rule) -> OptimizedRule51 fn rule_to_optimized_rule(rule: Rule) -> OptimizedRule { 52 fn to_optimized(expr: Expr) -> OptimizedExpr { 53 match expr { 54 Expr::Str(string) => OptimizedExpr::Str(string), 55 Expr::Insens(string) => OptimizedExpr::Insens(string), 56 Expr::Range(start, end) => OptimizedExpr::Range(start, end), 57 Expr::Ident(ident) => OptimizedExpr::Ident(ident), 58 Expr::PeekSlice(start, end) => OptimizedExpr::PeekSlice(start, end), 59 Expr::PosPred(expr) => OptimizedExpr::PosPred(Box::new(to_optimized(*expr))), 60 Expr::NegPred(expr) => OptimizedExpr::NegPred(Box::new(to_optimized(*expr))), 61 Expr::Seq(lhs, rhs) => { 62 OptimizedExpr::Seq(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs))) 63 } 64 Expr::Choice(lhs, rhs) => { 65 OptimizedExpr::Choice(Box::new(to_optimized(*lhs)), Box::new(to_optimized(*rhs))) 66 } 67 Expr::Opt(expr) => OptimizedExpr::Opt(Box::new(to_optimized(*expr))), 68 Expr::Rep(expr) => OptimizedExpr::Rep(Box::new(to_optimized(*expr))), 69 Expr::Skip(strings) => OptimizedExpr::Skip(strings), 70 Expr::Push(expr) => OptimizedExpr::Push(Box::new(to_optimized(*expr))), 71 #[cfg(feature = "grammar-extras")] 72 Expr::NodeTag(expr, tag) => OptimizedExpr::NodeTag(Box::new(to_optimized(*expr)), tag), 73 #[cfg(feature = "grammar-extras")] 74 Expr::RepOnce(expr) => OptimizedExpr::RepOnce(Box::new(to_optimized(*expr))), 75 #[cfg(not(feature = "grammar-extras"))] 76 Expr::RepOnce(_) => unreachable!("No valid transformation to OptimizedRule"), 77 Expr::RepExact(..) | Expr::RepMin(..) | Expr::RepMax(..) | Expr::RepMinMax(..) => { 78 unreachable!("No valid transformation to OptimizedRule") 79 } 80 } 81 } 82 83 OptimizedRule { 84 name: rule.name, 85 ty: rule.ty, 86 expr: to_optimized(rule.expr), 87 } 88 } 89 to_hash_map(rules: &[OptimizedRule]) -> HashMap<String, OptimizedExpr>90 fn to_hash_map(rules: &[OptimizedRule]) -> HashMap<String, OptimizedExpr> { 91 rules 92 .iter() 93 .map(|r| (r.name.clone(), r.expr.clone())) 94 .collect() 95 } 96 97 /// The optimized version of the pest AST's `Rule`. 98 #[derive(Clone, Debug, Eq, PartialEq)] 99 pub struct OptimizedRule { 100 /// The name of the rule. 101 pub name: String, 102 /// The type of the rule. 103 pub ty: RuleType, 104 /// The optimized expression of the rule. 105 pub expr: OptimizedExpr, 106 } 107 108 /// The optimized version of the pest AST's `Expr`. 109 /// 110 /// # Warning: Semantic Versioning 111 /// There may be non-breaking changes to the meta-grammar 112 /// between minor versions. Those non-breaking changes, however, 113 /// may translate into semver-breaking changes due to the additional variants 114 /// propaged from the `Rule` enum. This is a known issue and will be fixed in the 115 /// future (e.g. by increasing MSRV and non_exhaustive annotations). 116 #[derive(Clone, Debug, Eq, PartialEq)] 117 pub enum OptimizedExpr { 118 /// Matches an exact string, e.g. `"a"` 119 Str(String), 120 /// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"` 121 Insens(String), 122 /// Matches one character in the range, e.g. `'a'..'z'` 123 Range(String, String), 124 /// Matches the rule with the given name, e.g. `a` 125 Ident(String), 126 /// Matches a custom part of the stack, e.g. `PEEK[..]` 127 PeekSlice(i32, Option<i32>), 128 /// Positive lookahead; matches expression without making progress, e.g. `&e` 129 PosPred(Box<OptimizedExpr>), 130 /// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e` 131 NegPred(Box<OptimizedExpr>), 132 /// Matches a sequence of two expressions, e.g. `e1 ~ e2` 133 Seq(Box<OptimizedExpr>, Box<OptimizedExpr>), 134 /// Matches either of two expressions, e.g. `e1 | e2` 135 Choice(Box<OptimizedExpr>, Box<OptimizedExpr>), 136 /// Optionally matches an expression, e.g. `e?` 137 Opt(Box<OptimizedExpr>), 138 /// Matches an expression zero or more times, e.g. `e*` 139 Rep(Box<OptimizedExpr>), 140 /// Matches an expression one or more times, e.g. `e+` 141 #[cfg(feature = "grammar-extras")] 142 RepOnce(Box<OptimizedExpr>), 143 /// Continues to match expressions until one of the strings in the `Vec` is found 144 Skip(Vec<String>), 145 /// Matches an expression and pushes it to the stack, e.g. `push(e)` 146 Push(Box<OptimizedExpr>), 147 /// Matches an expression and assigns a label to it, e.g. #label = exp 148 #[cfg(feature = "grammar-extras")] 149 NodeTag(Box<OptimizedExpr>, String), 150 /// Restores an expression's checkpoint 151 RestoreOnErr(Box<OptimizedExpr>), 152 } 153 154 impl OptimizedExpr { 155 /// Returns a top-down iterator over the `OptimizedExpr`. iter_top_down(&self) -> OptimizedExprTopDownIterator156 pub fn iter_top_down(&self) -> OptimizedExprTopDownIterator { 157 OptimizedExprTopDownIterator::new(self) 158 } 159 160 /// Applies `f` to the `OptimizedExpr` top-down. map_top_down<F>(self, mut f: F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr,161 pub fn map_top_down<F>(self, mut f: F) -> OptimizedExpr 162 where 163 F: FnMut(OptimizedExpr) -> OptimizedExpr, 164 { 165 fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr 166 where 167 F: FnMut(OptimizedExpr) -> OptimizedExpr, 168 { 169 let expr = f(expr); 170 171 match expr { 172 OptimizedExpr::PosPred(expr) => { 173 let mapped = Box::new(map_internal(*expr, f)); 174 OptimizedExpr::PosPred(mapped) 175 } 176 OptimizedExpr::NegPred(expr) => { 177 let mapped = Box::new(map_internal(*expr, f)); 178 OptimizedExpr::NegPred(mapped) 179 } 180 OptimizedExpr::Seq(lhs, rhs) => { 181 let mapped_lhs = Box::new(map_internal(*lhs, f)); 182 let mapped_rhs = Box::new(map_internal(*rhs, f)); 183 OptimizedExpr::Seq(mapped_lhs, mapped_rhs) 184 } 185 OptimizedExpr::Choice(lhs, rhs) => { 186 let mapped_lhs = Box::new(map_internal(*lhs, f)); 187 let mapped_rhs = Box::new(map_internal(*rhs, f)); 188 OptimizedExpr::Choice(mapped_lhs, mapped_rhs) 189 } 190 OptimizedExpr::Rep(expr) => { 191 let mapped = Box::new(map_internal(*expr, f)); 192 OptimizedExpr::Rep(mapped) 193 } 194 OptimizedExpr::Opt(expr) => { 195 let mapped = Box::new(map_internal(*expr, f)); 196 OptimizedExpr::Opt(mapped) 197 } 198 OptimizedExpr::Push(expr) => { 199 let mapped = Box::new(map_internal(*expr, f)); 200 OptimizedExpr::Push(mapped) 201 } 202 expr => expr, 203 } 204 } 205 206 map_internal(self, &mut f) 207 } 208 209 /// Applies `f` to the `OptimizedExpr` bottom-up. map_bottom_up<F>(self, mut f: F) -> OptimizedExpr where F: FnMut(OptimizedExpr) -> OptimizedExpr,210 pub fn map_bottom_up<F>(self, mut f: F) -> OptimizedExpr 211 where 212 F: FnMut(OptimizedExpr) -> OptimizedExpr, 213 { 214 fn map_internal<F>(expr: OptimizedExpr, f: &mut F) -> OptimizedExpr 215 where 216 F: FnMut(OptimizedExpr) -> OptimizedExpr, 217 { 218 let mapped = match expr { 219 OptimizedExpr::PosPred(expr) => { 220 let mapped = Box::new(map_internal(*expr, f)); 221 OptimizedExpr::PosPred(mapped) 222 } 223 OptimizedExpr::NegPred(expr) => { 224 let mapped = Box::new(map_internal(*expr, f)); 225 OptimizedExpr::NegPred(mapped) 226 } 227 OptimizedExpr::Seq(lhs, rhs) => { 228 let mapped_lhs = Box::new(map_internal(*lhs, f)); 229 let mapped_rhs = Box::new(map_internal(*rhs, f)); 230 OptimizedExpr::Seq(mapped_lhs, mapped_rhs) 231 } 232 OptimizedExpr::Choice(lhs, rhs) => { 233 let mapped_lhs = Box::new(map_internal(*lhs, f)); 234 let mapped_rhs = Box::new(map_internal(*rhs, f)); 235 OptimizedExpr::Choice(mapped_lhs, mapped_rhs) 236 } 237 OptimizedExpr::Rep(expr) => { 238 let mapped = Box::new(map_internal(*expr, f)); 239 OptimizedExpr::Rep(mapped) 240 } 241 OptimizedExpr::Opt(expr) => { 242 let mapped = Box::new(map_internal(*expr, f)); 243 OptimizedExpr::Opt(mapped) 244 } 245 OptimizedExpr::Push(expr) => { 246 let mapped = Box::new(map_internal(*expr, f)); 247 OptimizedExpr::Push(mapped) 248 } 249 expr => expr, 250 }; 251 252 f(mapped) 253 } 254 255 map_internal(self, &mut f) 256 } 257 } 258 259 impl core::fmt::Display for OptimizedExpr { fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result260 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { 261 match self { 262 OptimizedExpr::Str(s) => write!(f, "{:?}", s), 263 OptimizedExpr::Insens(s) => write!(f, "^{:?}", s), 264 OptimizedExpr::Range(start, end) => { 265 let start = start.chars().next().expect("Empty range start."); 266 let end = end.chars().next().expect("Empty range end."); 267 write!(f, "({:?}..{:?})", start, end) 268 } 269 OptimizedExpr::Ident(id) => write!(f, "{}", id), 270 OptimizedExpr::PeekSlice(start, end) => match end { 271 Some(end) => write!(f, "PEEK[{}..{}]", start, end), 272 None => write!(f, "PEEK[{}..]", start), 273 }, 274 OptimizedExpr::PosPred(expr) => write!(f, "&{}", expr.as_ref()), 275 OptimizedExpr::NegPred(expr) => write!(f, "!{}", expr.as_ref()), 276 OptimizedExpr::Seq(lhs, rhs) => { 277 let mut nodes = Vec::new(); 278 nodes.push(lhs); 279 let mut current = rhs; 280 while let OptimizedExpr::Seq(lhs, rhs) = current.as_ref() { 281 nodes.push(lhs); 282 current = rhs; 283 } 284 nodes.push(current); 285 let sequence = nodes 286 .iter() 287 .map(|node| format!("{}", node)) 288 .collect::<Vec<_>>() 289 .join(" ~ "); 290 write!(f, "({})", sequence) 291 } 292 OptimizedExpr::Choice(lhs, rhs) => { 293 let mut nodes = Vec::new(); 294 nodes.push(lhs); 295 let mut current = rhs; 296 while let OptimizedExpr::Choice(lhs, rhs) = current.as_ref() { 297 nodes.push(lhs); 298 current = rhs; 299 } 300 nodes.push(current); 301 let sequence = nodes 302 .iter() 303 .map(|node| format!("{}", node)) 304 .collect::<Vec<_>>() 305 .join(" | "); 306 write!(f, "({})", sequence) 307 } 308 OptimizedExpr::Opt(expr) => write!(f, "{}?", expr), 309 OptimizedExpr::Rep(expr) => write!(f, "{}*", expr), 310 #[cfg(feature = "grammar-extras")] 311 OptimizedExpr::RepOnce(expr) => write!(f, "{}+", expr), 312 OptimizedExpr::Skip(strings) => { 313 let strings = strings 314 .iter() 315 .map(|s| format!("{:?}", s)) 316 .collect::<Vec<_>>() 317 .join(" | "); 318 write!(f, "(!({}) ~ ANY)*", strings) 319 } 320 OptimizedExpr::Push(expr) => write!(f, "PUSH({})", expr), 321 #[cfg(feature = "grammar-extras")] 322 OptimizedExpr::NodeTag(expr, tag) => { 323 write!(f, "(#{} = {})", tag, expr) 324 } 325 OptimizedExpr::RestoreOnErr(expr) => core::fmt::Display::fmt(expr.as_ref(), f), 326 } 327 } 328 } 329 330 /// A top-down iterator over an `OptimizedExpr`. 331 pub struct OptimizedExprTopDownIterator { 332 current: Option<OptimizedExpr>, 333 next: Option<OptimizedExpr>, 334 right_branches: Vec<OptimizedExpr>, 335 } 336 337 impl OptimizedExprTopDownIterator { 338 /// Creates a new top down iterator from an `OptimizedExpr`. new(expr: &OptimizedExpr) -> Self339 pub fn new(expr: &OptimizedExpr) -> Self { 340 let mut iter = OptimizedExprTopDownIterator { 341 current: None, 342 next: None, 343 right_branches: vec![], 344 }; 345 iter.iterate_expr(expr.clone()); 346 iter 347 } 348 iterate_expr(&mut self, expr: OptimizedExpr)349 fn iterate_expr(&mut self, expr: OptimizedExpr) { 350 self.current = Some(expr.clone()); 351 match expr { 352 OptimizedExpr::Seq(lhs, rhs) => { 353 self.right_branches.push(*rhs); 354 self.next = Some(*lhs); 355 } 356 OptimizedExpr::Choice(lhs, rhs) => { 357 self.right_branches.push(*rhs); 358 self.next = Some(*lhs); 359 } 360 OptimizedExpr::PosPred(expr) 361 | OptimizedExpr::NegPred(expr) 362 | OptimizedExpr::Rep(expr) 363 | OptimizedExpr::Opt(expr) 364 | OptimizedExpr::Push(expr) => { 365 self.next = Some(*expr); 366 } 367 _ => { 368 self.next = None; 369 } 370 } 371 } 372 } 373 374 impl Iterator for OptimizedExprTopDownIterator { 375 type Item = OptimizedExpr; 376 next(&mut self) -> Option<Self::Item>377 fn next(&mut self) -> Option<Self::Item> { 378 let result = self.current.take(); 379 380 if let Some(expr) = self.next.take() { 381 self.iterate_expr(expr); 382 } else if let Some(expr) = self.right_branches.pop() { 383 self.iterate_expr(expr); 384 } 385 386 result 387 } 388 } 389 390 #[cfg(test)] 391 mod tests { 392 use super::*; 393 394 #[test] rotate()395 fn rotate() { 396 let rules = { 397 use crate::ast::Expr::*; 398 vec![Rule { 399 name: "rule".to_owned(), 400 ty: RuleType::Normal, 401 expr: box_tree!(Choice( 402 Choice( 403 Choice(Str(String::from("a")), Str(String::from("b"))), 404 Str(String::from("c")) 405 ), 406 Str(String::from("d")) 407 )), 408 }] 409 }; 410 let rotated = { 411 use crate::optimizer::OptimizedExpr::*; 412 vec![OptimizedRule { 413 name: "rule".to_owned(), 414 ty: RuleType::Normal, 415 expr: box_tree!(Choice( 416 Str(String::from("a")), 417 Choice( 418 Str(String::from("b")), 419 Choice(Str(String::from("c")), Str(String::from("d"))) 420 ) 421 )), 422 }] 423 }; 424 425 assert_eq!(optimize(rules), rotated); 426 } 427 428 #[test] skip()429 fn skip() { 430 let rules = { 431 use crate::ast::Expr::*; 432 vec![Rule { 433 name: "rule".to_owned(), 434 ty: RuleType::Atomic, 435 expr: box_tree!(Rep(Seq( 436 NegPred(Choice(Str(String::from("a")), Str(String::from("b")))), 437 Ident(String::from("ANY")) 438 ))), 439 }] 440 }; 441 let skipped = vec![OptimizedRule { 442 name: "rule".to_owned(), 443 ty: RuleType::Atomic, 444 expr: OptimizedExpr::Skip(vec![String::from("a"), String::from("b")]), 445 }]; 446 447 assert_eq!(optimize(rules), skipped); 448 } 449 450 #[test] concat_strings()451 fn concat_strings() { 452 let rules = { 453 use crate::ast::Expr::*; 454 vec![Rule { 455 name: "rule".to_owned(), 456 ty: RuleType::Atomic, 457 expr: box_tree!(Seq( 458 Seq(Str(String::from("a")), Str(String::from("b"))), 459 Seq(Str(String::from("c")), Str(String::from("d"))) 460 )), 461 }] 462 }; 463 let concatenated = vec![OptimizedRule { 464 name: "rule".to_owned(), 465 ty: RuleType::Atomic, 466 expr: OptimizedExpr::Str(String::from("abcd")), 467 }]; 468 469 assert_eq!(optimize(rules), concatenated); 470 } 471 472 #[test] unroll_loop_exact()473 fn unroll_loop_exact() { 474 let rules = vec![Rule { 475 name: "rule".to_owned(), 476 ty: RuleType::Atomic, 477 expr: Expr::RepExact(Box::new(Expr::Ident(String::from("a"))), 3), 478 }]; 479 let unrolled = { 480 use crate::optimizer::OptimizedExpr::*; 481 vec![OptimizedRule { 482 name: "rule".to_owned(), 483 ty: RuleType::Atomic, 484 expr: box_tree!(Seq( 485 Ident(String::from("a")), 486 Seq(Ident(String::from("a")), Ident(String::from("a"))) 487 )), 488 }] 489 }; 490 491 assert_eq!(optimize(rules), unrolled); 492 } 493 494 #[test] unroll_loop_max()495 fn unroll_loop_max() { 496 let rules = vec![Rule { 497 name: "rule".to_owned(), 498 ty: RuleType::Atomic, 499 expr: Expr::RepMax(Box::new(Expr::Str("a".to_owned())), 3), 500 }]; 501 let unrolled = { 502 use crate::optimizer::OptimizedExpr::*; 503 vec![OptimizedRule { 504 name: "rule".to_owned(), 505 ty: RuleType::Atomic, 506 expr: box_tree!(Seq( 507 Opt(Str(String::from("a"))), 508 Seq(Opt(Str(String::from("a"))), Opt(Str(String::from("a")))) 509 )), 510 }] 511 }; 512 513 assert_eq!(optimize(rules), unrolled); 514 } 515 516 #[test] unroll_loop_min()517 fn unroll_loop_min() { 518 let rules = vec![Rule { 519 name: "rule".to_owned(), 520 ty: RuleType::Atomic, 521 expr: Expr::RepMin(Box::new(Expr::Str("a".to_owned())), 2), 522 }]; 523 let unrolled = { 524 use crate::optimizer::OptimizedExpr::*; 525 vec![OptimizedRule { 526 name: "rule".to_owned(), 527 ty: RuleType::Atomic, 528 expr: box_tree!(Seq( 529 Str(String::from("a")), 530 Seq(Str(String::from("a")), Rep(Str(String::from("a")))) 531 )), 532 }] 533 }; 534 535 assert_eq!(optimize(rules), unrolled); 536 } 537 538 #[test] unroll_loop_min_max()539 fn unroll_loop_min_max() { 540 let rules = vec![Rule { 541 name: "rule".to_owned(), 542 ty: RuleType::Atomic, 543 expr: Expr::RepMinMax(Box::new(Expr::Str("a".to_owned())), 2, 3), 544 }]; 545 let unrolled = { 546 use crate::optimizer::OptimizedExpr::*; 547 vec![OptimizedRule { 548 name: "rule".to_owned(), 549 ty: RuleType::Atomic, 550 /* TODO possible room for improvement here: 551 * if the sequences were rolled out in the opposite 552 * order, we could further optimize the strings 553 * in cases like this. 554 Str(String::from(("aa")), 555 Opt(Str(String::from("a"))) 556 */ 557 expr: box_tree!(Seq( 558 Str(String::from("a")), 559 Seq(Str(String::from("a")), Opt(Str(String::from("a")))) 560 )), 561 }] 562 }; 563 564 assert_eq!(optimize(rules), unrolled); 565 } 566 567 #[test] concat_insensitive_strings()568 fn concat_insensitive_strings() { 569 let rules = { 570 use crate::ast::Expr::*; 571 vec![Rule { 572 name: "rule".to_owned(), 573 ty: RuleType::Atomic, 574 expr: box_tree!(Seq( 575 Seq(Insens(String::from("a")), Insens(String::from("b"))), 576 Seq(Insens(String::from("c")), Insens(String::from("d"))) 577 )), 578 }] 579 }; 580 let concatenated = vec![OptimizedRule { 581 name: "rule".to_owned(), 582 ty: RuleType::Atomic, 583 expr: OptimizedExpr::Insens(String::from("abcd")), 584 }]; 585 586 assert_eq!(optimize(rules), concatenated); 587 } 588 589 #[test] long_common_sequence()590 fn long_common_sequence() { 591 let rules = { 592 use crate::ast::Expr::*; 593 vec![Rule { 594 name: "rule".to_owned(), 595 ty: RuleType::Silent, 596 expr: box_tree!(Choice( 597 Seq( 598 Ident(String::from("a")), 599 Seq(Ident(String::from("b")), Ident(String::from("c"))) 600 ), 601 Seq( 602 Seq(Ident(String::from("a")), Ident(String::from("b"))), 603 Ident(String::from("d")) 604 ) 605 )), 606 }] 607 }; 608 let optimized = { 609 use crate::optimizer::OptimizedExpr::*; 610 vec![OptimizedRule { 611 name: "rule".to_owned(), 612 ty: RuleType::Silent, 613 expr: box_tree!(Seq( 614 Ident(String::from("a")), 615 Seq( 616 Ident(String::from("b")), 617 Choice(Ident(String::from("c")), Ident(String::from("d"))) 618 ) 619 )), 620 }] 621 }; 622 623 assert_eq!(optimize(rules), optimized); 624 } 625 626 #[test] short_common_sequence()627 fn short_common_sequence() { 628 let rules = { 629 use crate::ast::Expr::*; 630 vec![Rule { 631 name: "rule".to_owned(), 632 ty: RuleType::Atomic, 633 expr: box_tree!(Choice( 634 Seq(Ident(String::from("a")), Ident(String::from("b"))), 635 Ident(String::from("a")) 636 )), 637 }] 638 }; 639 let optimized = { 640 use crate::optimizer::OptimizedExpr::*; 641 vec![OptimizedRule { 642 name: "rule".to_owned(), 643 ty: RuleType::Atomic, 644 expr: box_tree!(Seq(Ident(String::from("a")), Opt(Ident(String::from("b"))))), 645 }] 646 }; 647 648 assert_eq!(optimize(rules), optimized); 649 } 650 651 #[test] impossible_common_sequence()652 fn impossible_common_sequence() { 653 let rules = { 654 use crate::ast::Expr::*; 655 vec![Rule { 656 name: "rule".to_owned(), 657 ty: RuleType::Silent, 658 expr: box_tree!(Choice( 659 Ident(String::from("a")), 660 Seq(Ident(String::from("a")), Ident(String::from("b"))) 661 )), 662 }] 663 }; 664 let optimized = { 665 use crate::optimizer::OptimizedExpr::*; 666 vec![OptimizedRule { 667 name: "rule".to_owned(), 668 ty: RuleType::Silent, 669 expr: box_tree!(Ident(String::from("a"))), 670 }] 671 }; 672 673 assert_eq!(optimize(rules), optimized); 674 } 675 676 #[test] lister()677 fn lister() { 678 let rules = { 679 use crate::ast::Expr::*; 680 vec![Rule { 681 name: "rule".to_owned(), 682 ty: RuleType::Silent, 683 expr: box_tree!(Seq( 684 Rep(Seq(Ident(String::from("a")), Ident(String::from("b")))), 685 Ident(String::from("a")) 686 )), 687 }] 688 }; 689 let optimized = { 690 use crate::optimizer::OptimizedExpr::*; 691 vec![OptimizedRule { 692 name: "rule".to_owned(), 693 ty: RuleType::Silent, 694 expr: box_tree!(Seq( 695 Ident(String::from("a")), 696 Rep(Seq(Ident(String::from("b")), Ident(String::from("a")))) 697 )), 698 }] 699 }; 700 701 assert_eq!(optimize(rules), optimized); 702 } 703 704 mod display { 705 use super::super::*; 706 /// In previous implementation of Display for OptimizedExpr 707 /// in commit 48e0a8bd3d43a17c1c78f099610b745d18ec0c5f (actually committed by me), 708 /// Str("\n") will be displayed as 709 /// " 710 /// " 711 /// 712 /// It will not break the compilation in normal use. 713 /// 714 /// But when I use it in automatically generating documents, 715 /// it will quite confusing and we'll be unable to distinguish \n and \r. 716 /// 717 /// And `cargo expand` will emit codes that can't be compiled, 718 /// for it expand `#[doc("...")]` to `/// ...`, 719 /// and when the document comment breaks the line, 720 /// it will be expanded into wrong codes. 721 #[test] control_character()722 fn control_character() { 723 assert_eq!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\\n\""); 724 assert_eq!( 725 OptimizedExpr::Insens("\n".to_owned()).to_string(), 726 "^\"\\n\"", 727 ); 728 assert_eq!( 729 OptimizedExpr::Range("\n".to_owned(), "\r".to_owned()).to_string(), 730 "('\\n'..'\\r')", 731 ); 732 assert_eq!( 733 OptimizedExpr::Skip(vec![ 734 "\n".to_owned(), 735 "\r".to_owned(), 736 "\n\r".to_owned(), 737 "\0".to_owned(), 738 ]) 739 .to_string(), 740 r#"(!("\n" | "\r" | "\n\r" | "\0") ~ ANY)*"#, 741 ); 742 743 assert_ne!(OptimizedExpr::Str("\n".to_owned()).to_string(), "\"\n\""); 744 } 745 746 #[test] str()747 fn str() { 748 assert_eq!(OptimizedExpr::Str("a".to_owned()).to_string(), r#""a""#); 749 } 750 751 #[test] insens()752 fn insens() { 753 assert_eq!(OptimizedExpr::Insens("a".to_owned()).to_string(), r#"^"a""#); 754 } 755 756 #[test] range()757 fn range() { 758 assert_eq!( 759 OptimizedExpr::Range("a".to_owned(), "z".to_owned()).to_string(), 760 r#"('a'..'z')"#, 761 ); 762 } 763 764 #[test] ident()765 fn ident() { 766 assert_eq!(OptimizedExpr::Ident("a".to_owned()).to_string(), r#"a"#); 767 } 768 769 #[test] peek_slice()770 fn peek_slice() { 771 assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]"); 772 assert_eq!( 773 OptimizedExpr::PeekSlice(0, Some(-1)).to_string(), 774 "PEEK[0..-1]", 775 ); 776 assert_eq!( 777 OptimizedExpr::PeekSlice(2, Some(3)).to_string(), 778 "PEEK[2..3]", 779 ); 780 assert_eq!( 781 OptimizedExpr::PeekSlice(2, Some(-1)).to_string(), 782 "PEEK[2..-1]", 783 ); 784 assert_eq!(OptimizedExpr::PeekSlice(0, None).to_string(), "PEEK[0..]"); 785 } 786 787 #[test] pos_pred()788 fn pos_pred() { 789 assert_eq!( 790 OptimizedExpr::PosPred(Box::new(OptimizedExpr::NegPred(Box::new( 791 OptimizedExpr::Ident("a".to_owned()), 792 )))) 793 .to_string(), 794 "&!a", 795 ); 796 assert_eq!( 797 OptimizedExpr::PosPred(Box::new(OptimizedExpr::Choice( 798 Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident( 799 "a".to_owned(), 800 )))), 801 Box::new(OptimizedExpr::Str("a".to_owned())), 802 ))) 803 .to_string(), 804 r#"&(a* | "a")"#, 805 ); 806 assert_eq!( 807 OptimizedExpr::PosPred(Box::new(OptimizedExpr::RestoreOnErr(Box::new( 808 OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("a".to_owned()))), 809 )))) 810 .to_string(), 811 "&!a", 812 ); 813 } 814 815 #[test] neg_pred()816 fn neg_pred() { 817 assert_eq!( 818 OptimizedExpr::NegPred(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(), 819 r#"!e"#, 820 ); 821 assert_eq!( 822 OptimizedExpr::NegPred(Box::new(OptimizedExpr::Choice( 823 Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident( 824 "a".to_owned(), 825 )))), 826 Box::new(OptimizedExpr::Str("a".to_owned())), 827 ))) 828 .to_string(), 829 r#"!(PUSH(a) | "a")"#, 830 ); 831 } 832 833 #[test] seq()834 fn seq() { 835 assert_eq!( 836 OptimizedExpr::Seq( 837 Box::new(OptimizedExpr::Ident("e1".to_owned())), 838 Box::new(OptimizedExpr::Ident("e2".to_owned())), 839 ) 840 .to_string(), 841 r#"(e1 ~ e2)"#, 842 ); 843 assert_eq!( 844 Expr::Seq( 845 Box::new(Expr::Ident("e1".to_owned())), 846 Box::new(Expr::Seq( 847 Box::new(Expr::Ident("e2".to_owned())), 848 Box::new(Expr::Ident("e3".to_owned())), 849 )), 850 ) 851 .to_string(), 852 "(e1 ~ e2 ~ e3)", 853 ); 854 assert_eq!( 855 Expr::Seq( 856 Box::new(Expr::Ident("e1".to_owned())), 857 Box::new(Expr::Seq( 858 Box::new(Expr::Ident("e2".to_owned())), 859 Box::new(Expr::Seq( 860 Box::new(Expr::Ident("e3".to_owned())), 861 Box::new(Expr::Ident("e4".to_owned())), 862 )), 863 )), 864 ) 865 .to_string(), 866 "(e1 ~ e2 ~ e3 ~ e4)", 867 ); 868 assert_eq!( 869 Expr::Seq( 870 Box::new(Expr::Ident("e1".to_owned())), 871 Box::new(Expr::Choice( 872 Box::new(Expr::Ident("e2".to_owned())), 873 Box::new(Expr::Seq( 874 Box::new(Expr::Ident("e3".to_owned())), 875 Box::new(Expr::Ident("e4".to_owned())), 876 )), 877 )), 878 ) 879 .to_string(), 880 "(e1 ~ (e2 | (e3 ~ e4)))", 881 ); 882 assert_eq!( 883 Expr::Seq( 884 Box::new(Expr::Ident("e1".to_owned())), 885 Box::new(Expr::Seq( 886 Box::new(Expr::Ident("e2".to_owned())), 887 Box::new(Expr::Choice( 888 Box::new(Expr::Ident("e3".to_owned())), 889 Box::new(Expr::Ident("e4".to_owned())), 890 )), 891 )), 892 ) 893 .to_string(), 894 "(e1 ~ e2 ~ (e3 | e4))", 895 ); 896 assert_eq!( 897 OptimizedExpr::Seq( 898 Box::new(OptimizedExpr::Rep(Box::new(OptimizedExpr::Str( 899 "a".to_owned(), 900 )))), 901 Box::new(OptimizedExpr::Seq( 902 Box::new(OptimizedExpr::Ident("b".to_owned())), 903 Box::new(OptimizedExpr::Insens("c".to_owned())), 904 )), 905 ) 906 .to_string(), 907 r#"("a"* ~ b ~ ^"c")"#, 908 ); 909 assert_eq!( 910 OptimizedExpr::Seq( 911 Box::new(OptimizedExpr::PosPred(Box::new(OptimizedExpr::Range( 912 "a".to_owned(), 913 "z".to_owned(), 914 )))), 915 Box::new(OptimizedExpr::NegPred(Box::new(OptimizedExpr::Opt( 916 Box::new(OptimizedExpr::Range("A".to_owned(), "Z".to_owned())), 917 )))), 918 ) 919 .to_string(), 920 "(&('a'..'z') ~ !('A'..'Z')?)", 921 ); 922 } 923 924 #[test] choice()925 fn choice() { 926 assert_eq!( 927 OptimizedExpr::Choice( 928 Box::new(OptimizedExpr::Ident("e1".to_owned())), 929 Box::new(OptimizedExpr::Ident("e2".to_owned())), 930 ) 931 .to_string(), 932 r#"(e1 | e2)"#, 933 ); 934 assert_eq!( 935 Expr::Choice( 936 Box::new(Expr::Ident("e1".to_owned())), 937 Box::new(Expr::Choice( 938 Box::new(Expr::Ident("e2".to_owned())), 939 Box::new(Expr::Ident("e3".to_owned())), 940 )), 941 ) 942 .to_string(), 943 "(e1 | e2 | e3)", 944 ); 945 assert_eq!( 946 Expr::Choice( 947 Box::new(Expr::Ident("e1".to_owned())), 948 Box::new(Expr::Choice( 949 Box::new(Expr::Ident("e2".to_owned())), 950 Box::new(Expr::Choice( 951 Box::new(Expr::Ident("e3".to_owned())), 952 Box::new(Expr::Ident("e4".to_owned())), 953 )), 954 )), 955 ) 956 .to_string(), 957 "(e1 | e2 | e3 | e4)", 958 ); 959 assert_eq!( 960 Expr::Choice( 961 Box::new(Expr::Ident("e1".to_owned())), 962 Box::new(Expr::Seq( 963 Box::new(Expr::Ident("e2".to_owned())), 964 Box::new(Expr::Choice( 965 Box::new(Expr::Ident("e3".to_owned())), 966 Box::new(Expr::Ident("e4".to_owned())), 967 )), 968 )), 969 ) 970 .to_string(), 971 "(e1 | (e2 ~ (e3 | e4)))", 972 ); 973 assert_eq!( 974 OptimizedExpr::Choice( 975 Box::new(OptimizedExpr::Str("a".to_owned())), 976 Box::new(OptimizedExpr::Choice( 977 Box::new(OptimizedExpr::Push(Box::new(OptimizedExpr::Ident( 978 "b".to_owned(), 979 )))), 980 Box::new(OptimizedExpr::Insens("c".to_owned())), 981 )), 982 ) 983 .to_string(), 984 r#"("a" | PUSH(b) | ^"c")"#, 985 ); 986 } 987 988 #[test] opt()989 fn opt() { 990 assert_eq!( 991 OptimizedExpr::Opt(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(), 992 "e?", 993 ); 994 } 995 996 #[test] rep()997 fn rep() { 998 assert_eq!( 999 OptimizedExpr::Rep(Box::new(OptimizedExpr::Ident("x".to_owned()))).to_string(), 1000 "x*", 1001 ); 1002 assert_eq!( 1003 OptimizedExpr::Rep(Box::new(OptimizedExpr::Range( 1004 "0".to_owned(), 1005 "9".to_owned(), 1006 ))) 1007 .to_string(), 1008 "('0'..'9')*", 1009 ); 1010 } 1011 1012 #[test] 1013 #[cfg(feature = "grammar-extras")] rep_once()1014 fn rep_once() { 1015 assert_eq!( 1016 OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(), 1017 "e+", 1018 ); 1019 assert_eq!( 1020 OptimizedExpr::RepOnce(Box::new(OptimizedExpr::Range( 1021 "0".to_owned(), 1022 "9".to_owned(), 1023 ))) 1024 .to_string(), 1025 "('0'..'9')+", 1026 ); 1027 } 1028 1029 #[test] skip()1030 fn skip() { 1031 assert_eq!( 1032 OptimizedExpr::Skip( 1033 ["a", "bc"] 1034 .into_iter() 1035 .map(|s| s.to_owned()) 1036 .collect::<Vec<_>>(), 1037 ) 1038 .to_string(), 1039 r#"(!("a" | "bc") ~ ANY)*"#, 1040 ); 1041 } 1042 1043 #[test] push()1044 fn push() { 1045 assert_eq!( 1046 OptimizedExpr::Push(Box::new(OptimizedExpr::Ident("e".to_owned()))).to_string(), 1047 "PUSH(e)", 1048 ); 1049 } 1050 1051 #[test] 1052 #[cfg(feature = "grammar-extras")] node_tag()1053 fn node_tag() { 1054 assert_eq!( 1055 OptimizedExpr::NodeTag( 1056 Box::new(OptimizedExpr::Ident("expr".to_owned())), 1057 "label".to_owned(), 1058 ) 1059 .to_string(), 1060 r#"(#label = expr)"#, 1061 ); 1062 assert_eq!( 1063 OptimizedExpr::NodeTag( 1064 Box::new(OptimizedExpr::Ident("x".to_owned())), 1065 "X".to_owned(), 1066 ) 1067 .to_string(), 1068 r#"(#X = x)"#, 1069 ); 1070 assert_eq!( 1071 OptimizedExpr::NodeTag( 1072 Box::new(OptimizedExpr::Seq( 1073 Box::new(OptimizedExpr::Ident("x".to_owned())), 1074 Box::new(OptimizedExpr::Str("y".to_owned())), 1075 )), 1076 "X".to_owned(), 1077 ) 1078 .to_string(), 1079 r#"(#X = (x ~ "y"))"#, 1080 ); 1081 } 1082 1083 #[test] restore_on_err()1084 fn restore_on_err() { 1085 assert_eq!( 1086 OptimizedExpr::RestoreOnErr(Box::new(OptimizedExpr::Ident("e".to_owned()))) 1087 .to_string(), 1088 "e", 1089 ); 1090 } 1091 } 1092 } 1093