//! Module containing regex parsers on streams returning ranges of `&str` or `&[u8]`.
//!
//! All regex parsers are overloaded on `&str` and `&[u8]` ranges and can take a `Regex` by value
//! or shared reference (`&`).
//!
//! Enabled using the `regex` feature (for `regex-0.2`) or the `regex-1` feature for `regex-1.0`.
//!
//! ```
//! use once_cell::sync::Lazy;
//! use regex::{bytes, Regex};
//! use combine::Parser;
//! use combine::parser::regex::{find_many, match_};
//!
//! fn main() {
//!     let regex = bytes::Regex::new("[0-9]+").unwrap();
//!     // Shared references to any regex work as well
//!     assert_eq!(
//!         find_many(&regex).parse(&b"123 456 "[..]),
//!         Ok((vec![&b"123"[..], &b"456"[..]], &b" "[..]))
//!     );
//!     assert_eq!(
//!         find_many(regex).parse(&b""[..]),
//!         Ok((vec![], &b""[..]))
//!     );
//!
//!     // `[[:alpha:]]` is the POSIX alphabetic class; a bare `[:alpha:]` would only be a
//!     // character class containing the characters `:`, `a`, `l`, `p` and `h`.
//!     static REGEX: Lazy<Regex> = Lazy::new(|| Regex::new("[[:alpha:]]+").unwrap());
//!     assert_eq!(
//!         match_(&*REGEX).parse("abc123"),
//!         Ok(("abc123", "abc123"))
//!     );
//! }
//! ```
33 
34 use std::{iter::FromIterator, marker::PhantomData};
35 
36 use crate::{
37     error::{
38         ParseError,
39         ParseResult::{self, *},
40         StreamError, Tracked,
41     },
42     parser::range::take,
43     stream::{RangeStream, StreamOnce},
44     Parser,
45 };
46 
/// Collector that keeps only the first item of an iterator.
///
/// Collecting into `First` pulls at most one element from the source iterator, which lets the
/// "first match only" parsers reuse the same `FromIterator`-based regex methods as the
/// "all matches" parsers.
struct First<T>(Option<T>);

impl<A> FromIterator<A> for First<A> {
    /// Takes the first element of `iter` (if any) and ignores the rest.
    fn from_iter<T>(iter: T) -> Self
    where
        T: IntoIterator<Item = A>,
    {
        let mut items = iter.into_iter();
        First(items.next())
    }
}
57 
/// Abstraction over a single regex match, independent of the underlying haystack type.
pub trait MatchFind {
    /// The type of the matched range handed back to the caller.
    type Range;

    /// Offset at which this match ends.
    fn end(&self) -> usize;

    /// The matched portion of the input.
    fn as_match(&self) -> Self::Range;
}
63 
/// Abstraction over a compiled regular expression that can be run on a `Range` of input.
///
/// The parsers in this module are written against this trait rather than a concrete regex type.
pub trait Regex<Range> {
    /// Returns whether the regex matches `range`.
    fn is_match(&self, range: Range) -> bool;

    /// Collects the matches found in `range` into `F`.
    ///
    /// The returned `usize` is the end offset of the last match that was actually pulled into
    /// `F` (`0` if none was).
    fn find_iter<F: FromIterator<Range>>(&self, range: Range) -> (usize, F);

    /// Collects the capture groups of each match found in `range`: one `F` per match, all of
    /// them gathered into `G`.
    ///
    /// The returned `usize` is the end offset of the last match that was actually pulled into
    /// `G` (`0` if none was).
    fn captures<F: FromIterator<Range>, G: FromIterator<F>>(&self, range: Range) -> (usize, G);

    /// The pattern text; used by the parsers when reporting what was expected.
    fn as_str(&self) -> &str;
}
75 
76 impl<'a, R, Range> Regex<Range> for &'a R
77 where
78     R: Regex<Range>,
79 {
is_match(&self, range: Range) -> bool80     fn is_match(&self, range: Range) -> bool {
81         (**self).is_match(range)
82     }
find_iter<F>(&self, range: Range) -> (usize, F) where F: FromIterator<Range>,83     fn find_iter<F>(&self, range: Range) -> (usize, F)
84     where
85         F: FromIterator<Range>,
86     {
87         (**self).find_iter(range)
88     }
captures<F, G>(&self, range: Range) -> (usize, G) where F: FromIterator<Range>, G: FromIterator<F>,89     fn captures<F, G>(&self, range: Range) -> (usize, G)
90     where
91         F: FromIterator<Range>,
92         G: FromIterator<F>,
93     {
94         (**self).captures(range)
95     }
as_str(&self) -> &str96     fn as_str(&self) -> &str {
97         (**self).as_str()
98     }
99 }
100 
find_iter<'a, Input, F>(iterable: Input) -> (usize, F) where Input: IntoIterator, Input::Item: MatchFind, F: FromIterator<<Input::Item as MatchFind>::Range>,101 fn find_iter<'a, Input, F>(iterable: Input) -> (usize, F)
102 where
103     Input: IntoIterator,
104     Input::Item: MatchFind,
105     F: FromIterator<<Input::Item as MatchFind>::Range>,
106 {
107     let mut end = 0;
108     let value = iterable
109         .into_iter()
110         .map(|m| {
111             end = m.end();
112             m.as_match()
113         })
114         .collect();
115     (end, value)
116 }
117 
/// Implementations of this module's `MatchFind` and `Regex` traits for the `regex` crate,
/// covering both the `&str` and the `&[u8]` flavours.
#[cfg(feature = "regex")]
mod regex {
    pub extern crate regex;

    use std::iter::FromIterator;

    use super::{find_iter, MatchFind, Regex};

    pub use self::regex::*;

    impl<'t> MatchFind for regex::Match<'t> {
        type Range = &'t str;

        fn end(&self) -> usize {
            // Explicit path so the inherent method is called rather than this trait method.
            regex::Match::end(self)
        }

        fn as_match(&self) -> Self::Range {
            self.as_str()
        }
    }

    impl<'t> MatchFind for regex::bytes::Match<'t> {
        type Range = &'t [u8];

        fn end(&self) -> usize {
            // Explicit path so the inherent method is called rather than this trait method.
            regex::bytes::Match::end(self)
        }

        fn as_match(&self) -> Self::Range {
            self.as_bytes()
        }
    }

    impl<'a> Regex<&'a str> for regex::Regex {
        fn is_match(&self, range: &'a str) -> bool {
            regex::Regex::is_match(self, range)
        }

        fn find_iter<F>(&self, range: &'a str) -> (usize, F)
        where
            F: FromIterator<&'a str>,
        {
            let matches = regex::Regex::find_iter(self, range);
            find_iter(matches)
        }

        fn captures<F, G>(&self, range: &'a str) -> (usize, G)
        where
            F: FromIterator<&'a str>,
            G: FromIterator<F>,
        {
            let mut last_end = 0;
            let all_captures = regex::Regex::captures_iter(self, range)
                .map(|caps| {
                    let mut groups = caps.iter();
                    // Group 0 is the match of the entire regex, so it is expected to be present
                    // for every `Captures` value.
                    let whole = groups.next().unwrap().unwrap();
                    last_end = whole.end();
                    // Keep the whole match first, then every group that participated.
                    Some(whole)
                        .into_iter()
                        .chain(groups.flatten())
                        .map(|m| m.as_match())
                        .collect()
                })
                .collect();
            (last_end, all_captures)
        }

        fn as_str(&self) -> &str {
            regex::Regex::as_str(self)
        }
    }

    impl<'a> Regex<&'a [u8]> for regex::bytes::Regex {
        fn is_match(&self, range: &'a [u8]) -> bool {
            regex::bytes::Regex::is_match(self, range)
        }

        fn find_iter<F>(&self, range: &'a [u8]) -> (usize, F)
        where
            F: FromIterator<&'a [u8]>,
        {
            let matches = regex::bytes::Regex::find_iter(self, range);
            find_iter(matches)
        }

        fn captures<F, G>(&self, range: &'a [u8]) -> (usize, G)
        where
            F: FromIterator<&'a [u8]>,
            G: FromIterator<F>,
        {
            let mut last_end = 0;
            let all_captures = regex::bytes::Regex::captures_iter(self, range)
                .map(|caps| {
                    let mut groups = caps.iter();
                    // Group 0 is the match of the entire regex, so it is expected to be present
                    // for every `Captures` value.
                    let whole = groups.next().unwrap().unwrap();
                    last_end = whole.end();
                    // Keep the whole match first, then every group that participated.
                    Some(whole)
                        .into_iter()
                        .chain(groups.flatten())
                        .map(|m| m.as_match())
                        .collect()
                })
                .collect();
            (last_end, all_captures)
        }

        fn as_str(&self) -> &str {
            regex::bytes::Regex::as_str(self)
        }
    }
}
220 
221 pub struct Match<R, Input>(R, PhantomData<Input>);
222 
223 impl<'a, Input, R> Parser<Input> for Match<R, Input>
224 where
225     R: Regex<Input::Range>,
226     Input: RangeStream,
227 {
228     type Output = Input::Range;
229     type PartialState = ();
230 
231     #[inline]
parse_lazy( &mut self, input: &mut Input, ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error>232     fn parse_lazy(
233         &mut self,
234         input: &mut Input,
235     ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error> {
236         if self.0.is_match(input.range()) {
237             PeekOk(input.range())
238         } else {
239             PeekErr(Input::Error::empty(input.position()).into())
240         }
241     }
add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>)242     fn add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>) {
243         error.error.add(StreamError::expected_format(format_args!(
244             "/{}/",
245             self.0.as_str()
246         )))
247     }
248 }
249 
250 /// Matches `regex` on the input returning the entire input if it matches.
251 /// Never consumes any input.
252 ///
253 /// ```
254 /// extern crate regex;
255 /// extern crate combine;
256 /// use regex::Regex;
257 /// use combine::Parser;
258 /// use combine::parser::regex::match_;
259 ///
260 /// fn main() {
261 ///     let regex = Regex::new("[:alpha:]+").unwrap();
262 ///     assert_eq!(
263 ///         match_(&regex).parse("abc123"),
264 ///         Ok(("abc123", "abc123"))
265 ///     );
266 /// }
267 /// ```
match_<R, Input>(regex: R) -> Match<R, Input> where R: Regex<Input::Range>, Input: RangeStream,268 pub fn match_<R, Input>(regex: R) -> Match<R, Input>
269 where
270     R: Regex<Input::Range>,
271     Input: RangeStream,
272 {
273     Match(regex, PhantomData)
274 }
275 
276 #[derive(Clone)]
277 pub struct Find<R, Input>(R, PhantomData<fn() -> Input>);
278 
279 impl<'a, Input, R> Parser<Input> for Find<R, Input>
280 where
281     R: Regex<Input::Range>,
282     Input: RangeStream,
283     Input::Range: crate::stream::Range,
284 {
285     type Output = Input::Range;
286     type PartialState = ();
287 
288     #[inline]
parse_lazy( &mut self, input: &mut Input, ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error>289     fn parse_lazy(
290         &mut self,
291         input: &mut Input,
292     ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error> {
293         let (end, First(value)) = self.0.find_iter(input.range());
294         match value {
295             Some(value) => take(end).parse_lazy(input).map(|_| value),
296             None => PeekErr(Input::Error::empty(input.position()).into()),
297         }
298     }
add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>)299     fn add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>) {
300         error.error.add(StreamError::expected_format(format_args!(
301             "/{}/",
302             self.0.as_str()
303         )))
304     }
305 }
306 
307 /// Matches `regex` on the input by running `find` on the input and returns the first match.
308 /// Consumes all input up until the end of the first match.
309 ///
310 /// ```
311 /// extern crate regex;
312 /// extern crate combine;
313 /// use regex::Regex;
314 /// use combine::Parser;
315 /// use combine::parser::regex::find;
316 ///
317 /// fn main() {
318 ///     let mut digits = find(Regex::new("^[0-9]+").unwrap());
319 ///     assert_eq!(digits.parse("123 456 "), Ok(("123", " 456 ")));
320 ///     assert!(
321 ///         digits.parse("abc 123 456 ").is_err());
322 ///
323 ///     let mut digits2 = find(Regex::new("[0-9]+").unwrap());
324 ///     assert_eq!(digits2.parse("123 456 "), Ok(("123", " 456 ")));
325 ///     assert_eq!(digits2.parse("abc 123 456 "), Ok(("123", " 456 ")));
326 /// }
327 /// ```
find<R, Input>(regex: R) -> Find<R, Input> where R: Regex<Input::Range>, Input: RangeStream, Input::Range: crate::stream::Range,328 pub fn find<R, Input>(regex: R) -> Find<R, Input>
329 where
330     R: Regex<Input::Range>,
331     Input: RangeStream,
332     Input::Range: crate::stream::Range,
333 {
334     Find(regex, PhantomData)
335 }
336 
337 #[derive(Clone)]
338 pub struct FindMany<F, R, Input>(R, PhantomData<fn() -> (Input, F)>);
339 
340 impl<'a, Input, F, R> Parser<Input> for FindMany<F, R, Input>
341 where
342     F: FromIterator<Input::Range>,
343     R: Regex<Input::Range>,
344     Input: RangeStream,
345     Input::Range: crate::stream::Range,
346 {
347     type Output = F;
348     type PartialState = ();
349 
350     #[inline]
parse_lazy( &mut self, input: &mut Input, ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error>351     fn parse_lazy(
352         &mut self,
353         input: &mut Input,
354     ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error> {
355         let (end, value) = self.0.find_iter(input.range());
356         take(end).parse_lazy(input).map(|_| value)
357     }
add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>)358     fn add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>) {
359         error.error.add(StreamError::expected_format(format_args!(
360             "/{}/",
361             self.0.as_str()
362         )))
363     }
364 }
365 
366 /// Matches `regex` on the input by running `find_iter` on the input.
367 /// Returns all matches in a `F: FromIterator<Input::Range>`.
368 /// Consumes all input up until the end of the last match.
369 ///
370 /// ```
371 /// extern crate regex;
372 /// extern crate combine;
373 /// use regex::Regex;
374 /// use regex::bytes;
375 /// use combine::Parser;
376 /// use combine::parser::regex::find_many;
377 ///
378 /// fn main() {
379 ///     let mut digits = find_many(Regex::new("[0-9]+").unwrap());
380 ///     assert_eq!(digits.parse("123 456 "), Ok((vec!["123", "456"], " ")));
381 ///     assert_eq!(digits.parse("abc 123 456 "), Ok((vec!["123", "456"], " ")));
382 ///     assert_eq!(digits.parse("abc"), Ok((vec![], "abc")));
383 /// }
384 /// ```
find_many<F, R, Input>(regex: R) -> FindMany<F, R, Input> where F: FromIterator<Input::Range>, R: Regex<Input::Range>, Input: RangeStream, Input::Range: crate::stream::Range,385 pub fn find_many<F, R, Input>(regex: R) -> FindMany<F, R, Input>
386 where
387     F: FromIterator<Input::Range>,
388     R: Regex<Input::Range>,
389     Input: RangeStream,
390     Input::Range: crate::stream::Range,
391 {
392     FindMany(regex, PhantomData)
393 }
394 
395 #[derive(Clone)]
396 pub struct Captures<F, R, Input>(R, PhantomData<fn() -> (Input, F)>);
397 
398 impl<'a, Input, F, R> Parser<Input> for Captures<F, R, Input>
399 where
400     F: FromIterator<Input::Range>,
401     R: Regex<Input::Range>,
402     Input: RangeStream,
403     Input::Range: crate::stream::Range,
404 {
405     type Output = F;
406     type PartialState = ();
407 
408     #[inline]
parse_lazy( &mut self, input: &mut Input, ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error>409     fn parse_lazy(
410         &mut self,
411         input: &mut Input,
412     ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error> {
413         let (end, First(value)) = self.0.captures(input.range());
414         match value {
415             Some(value) => take(end).parse_lazy(input).map(|_| value),
416             None => PeekErr(Input::Error::empty(input.position()).into()),
417         }
418     }
add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>)419     fn add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>) {
420         error.error.add(StreamError::expected_format(format_args!(
421             "/{}/",
422             self.0.as_str()
423         )))
424     }
425 }
426 
427 /// Matches `regex` on the input by running `captures_iter` on the input.
428 /// Returns the captures of the first match and consumes the input up until the end of that match.
429 ///
430 /// ```
431 /// extern crate regex;
432 /// extern crate combine;
433 /// use regex::Regex;
434 /// use combine::Parser;
435 /// use combine::parser::regex::captures;
436 ///
437 /// fn main() {
438 ///     let mut fields = captures(Regex::new("([a-z]+):([0-9]+)").unwrap());
439 ///     assert_eq!(
440 ///         fields.parse("test:123 field:456 "),
441 ///         Ok((vec!["test:123", "test", "123"],
442 ///             " field:456 "
443 ///         ))
444 ///     );
445 ///     assert_eq!(
446 ///         fields.parse("test:123 :456 "),
447 ///         Ok((vec!["test:123", "test", "123"],
448 ///             " :456 "
449 ///         ))
450 ///     );
451 /// }
452 /// ```
captures<F, R, Input>(regex: R) -> Captures<F, R, Input> where F: FromIterator<Input::Range>, R: Regex<Input::Range>, Input: RangeStream, Input::Range: crate::stream::Range,453 pub fn captures<F, R, Input>(regex: R) -> Captures<F, R, Input>
454 where
455     F: FromIterator<Input::Range>,
456     R: Regex<Input::Range>,
457     Input: RangeStream,
458     Input::Range: crate::stream::Range,
459 {
460     Captures(regex, PhantomData)
461 }
462 
463 #[derive(Clone)]
464 pub struct CapturesMany<F, G, R, Input>(R, PhantomData<fn() -> (Input, F, G)>);
465 
466 impl<'a, Input, F, G, R> Parser<Input> for CapturesMany<F, G, R, Input>
467 where
468     F: FromIterator<Input::Range>,
469     G: FromIterator<F>,
470     R: Regex<Input::Range>,
471     Input: RangeStream,
472     Input::Range: crate::stream::Range,
473 {
474     type Output = G;
475     type PartialState = ();
476 
477     #[inline]
parse_lazy( &mut self, input: &mut Input, ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error>478     fn parse_lazy(
479         &mut self,
480         input: &mut Input,
481     ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error> {
482         let (end, value) = self.0.captures(input.range());
483         take(end).parse_lazy(input).map(|_| value)
484     }
add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>)485     fn add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>) {
486         error.error.add(StreamError::expected_format(format_args!(
487             "/{}/",
488             self.0.as_str()
489         )))
490     }
491 }
492 
493 /// Matches `regex` on the input by running `captures_iter` on the input.
494 /// Returns all captures which is part of the match in a `F: FromIterator<Input::Range>`.
495 /// Consumes all input up until the end of the last match.
496 ///
497 /// ```
498 /// extern crate regex;
499 /// extern crate combine;
500 /// use regex::Regex;
501 /// use combine::Parser;
502 /// use combine::parser::regex::captures_many;
503 ///
504 /// fn main() {
505 ///     let mut fields = captures_many(Regex::new("([a-z]+):([0-9]+)").unwrap());
506 ///     assert_eq!(
507 ///         fields.parse("test:123 field:456 "),
508 ///         Ok((vec![vec!["test:123", "test", "123"],
509 ///                  vec!["field:456", "field", "456"]],
510 ///             " "
511 ///         ))
512 ///     );
513 ///     assert_eq!(
514 ///         fields.parse("test:123 :456 "),
515 ///         Ok((vec![vec!["test:123", "test", "123"]],
516 ///             " :456 "
517 ///         ))
518 ///     );
519 /// }
520 /// ```
captures_many<F, G, R, Input>(regex: R) -> CapturesMany<F, G, R, Input> where F: FromIterator<Input::Range>, G: FromIterator<F>, R: Regex<Input::Range>, Input: RangeStream, Input::Range: crate::stream::Range,521 pub fn captures_many<F, G, R, Input>(regex: R) -> CapturesMany<F, G, R, Input>
522 where
523     F: FromIterator<Input::Range>,
524     G: FromIterator<F>,
525     R: Regex<Input::Range>,
526     Input: RangeStream,
527     Input::Range: crate::stream::Range,
528 {
529     CapturesMany(regex, PhantomData)
530 }
531 
#[cfg(test)]
mod tests {

    use regex::Regex;

    use crate::{parser::regex::find, Parser};

    /// `find` respects anchoring: an anchored pattern fails when the input does not start with a
    /// match, while an unanchored one skips ahead to the first match.
    #[test]
    fn test() {
        // Anchored pattern: only matches at the very start of the input.
        let mut anchored = find(Regex::new("^[0-9]+").unwrap());
        let at_start = anchored.parse("123 456 ");
        assert_eq!(at_start, Ok(("123", " 456 ")));
        let not_at_start = anchored.parse("abc 123 456 ");
        assert!(not_at_start.is_err());

        // Unanchored pattern: leading non-matching input is skipped and consumed.
        let mut unanchored = find(Regex::new("[0-9]+").unwrap());
        let at_start = unanchored.parse("123 456 ");
        assert_eq!(at_start, Ok(("123", " 456 ")));
        let after_prefix = unanchored.parse("abc 123 456 ");
        assert_eq!(after_prefix, Ok(("123", " 456 ")));
    }
}
550