//! Module containing regex parsers on streams returning ranges of `&str` or `&[u8]`.
//!
//! All regex parsers are overloaded on `&str` and `&[u8]` ranges and can take a `Regex` by value
//! or shared reference (`&`).
//!
//! Enabled using the `regex` feature (for `regex-0.2`) or the `regex-1` feature for `regex-1.0`.
//!
//! ```
//! use once_cell::sync::Lazy;
//! use regex::{bytes, Regex};
//! use combine::Parser;
//! use combine::parser::regex::{find_many, match_};
//!
//! fn main() {
//!     let regex = bytes::Regex::new("[0-9]+").unwrap();
//!     // Shared references to any regex work as well
//!     assert_eq!(
//!         find_many(&regex).parse(&b"123 456 "[..]),
//!         Ok((vec![&b"123"[..], &b"456"[..]], &b" "[..]))
//!     );
//!     assert_eq!(
//!         find_many(regex).parse(&b""[..]),
//!         Ok((vec![], &b""[..]))
//!     );
//!
//!     static REGEX: Lazy<Regex> = Lazy::new(|| Regex::new("[[:alpha:]]+").unwrap());
//!     assert_eq!(
//!         match_(&*REGEX).parse("abc123"),
//!         Ok(("abc123", "abc123"))
//!     );
//! }
//! ```
33
34 use std::{iter::FromIterator, marker::PhantomData};
35
36 use crate::{
37 error::{
38 ParseError,
39 ParseResult::{self, *},
40 StreamError, Tracked,
41 },
42 parser::range::take,
43 stream::{RangeStream, StreamOnce},
44 Parser,
45 };
46
/// Collector that retains only the first item produced by an iterator.
///
/// Used as the `FromIterator` target by parsers that only care about the first match; the
/// source iterator is advanced at most once.
struct First<T>(Option<T>);

impl<A> FromIterator<A> for First<A> {
    /// Pulls a single item from `source` and wraps it (`None` when the source is empty).
    fn from_iter<I>(source: I) -> Self
    where
        I: IntoIterator<Item = A>,
    {
        let mut items = source.into_iter();
        First(items.next())
    }
}
57
/// A single regex match that can report where it ends and hand out the matched range.
pub trait MatchFind {
    /// The type of range this match yields (e.g. `&str` or `&[u8]` for the `regex` crate
    /// implementations in this file).
    type Range;

    /// Offset at which this match ends.
    fn end(&self) -> usize;

    /// The matched portion of the input.
    fn as_match(&self) -> Self::Range;
}
63
/// Abstraction over the regex types that the parsers in this module can run on a `Range`.
pub trait Regex<Range> {
    /// Returns whether the regex matches `range`.
    fn is_match(&self, range: Range) -> bool;

    /// Collects the matches found in `range` into `F`.
    ///
    /// The `usize` in the returned tuple is the amount of input the calling parser will consume.
    /// The implementations in this file report the end offset of the last match that was
    /// collected, or `0` when nothing was collected.
    fn find_iter<F>(&self, range: Range) -> (usize, F)
    where
        F: FromIterator<Range>;

    /// Collects the capture groups of each match into an `F`, and those per-match collections
    /// into `G`.
    ///
    /// The `usize` in the returned tuple has the same meaning as for `find_iter`.
    fn captures<F, G>(&self, range: Range) -> (usize, G)
    where
        F: FromIterator<Range>,
        G: FromIterator<F>;

    /// Returns the pattern text; the parsers in this module use it in their "expected" errors.
    fn as_str(&self) -> &str;
}
75
76 impl<'a, R, Range> Regex<Range> for &'a R
77 where
78 R: Regex<Range>,
79 {
is_match(&self, range: Range) -> bool80 fn is_match(&self, range: Range) -> bool {
81 (**self).is_match(range)
82 }
find_iter<F>(&self, range: Range) -> (usize, F) where F: FromIterator<Range>,83 fn find_iter<F>(&self, range: Range) -> (usize, F)
84 where
85 F: FromIterator<Range>,
86 {
87 (**self).find_iter(range)
88 }
captures<F, G>(&self, range: Range) -> (usize, G) where F: FromIterator<Range>, G: FromIterator<F>,89 fn captures<F, G>(&self, range: Range) -> (usize, G)
90 where
91 F: FromIterator<Range>,
92 G: FromIterator<F>,
93 {
94 (**self).captures(range)
95 }
as_str(&self) -> &str96 fn as_str(&self) -> &str {
97 (**self).as_str()
98 }
99 }
100
find_iter<'a, Input, F>(iterable: Input) -> (usize, F) where Input: IntoIterator, Input::Item: MatchFind, F: FromIterator<<Input::Item as MatchFind>::Range>,101 fn find_iter<'a, Input, F>(iterable: Input) -> (usize, F)
102 where
103 Input: IntoIterator,
104 Input::Item: MatchFind,
105 F: FromIterator<<Input::Item as MatchFind>::Range>,
106 {
107 let mut end = 0;
108 let value = iterable
109 .into_iter()
110 .map(|m| {
111 end = m.end();
112 m.as_match()
113 })
114 .collect();
115 (end, value)
116 }
117
// Implementations of `MatchFind` and `Regex` for the `regex` crate's `str` and `bytes`
// flavours. Only compiled when the `regex` feature is enabled.
#[cfg(feature = "regex")]
mod regex {
    pub extern crate regex;

    use std::iter::FromIterator;

    use super::{find_iter, MatchFind, Regex};

    pub use self::regex::*;

    impl<'t> MatchFind for regex::Match<'t> {
        type Range = &'t str;

        fn end(&self) -> usize {
            // Explicit path: call the inherent method of `regex::Match`.
            regex::Match::end(self)
        }

        fn as_match(&self) -> Self::Range {
            self.as_str()
        }
    }

    impl<'t> MatchFind for regex::bytes::Match<'t> {
        type Range = &'t [u8];

        fn end(&self) -> usize {
            // Explicit path: call the inherent method of `regex::bytes::Match`.
            regex::bytes::Match::end(self)
        }

        fn as_match(&self) -> Self::Range {
            self.as_bytes()
        }
    }

    impl<'a> Regex<&'a str> for regex::Regex {
        fn is_match(&self, range: &'a str) -> bool {
            regex::Regex::is_match(self, range)
        }

        fn find_iter<F>(&self, range: &'a str) -> (usize, F)
        where
            F: FromIterator<&'a str>,
        {
            find_iter(regex::Regex::find_iter(self, range))
        }

        /// Collects, for each match, the groups that participated in it (group 0, the whole
        /// match, first). The returned offset is the end of the last match that was collected.
        fn captures<F, G>(&self, range: &'a str) -> (usize, G)
        where
            F: FromIterator<&'a str>,
            G: FromIterator<F>,
        {
            let mut end = 0;
            let value = regex::Regex::captures_iter(self, range)
                .map(|caps| {
                    // Group 0 is the match on the entire regex, so it is always present.
                    let whole = caps.get(0).unwrap();
                    end = whole.end();
                    // `iter` yields group 0 first; groups that did not participate are skipped.
                    caps.iter().flatten().map(|m| m.as_match()).collect()
                })
                .collect();
            (end, value)
        }

        fn as_str(&self) -> &str {
            regex::Regex::as_str(self)
        }
    }

    impl<'a> Regex<&'a [u8]> for regex::bytes::Regex {
        fn is_match(&self, range: &'a [u8]) -> bool {
            regex::bytes::Regex::is_match(self, range)
        }

        fn find_iter<F>(&self, range: &'a [u8]) -> (usize, F)
        where
            F: FromIterator<&'a [u8]>,
        {
            find_iter(regex::bytes::Regex::find_iter(self, range))
        }

        /// Collects, for each match, the groups that participated in it (group 0, the whole
        /// match, first). The returned offset is the end of the last match that was collected.
        fn captures<F, G>(&self, range: &'a [u8]) -> (usize, G)
        where
            F: FromIterator<&'a [u8]>,
            G: FromIterator<F>,
        {
            let mut end = 0;
            let value = regex::bytes::Regex::captures_iter(self, range)
                .map(|caps| {
                    // Group 0 is the match on the entire regex, so it is always present.
                    let whole = caps.get(0).unwrap();
                    end = whole.end();
                    // `iter` yields group 0 first; groups that did not participate are skipped.
                    caps.iter().flatten().map(|m| m.as_match()).collect()
                })
                .collect();
            (end, value)
        }

        fn as_str(&self) -> &str {
            regex::bytes::Regex::as_str(self)
        }
    }
}
220
221 pub struct Match<R, Input>(R, PhantomData<Input>);
222
223 impl<'a, Input, R> Parser<Input> for Match<R, Input>
224 where
225 R: Regex<Input::Range>,
226 Input: RangeStream,
227 {
228 type Output = Input::Range;
229 type PartialState = ();
230
231 #[inline]
parse_lazy( &mut self, input: &mut Input, ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error>232 fn parse_lazy(
233 &mut self,
234 input: &mut Input,
235 ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error> {
236 if self.0.is_match(input.range()) {
237 PeekOk(input.range())
238 } else {
239 PeekErr(Input::Error::empty(input.position()).into())
240 }
241 }
add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>)242 fn add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>) {
243 error.error.add(StreamError::expected_format(format_args!(
244 "/{}/",
245 self.0.as_str()
246 )))
247 }
248 }
249
250 /// Matches `regex` on the input returning the entire input if it matches.
251 /// Never consumes any input.
252 ///
253 /// ```
254 /// extern crate regex;
255 /// extern crate combine;
256 /// use regex::Regex;
257 /// use combine::Parser;
258 /// use combine::parser::regex::match_;
259 ///
260 /// fn main() {
261 /// let regex = Regex::new("[:alpha:]+").unwrap();
262 /// assert_eq!(
263 /// match_(®ex).parse("abc123"),
264 /// Ok(("abc123", "abc123"))
265 /// );
266 /// }
267 /// ```
match_<R, Input>(regex: R) -> Match<R, Input> where R: Regex<Input::Range>, Input: RangeStream,268 pub fn match_<R, Input>(regex: R) -> Match<R, Input>
269 where
270 R: Regex<Input::Range>,
271 Input: RangeStream,
272 {
273 Match(regex, PhantomData)
274 }
275
276 #[derive(Clone)]
277 pub struct Find<R, Input>(R, PhantomData<fn() -> Input>);
278
279 impl<'a, Input, R> Parser<Input> for Find<R, Input>
280 where
281 R: Regex<Input::Range>,
282 Input: RangeStream,
283 Input::Range: crate::stream::Range,
284 {
285 type Output = Input::Range;
286 type PartialState = ();
287
288 #[inline]
parse_lazy( &mut self, input: &mut Input, ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error>289 fn parse_lazy(
290 &mut self,
291 input: &mut Input,
292 ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error> {
293 let (end, First(value)) = self.0.find_iter(input.range());
294 match value {
295 Some(value) => take(end).parse_lazy(input).map(|_| value),
296 None => PeekErr(Input::Error::empty(input.position()).into()),
297 }
298 }
add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>)299 fn add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>) {
300 error.error.add(StreamError::expected_format(format_args!(
301 "/{}/",
302 self.0.as_str()
303 )))
304 }
305 }
306
307 /// Matches `regex` on the input by running `find` on the input and returns the first match.
308 /// Consumes all input up until the end of the first match.
309 ///
310 /// ```
311 /// extern crate regex;
312 /// extern crate combine;
313 /// use regex::Regex;
314 /// use combine::Parser;
315 /// use combine::parser::regex::find;
316 ///
317 /// fn main() {
318 /// let mut digits = find(Regex::new("^[0-9]+").unwrap());
319 /// assert_eq!(digits.parse("123 456 "), Ok(("123", " 456 ")));
320 /// assert!(
321 /// digits.parse("abc 123 456 ").is_err());
322 ///
323 /// let mut digits2 = find(Regex::new("[0-9]+").unwrap());
324 /// assert_eq!(digits2.parse("123 456 "), Ok(("123", " 456 ")));
325 /// assert_eq!(digits2.parse("abc 123 456 "), Ok(("123", " 456 ")));
326 /// }
327 /// ```
find<R, Input>(regex: R) -> Find<R, Input> where R: Regex<Input::Range>, Input: RangeStream, Input::Range: crate::stream::Range,328 pub fn find<R, Input>(regex: R) -> Find<R, Input>
329 where
330 R: Regex<Input::Range>,
331 Input: RangeStream,
332 Input::Range: crate::stream::Range,
333 {
334 Find(regex, PhantomData)
335 }
336
337 #[derive(Clone)]
338 pub struct FindMany<F, R, Input>(R, PhantomData<fn() -> (Input, F)>);
339
340 impl<'a, Input, F, R> Parser<Input> for FindMany<F, R, Input>
341 where
342 F: FromIterator<Input::Range>,
343 R: Regex<Input::Range>,
344 Input: RangeStream,
345 Input::Range: crate::stream::Range,
346 {
347 type Output = F;
348 type PartialState = ();
349
350 #[inline]
parse_lazy( &mut self, input: &mut Input, ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error>351 fn parse_lazy(
352 &mut self,
353 input: &mut Input,
354 ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error> {
355 let (end, value) = self.0.find_iter(input.range());
356 take(end).parse_lazy(input).map(|_| value)
357 }
add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>)358 fn add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>) {
359 error.error.add(StreamError::expected_format(format_args!(
360 "/{}/",
361 self.0.as_str()
362 )))
363 }
364 }
365
366 /// Matches `regex` on the input by running `find_iter` on the input.
367 /// Returns all matches in a `F: FromIterator<Input::Range>`.
368 /// Consumes all input up until the end of the last match.
369 ///
370 /// ```
371 /// extern crate regex;
372 /// extern crate combine;
373 /// use regex::Regex;
374 /// use regex::bytes;
375 /// use combine::Parser;
376 /// use combine::parser::regex::find_many;
377 ///
378 /// fn main() {
379 /// let mut digits = find_many(Regex::new("[0-9]+").unwrap());
380 /// assert_eq!(digits.parse("123 456 "), Ok((vec!["123", "456"], " ")));
381 /// assert_eq!(digits.parse("abc 123 456 "), Ok((vec!["123", "456"], " ")));
382 /// assert_eq!(digits.parse("abc"), Ok((vec![], "abc")));
383 /// }
384 /// ```
find_many<F, R, Input>(regex: R) -> FindMany<F, R, Input> where F: FromIterator<Input::Range>, R: Regex<Input::Range>, Input: RangeStream, Input::Range: crate::stream::Range,385 pub fn find_many<F, R, Input>(regex: R) -> FindMany<F, R, Input>
386 where
387 F: FromIterator<Input::Range>,
388 R: Regex<Input::Range>,
389 Input: RangeStream,
390 Input::Range: crate::stream::Range,
391 {
392 FindMany(regex, PhantomData)
393 }
394
395 #[derive(Clone)]
396 pub struct Captures<F, R, Input>(R, PhantomData<fn() -> (Input, F)>);
397
398 impl<'a, Input, F, R> Parser<Input> for Captures<F, R, Input>
399 where
400 F: FromIterator<Input::Range>,
401 R: Regex<Input::Range>,
402 Input: RangeStream,
403 Input::Range: crate::stream::Range,
404 {
405 type Output = F;
406 type PartialState = ();
407
408 #[inline]
parse_lazy( &mut self, input: &mut Input, ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error>409 fn parse_lazy(
410 &mut self,
411 input: &mut Input,
412 ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error> {
413 let (end, First(value)) = self.0.captures(input.range());
414 match value {
415 Some(value) => take(end).parse_lazy(input).map(|_| value),
416 None => PeekErr(Input::Error::empty(input.position()).into()),
417 }
418 }
add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>)419 fn add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>) {
420 error.error.add(StreamError::expected_format(format_args!(
421 "/{}/",
422 self.0.as_str()
423 )))
424 }
425 }
426
427 /// Matches `regex` on the input by running `captures_iter` on the input.
428 /// Returns the captures of the first match and consumes the input up until the end of that match.
429 ///
430 /// ```
431 /// extern crate regex;
432 /// extern crate combine;
433 /// use regex::Regex;
434 /// use combine::Parser;
435 /// use combine::parser::regex::captures;
436 ///
437 /// fn main() {
438 /// let mut fields = captures(Regex::new("([a-z]+):([0-9]+)").unwrap());
439 /// assert_eq!(
440 /// fields.parse("test:123 field:456 "),
441 /// Ok((vec!["test:123", "test", "123"],
442 /// " field:456 "
443 /// ))
444 /// );
445 /// assert_eq!(
446 /// fields.parse("test:123 :456 "),
447 /// Ok((vec!["test:123", "test", "123"],
448 /// " :456 "
449 /// ))
450 /// );
451 /// }
452 /// ```
captures<F, R, Input>(regex: R) -> Captures<F, R, Input> where F: FromIterator<Input::Range>, R: Regex<Input::Range>, Input: RangeStream, Input::Range: crate::stream::Range,453 pub fn captures<F, R, Input>(regex: R) -> Captures<F, R, Input>
454 where
455 F: FromIterator<Input::Range>,
456 R: Regex<Input::Range>,
457 Input: RangeStream,
458 Input::Range: crate::stream::Range,
459 {
460 Captures(regex, PhantomData)
461 }
462
463 #[derive(Clone)]
464 pub struct CapturesMany<F, G, R, Input>(R, PhantomData<fn() -> (Input, F, G)>);
465
466 impl<'a, Input, F, G, R> Parser<Input> for CapturesMany<F, G, R, Input>
467 where
468 F: FromIterator<Input::Range>,
469 G: FromIterator<F>,
470 R: Regex<Input::Range>,
471 Input: RangeStream,
472 Input::Range: crate::stream::Range,
473 {
474 type Output = G;
475 type PartialState = ();
476
477 #[inline]
parse_lazy( &mut self, input: &mut Input, ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error>478 fn parse_lazy(
479 &mut self,
480 input: &mut Input,
481 ) -> ParseResult<Self::Output, <Input as StreamOnce>::Error> {
482 let (end, value) = self.0.captures(input.range());
483 take(end).parse_lazy(input).map(|_| value)
484 }
add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>)485 fn add_error(&mut self, error: &mut Tracked<<Input as StreamOnce>::Error>) {
486 error.error.add(StreamError::expected_format(format_args!(
487 "/{}/",
488 self.0.as_str()
489 )))
490 }
491 }
492
493 /// Matches `regex` on the input by running `captures_iter` on the input.
494 /// Returns all captures which is part of the match in a `F: FromIterator<Input::Range>`.
495 /// Consumes all input up until the end of the last match.
496 ///
497 /// ```
498 /// extern crate regex;
499 /// extern crate combine;
500 /// use regex::Regex;
501 /// use combine::Parser;
502 /// use combine::parser::regex::captures_many;
503 ///
504 /// fn main() {
505 /// let mut fields = captures_many(Regex::new("([a-z]+):([0-9]+)").unwrap());
506 /// assert_eq!(
507 /// fields.parse("test:123 field:456 "),
508 /// Ok((vec![vec!["test:123", "test", "123"],
509 /// vec!["field:456", "field", "456"]],
510 /// " "
511 /// ))
512 /// );
513 /// assert_eq!(
514 /// fields.parse("test:123 :456 "),
515 /// Ok((vec![vec!["test:123", "test", "123"]],
516 /// " :456 "
517 /// ))
518 /// );
519 /// }
520 /// ```
captures_many<F, G, R, Input>(regex: R) -> CapturesMany<F, G, R, Input> where F: FromIterator<Input::Range>, G: FromIterator<F>, R: Regex<Input::Range>, Input: RangeStream, Input::Range: crate::stream::Range,521 pub fn captures_many<F, G, R, Input>(regex: R) -> CapturesMany<F, G, R, Input>
522 where
523 F: FromIterator<Input::Range>,
524 G: FromIterator<F>,
525 R: Regex<Input::Range>,
526 Input: RangeStream,
527 Input::Range: crate::stream::Range,
528 {
529 CapturesMany(regex, PhantomData)
530 }
531
#[cfg(test)]
mod tests {

    use regex::Regex;

    use crate::{parser::regex::find, Parser};

    /// `find` returns the first match and leaves everything after it as remaining input.
    #[test]
    fn test() {
        // Anchored pattern: only succeeds when the input starts with digits.
        let mut anchored = find(Regex::new("^[0-9]+").unwrap());
        assert_eq!(anchored.parse("123 456 "), Ok(("123", " 456 ")));
        assert!(anchored.parse("abc 123 456 ").is_err());

        // Unanchored pattern: text before the first match is skipped.
        let mut unanchored = find(Regex::new("[0-9]+").unwrap());
        assert_eq!(unanchored.parse("123 456 "), Ok(("123", " 456 ")));
        assert_eq!(unanchored.parse("abc 123 456 "), Ok(("123", " 456 ")));
    }
}
550