1 // pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9 
10 use core::fmt;
11 use core::hash::{Hash, Hasher};
12 use core::ops::{Bound, RangeBounds};
13 use core::ptr;
14 use core::str;
15 
16 use crate::position;
17 
/// A span over a `&str`. It is created from either [two `Position`s] or from a [`Pair`].
///
/// A `Span` keeps a reference to the whole input string together with the `start` and `end`
/// byte offsets that delimit the covered region (`input[start..end]`).
///
/// [two `Position`s]: struct.Position.html#method.span
/// [`Pair`]: ../iterators/struct.Pair.html#method.span
#[derive(Clone, Copy)]
pub struct Span<'i> {
    /// The complete input string that `start` and `end` index into.
    input: &'i str,
    /// Byte offset at which the span begins.
    ///
    /// # Safety
    ///
    /// Must be a valid character boundary index into `input`.
    start: usize,
    /// Byte offset at which the span ends (exclusive).
    ///
    /// # Safety
    ///
    /// Must be a valid character boundary index into `input`.
    end: usize,
}
34 
35 impl<'i> Span<'i> {
36     /// Create a new `Span` without checking invariants. (Checked with `debug_assertions`.)
37     ///
38     /// # Safety
39     ///
40     /// `input[start..end]` must be a valid subslice; that is, said indexing should not panic.
new_unchecked(input: &str, start: usize, end: usize) -> Span<'_>41     pub(crate) unsafe fn new_unchecked(input: &str, start: usize, end: usize) -> Span<'_> {
42         debug_assert!(input.get(start..end).is_some());
43         Span { input, start, end }
44     }
45 
46     /// Attempts to create a new span. Will return `None` if `input[start..end]` is an invalid index
47     /// into `input`.
48     ///
49     /// # Examples
50     ///
51     /// ```
52     /// # use pest::Span;
53     /// let input = "Hello!";
54     /// assert_eq!(None, Span::new(input, 100, 0));
55     /// assert!(Span::new(input, 0, input.len()).is_some());
56     /// ```
new(input: &str, start: usize, end: usize) -> Option<Span<'_>>57     pub fn new(input: &str, start: usize, end: usize) -> Option<Span<'_>> {
58         if input.get(start..end).is_some() {
59             Some(Span { input, start, end })
60         } else {
61             None
62         }
63     }
64 
65     /// Attempts to create a new span based on a sub-range.
66     ///
67     /// ```
68     /// use pest::Span;
69     /// let input = "Hello World!";
70     /// let world = Span::new(input, 6, input.len()).unwrap();
71     /// let orl = world.get(1..=3);
72     /// assert!(orl.is_some());
73     /// assert_eq!(orl.unwrap().as_str(), "orl");
74     /// ```
75     ///
76     /// # Examples
get(&self, range: impl RangeBounds<usize>) -> Option<Span<'i>>77     pub fn get(&self, range: impl RangeBounds<usize>) -> Option<Span<'i>> {
78         let start = match range.start_bound() {
79             Bound::Included(offset) => *offset,
80             Bound::Excluded(offset) => *offset + 1,
81             Bound::Unbounded => 0,
82         };
83         let end = match range.end_bound() {
84             Bound::Included(offset) => *offset + 1,
85             Bound::Excluded(offset) => *offset,
86             Bound::Unbounded => self.as_str().len(),
87         };
88 
89         self.as_str().get(start..end).map(|_| Span {
90             input: self.input,
91             start: self.start + start,
92             end: self.start + end,
93         })
94     }
95 
96     /// Returns the `Span`'s start byte position as a `usize`.
97     ///
98     /// # Examples
99     ///
100     /// ```
101     /// # use pest::Position;
102     /// let input = "ab";
103     /// let start = Position::from_start(input);
104     /// let end = start.clone();
105     /// let span = start.span(&end);
106     ///
107     /// assert_eq!(span.start(), 0);
108     /// ```
109     #[inline]
start(&self) -> usize110     pub fn start(&self) -> usize {
111         self.start
112     }
113 
114     /// Returns the `Span`'s end byte position as a `usize`.
115     ///
116     /// # Examples
117     ///
118     /// ```
119     /// # use pest::Position;
120     /// let input = "ab";
121     /// let start = Position::from_start(input);
122     /// let end = start.clone();
123     /// let span = start.span(&end);
124     ///
125     /// assert_eq!(span.end(), 0);
126     /// ```
127     #[inline]
end(&self) -> usize128     pub fn end(&self) -> usize {
129         self.end
130     }
131 
132     /// Returns the `Span`'s start `Position`.
133     ///
134     /// # Examples
135     ///
136     /// ```
137     /// # use pest::Position;
138     /// let input = "ab";
139     /// let start = Position::from_start(input);
140     /// let end = start.clone();
141     /// let span = start.clone().span(&end);
142     ///
143     /// assert_eq!(span.start_pos(), start);
144     /// ```
145     #[inline]
start_pos(&self) -> position::Position<'i>146     pub fn start_pos(&self) -> position::Position<'i> {
147         // Span's start position is always a UTF-8 border.
148         unsafe { position::Position::new_unchecked(self.input, self.start) }
149     }
150 
151     /// Returns the `Span`'s end `Position`.
152     ///
153     /// # Examples
154     ///
155     /// ```
156     /// # use pest::Position;
157     /// let input = "ab";
158     /// let start = Position::from_start(input);
159     /// let end = start.clone();
160     /// let span = start.span(&end);
161     ///
162     /// assert_eq!(span.end_pos(), end);
163     /// ```
164     #[inline]
end_pos(&self) -> position::Position<'i>165     pub fn end_pos(&self) -> position::Position<'i> {
166         // Span's end position is always a UTF-8 border.
167         unsafe { position::Position::new_unchecked(self.input, self.end) }
168     }
169 
170     /// Splits the `Span` into a pair of `Position`s.
171     ///
172     /// # Examples
173     ///
174     /// ```
175     /// # use pest::Position;
176     /// let input = "ab";
177     /// let start = Position::from_start(input);
178     /// let end = start.clone();
179     /// let span = start.clone().span(&end);
180     ///
181     /// assert_eq!(span.split(), (start, end));
182     /// ```
183     #[inline]
split(self) -> (position::Position<'i>, position::Position<'i>)184     pub fn split(self) -> (position::Position<'i>, position::Position<'i>) {
185         // Span's start and end positions are always a UTF-8 borders.
186         let pos1 = unsafe { position::Position::new_unchecked(self.input, self.start) };
187         let pos2 = unsafe { position::Position::new_unchecked(self.input, self.end) };
188 
189         (pos1, pos2)
190     }
191 
192     /// Captures a slice from the `&str` defined by the `Span`.
193     ///
194     /// # Examples
195     ///
196     /// ```
197     /// # use pest;
198     /// # #[allow(non_camel_case_types)]
199     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
200     /// enum Rule {}
201     ///
202     /// let input = "abc";
203     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(1).unwrap();
204     /// let start_pos = state.position().clone();
205     /// state = state.match_string("b").unwrap();
206     /// let span = start_pos.span(&state.position().clone());
207     /// assert_eq!(span.as_str(), "b");
208     /// ```
209     #[inline]
as_str(&self) -> &'i str210     pub fn as_str(&self) -> &'i str {
211         // Span's start and end positions are always a UTF-8 borders.
212         &self.input[self.start..self.end]
213     }
214 
215     /// Returns the input string of the `Span`.
216     ///
217     /// This function returns the input string of the `Span` as a `&str`. This is the source string
218     /// from which the `Span` was created. The returned `&str` can be used to examine the contents of
219     /// the `Span` or to perform further processing on the string.
220     ///
221     /// # Examples
222     ///
223     /// ```
224     /// # use pest;
225     /// # use pest::Span;
226     ///
227     /// // Example: Get input string from a span
228     /// let input = "abc\ndef\nghi";
229     /// let span = Span::new(input, 1, 7).unwrap();
230     /// assert_eq!(span.get_input(), input);
231     /// ```
get_input(&self) -> &'i str232     pub fn get_input(&self) -> &'i str {
233         self.input
234     }
235 
236     /// Iterates over all lines (partially) covered by this span. Yielding a `&str` for each line.
237     ///
238     /// # Examples
239     ///
240     /// ```
241     /// # use pest;
242     /// # #[allow(non_camel_case_types)]
243     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
244     /// enum Rule {}
245     ///
246     /// let input = "a\nb\nc";
247     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap();
248     /// let start_pos = state.position().clone();
249     /// state = state.match_string("b\nc").unwrap();
250     /// let span = start_pos.span(&state.position().clone());
251     /// assert_eq!(span.lines().collect::<Vec<_>>(), vec!["b\n", "c"]);
252     /// ```
253     #[inline]
lines(&self) -> Lines<'_>254     pub fn lines(&self) -> Lines<'_> {
255         Lines {
256             inner: self.lines_span(),
257         }
258     }
259 
260     /// Iterates over all lines (partially) covered by this span. Yielding a `Span` for each line.
261     ///
262     /// # Examples
263     ///
264     /// ```
265     /// # use pest;
266     /// # use pest::Span;
267     /// # #[allow(non_camel_case_types)]
268     /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
269     /// enum Rule {}
270     ///
271     /// let input = "a\nb\nc";
272     /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap();
273     /// let start_pos = state.position().clone();
274     /// state = state.match_string("b\nc").unwrap();
275     /// let span = start_pos.span(&state.position().clone());
276     /// assert_eq!(span.lines_span().collect::<Vec<_>>(), vec![Span::new(input, 2, 4).unwrap(), Span::new(input, 4, 5).unwrap()]);
277     /// ```
lines_span(&self) -> LinesSpan<'_>278     pub fn lines_span(&self) -> LinesSpan<'_> {
279         LinesSpan {
280             span: self,
281             pos: self.start,
282         }
283     }
284 }
285 
286 impl<'i> fmt::Debug for Span<'i> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result287     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
288         f.debug_struct("Span")
289             .field("str", &self.as_str())
290             .field("start", &self.start)
291             .field("end", &self.end)
292             .finish()
293     }
294 }
295 
296 impl<'i> PartialEq for Span<'i> {
eq(&self, other: &Span<'i>) -> bool297     fn eq(&self, other: &Span<'i>) -> bool {
298         ptr::eq(self.input, other.input) && self.start == other.start && self.end == other.end
299     }
300 }
301 
302 impl<'i> Eq for Span<'i> {}
303 
304 impl<'i> Hash for Span<'i> {
hash<H: Hasher>(&self, state: &mut H)305     fn hash<H: Hasher>(&self, state: &mut H) {
306         (self.input as *const str).hash(state);
307         self.start.hash(state);
308         self.end.hash(state);
309     }
310 }
311 
312 /// Merges two spans into one.
313 ///
314 /// This function merges two spans that are contiguous or overlapping into a single span
315 /// that covers the entire range of the two input spans. This is useful when you want to
316 /// aggregate information from multiple spans into a single entity.
317 ///
318 /// The function checks if the input spans are overlapping or contiguous by comparing their
319 /// start and end positions. If they are, a new span is created with the minimum start position
320 /// and the maximum end position of the two input spans.
321 ///
322 /// If the input spans are neither overlapping nor contiguous, the function returns None,
323 /// indicating that a merge operation was not possible.
324 ///
325 /// # Examples
326 ///
327 /// ```
328 /// # use pest;
329 /// # use pest::Span;
330 /// # use pest::merge_spans;
331 ///
332 /// // Example 1: Contiguous spans
333 /// let input = "abc\ndef\nghi";
334 /// let span1 = Span::new(input, 1, 7).unwrap();
335 /// let span2 = Span::new(input, 7, 11).unwrap();
336 /// let merged = merge_spans(&span1, &span2).unwrap();
337 /// assert_eq!(merged, Span::new(input, 1, 11).unwrap());
338 ///
339 /// // Example 2: Overlapping spans
340 /// let input = "abc\ndef\nghi";
341 /// let span1 = Span::new(input, 1, 7).unwrap();
342 /// let span2 = Span::new(input, 5, 11).unwrap();
343 /// let merged = merge_spans(&span1, &span2).unwrap();
344 /// assert_eq!(merged, Span::new(input, 1, 11).unwrap());
345 ///
346 /// // Example 3: Non-contiguous spans
347 /// let input = "abc\ndef\nghi";
348 /// let span1 = Span::new(input, 1, 7).unwrap();
349 /// let span2 = Span::new(input, 8, 11).unwrap();
350 /// let merged = merge_spans(&span1, &span2);
351 /// assert!(merged.is_none());
352 /// ```
merge_spans<'i>(a: &Span<'i>, b: &Span<'i>) -> Option<Span<'i>>353 pub fn merge_spans<'i>(a: &Span<'i>, b: &Span<'i>) -> Option<Span<'i>> {
354     if a.end() >= b.start() && a.start() <= b.end() {
355         // The spans overlap or are contiguous, so they can be merged.
356         Span::new(
357             a.get_input(),
358             core::cmp::min(a.start(), b.start()),
359             core::cmp::max(a.end(), b.end()),
360         )
361     } else {
362         // The spans don't overlap and aren't contiguous, so they can't be merged.
363         None
364     }
365 }
366 
367 /// Line iterator for Spans, created by [`Span::lines_span()`].
368 ///
369 /// Iterates all lines that are at least _partially_ covered by the span. Yielding a `Span` for each.
370 ///
371 /// [`Span::lines_span()`]: struct.Span.html#method.lines_span
372 pub struct LinesSpan<'i> {
373     span: &'i Span<'i>,
374     pos: usize,
375 }
376 
377 impl<'i> Iterator for LinesSpan<'i> {
378     type Item = Span<'i>;
next(&mut self) -> Option<Self::Item>379     fn next(&mut self) -> Option<Self::Item> {
380         if self.pos > self.span.end {
381             return None;
382         }
383         let pos = position::Position::new(self.span.input, self.pos)?;
384         if pos.at_end() {
385             return None;
386         }
387 
388         let line_start = pos.find_line_start();
389         self.pos = pos.find_line_end();
390 
391         Span::new(self.span.input, line_start, self.pos)
392     }
393 }
394 
395 /// Line iterator for Spans, created by [`Span::lines()`].
396 ///
397 /// Iterates all lines that are at least _partially_ covered by the span. Yielding a `&str` for each.
398 ///
399 /// [`Span::lines()`]: struct.Span.html#method.lines
400 pub struct Lines<'i> {
401     inner: LinesSpan<'i>,
402 }
403 
404 impl<'i> Iterator for Lines<'i> {
405     type Item = &'i str;
next(&mut self) -> Option<Self::Item>406     fn next(&mut self) -> Option<Self::Item> {
407         self.inner.next().map(|span| span.as_str())
408     }
409 }
410 
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::borrow::ToOwned;
    use alloc::vec::Vec;

    /// `Span::get` slices relative to the span while keeping the original input.
    #[test]
    fn get() {
        let input = "abc123abc";
        let span = Span::new(input, 3, input.len()).unwrap();
        assert_eq!(span.as_str(), "123abc");
        assert_eq!(span.input, input);

        // Every sub-span must exist, share the parent's input and cover the expected text.
        let check = |sub: Option<Span<'_>>, expected: &str| {
            assert!(sub.is_some());
            assert_eq!(sub.unwrap().input, input);
            assert_eq!(sub.unwrap().as_str(), expected);
        };

        check(span.get(..=2), "123");
        check(span.get(..), "123abc");
        check(span.get(3..), "abc");
        check(span.get(0..0), "");
    }

    /// Ranges reaching outside the span are rejected.
    #[test]
    fn get_fails() {
        let input = "abc";
        let span = Span::new(input, 0, input.len()).unwrap();

        assert!(span.get(0..100).is_none());
        assert!(span.get(100..200).is_none());
    }

    /// Out-of-bounds spans cannot be created and spans with different ranges differ.
    #[test]
    fn span_comp() {
        let input = "abc\ndef\nghi";
        let inner = Span::new(input, 1, 7).unwrap();
        let out_of_bounds = Span::new(input, 50, 51);
        assert!(out_of_bounds.is_none());
        let outer = Span::new(input, 0, 8).unwrap();
        assert!(inner != outer);
    }

    /// Splitting a span gives back the two positions it was built from.
    #[test]
    fn split() {
        let input = "a";
        let start = position::Position::from_start(input);
        let mut end = start;

        assert!(end.skip(1));

        let span = start.span(&end);

        assert_eq!(span.split(), (start, end));
    }

    /// A span that starts and ends in the middle of lines yields both lines in full.
    #[test]
    fn lines_mid() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let as_strs: Vec<_> = span.lines().collect();
        let via_spans: Vec<_> = span.lines_span().map(|line| line.as_str()).collect();

        assert_eq!(as_strs.len(), 2);
        assert_eq!(as_strs[0], "abc\n".to_owned());
        assert_eq!(as_strs[1], "def\n".to_owned());
        assert_eq!(as_strs, via_spans) // Verify parity with lines_span()
    }

    /// A span that reaches the end of the input yields the final, unterminated line.
    #[test]
    fn lines_eof() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 5, 11).unwrap();
        assert!(span.end_pos().at_end());
        assert_eq!(span.end(), 11);
        let as_strs: Vec<_> = span.lines().collect();
        let via_spans: Vec<_> = span.lines_span().map(|line| line.as_str()).collect();

        assert_eq!(as_strs.len(), 2);
        assert_eq!(as_strs[0], "def\n".to_owned());
        assert_eq!(as_strs[1], "ghi".to_owned());
        assert_eq!(as_strs, via_spans) // Verify parity with lines_span()
    }

    /// `lines_span` yields whole-line spans whose text matches what `lines` yields.
    #[test]
    fn lines_span() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let line_spans: Vec<_> = span.lines_span().collect();
        let line_strs: Vec<_> = span.lines().collect();

        assert_eq!(line_spans.len(), 2);
        assert_eq!(line_spans[0], Span::new(input, 0, 4).unwrap());
        assert_eq!(line_spans[1], Span::new(input, 4, 8).unwrap());

        let texts: Vec<_> = line_spans.iter().map(|line| line.as_str()).collect();
        assert_eq!(texts, line_strs);
    }

    /// `get_input` returns the whole input, not just the covered part.
    #[test]
    fn get_input_of_span() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();

        assert_eq!(span.get_input(), input);
    }

    /// Spans that touch end-to-start are merged.
    #[test]
    fn merge_contiguous() {
        let input = "abc\ndef\nghi";
        let left = Span::new(input, 1, 7).unwrap();
        let right = Span::new(input, 7, 11).unwrap();
        let merged = merge_spans(&left, &right).unwrap();

        assert_eq!(merged, Span::new(input, 1, 11).unwrap());
    }

    /// Spans that share some bytes are merged.
    #[test]
    fn merge_overlapping() {
        let input = "abc\ndef\nghi";
        let left = Span::new(input, 1, 7).unwrap();
        let right = Span::new(input, 5, 11).unwrap();
        let merged = merge_spans(&left, &right).unwrap();

        assert_eq!(merged, Span::new(input, 1, 11).unwrap());
    }

    /// Spans separated by a gap are not merged.
    #[test]
    fn merge_non_contiguous() {
        let input = "abc\ndef\nghi";
        let left = Span::new(input, 1, 7).unwrap();
        let right = Span::new(input, 8, 11).unwrap();
        let merged = merge_spans(&left, &right);

        assert!(merged.is_none());
    }
}
565