1 // pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
3 //
4 // Licensed under the Apache License, Version 2.0
5 // <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6 // license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7 // option. All files in the project carrying such notice may not be copied,
8 // modified, or distributed except according to those terms.
9
10 use core::fmt;
11 use core::hash::{Hash, Hasher};
12 use core::ops::{Bound, RangeBounds};
13 use core::ptr;
14 use core::str;
15
16 use crate::position;
17
/// A span over a `&str`. It is created from either [two `Position`s] or from a [`Pair`].
///
/// A `Span` borrows the complete input string and records the byte range `start..end` inside it.
/// Equality and hashing identify the input by pointer rather than by content, so two spans are
/// equal only when they cover the same range of the very same input.
///
/// [two `Position`s]: struct.Position.html#method.span
/// [`Pair`]: ../iterators/struct.Pair.html#method.span
#[derive(Clone, Copy)]
pub struct Span<'i> {
    /// The complete input string that `start` and `end` index into.
    input: &'i str,
    /// Byte offset of the first byte covered by the span.
    ///
    /// # Safety
    ///
    /// Must be a valid character boundary index into `input`.
    start: usize,
    /// Byte offset one past the last byte covered by the span.
    ///
    /// # Safety
    ///
    /// Must be a valid character boundary index into `input`.
    end: usize,
}
34
35 impl<'i> Span<'i> {
36 /// Create a new `Span` without checking invariants. (Checked with `debug_assertions`.)
37 ///
38 /// # Safety
39 ///
40 /// `input[start..end]` must be a valid subslice; that is, said indexing should not panic.
new_unchecked(input: &str, start: usize, end: usize) -> Span<'_>41 pub(crate) unsafe fn new_unchecked(input: &str, start: usize, end: usize) -> Span<'_> {
42 debug_assert!(input.get(start..end).is_some());
43 Span { input, start, end }
44 }
45
46 /// Attempts to create a new span. Will return `None` if `input[start..end]` is an invalid index
47 /// into `input`.
48 ///
49 /// # Examples
50 ///
51 /// ```
52 /// # use pest::Span;
53 /// let input = "Hello!";
54 /// assert_eq!(None, Span::new(input, 100, 0));
55 /// assert!(Span::new(input, 0, input.len()).is_some());
56 /// ```
new(input: &str, start: usize, end: usize) -> Option<Span<'_>>57 pub fn new(input: &str, start: usize, end: usize) -> Option<Span<'_>> {
58 if input.get(start..end).is_some() {
59 Some(Span { input, start, end })
60 } else {
61 None
62 }
63 }
64
65 /// Attempts to create a new span based on a sub-range.
66 ///
67 /// ```
68 /// use pest::Span;
69 /// let input = "Hello World!";
70 /// let world = Span::new(input, 6, input.len()).unwrap();
71 /// let orl = world.get(1..=3);
72 /// assert!(orl.is_some());
73 /// assert_eq!(orl.unwrap().as_str(), "orl");
74 /// ```
75 ///
76 /// # Examples
get(&self, range: impl RangeBounds<usize>) -> Option<Span<'i>>77 pub fn get(&self, range: impl RangeBounds<usize>) -> Option<Span<'i>> {
78 let start = match range.start_bound() {
79 Bound::Included(offset) => *offset,
80 Bound::Excluded(offset) => *offset + 1,
81 Bound::Unbounded => 0,
82 };
83 let end = match range.end_bound() {
84 Bound::Included(offset) => *offset + 1,
85 Bound::Excluded(offset) => *offset,
86 Bound::Unbounded => self.as_str().len(),
87 };
88
89 self.as_str().get(start..end).map(|_| Span {
90 input: self.input,
91 start: self.start + start,
92 end: self.start + end,
93 })
94 }
95
96 /// Returns the `Span`'s start byte position as a `usize`.
97 ///
98 /// # Examples
99 ///
100 /// ```
101 /// # use pest::Position;
102 /// let input = "ab";
103 /// let start = Position::from_start(input);
104 /// let end = start.clone();
105 /// let span = start.span(&end);
106 ///
107 /// assert_eq!(span.start(), 0);
108 /// ```
109 #[inline]
start(&self) -> usize110 pub fn start(&self) -> usize {
111 self.start
112 }
113
114 /// Returns the `Span`'s end byte position as a `usize`.
115 ///
116 /// # Examples
117 ///
118 /// ```
119 /// # use pest::Position;
120 /// let input = "ab";
121 /// let start = Position::from_start(input);
122 /// let end = start.clone();
123 /// let span = start.span(&end);
124 ///
125 /// assert_eq!(span.end(), 0);
126 /// ```
127 #[inline]
end(&self) -> usize128 pub fn end(&self) -> usize {
129 self.end
130 }
131
132 /// Returns the `Span`'s start `Position`.
133 ///
134 /// # Examples
135 ///
136 /// ```
137 /// # use pest::Position;
138 /// let input = "ab";
139 /// let start = Position::from_start(input);
140 /// let end = start.clone();
141 /// let span = start.clone().span(&end);
142 ///
143 /// assert_eq!(span.start_pos(), start);
144 /// ```
145 #[inline]
start_pos(&self) -> position::Position<'i>146 pub fn start_pos(&self) -> position::Position<'i> {
147 // Span's start position is always a UTF-8 border.
148 unsafe { position::Position::new_unchecked(self.input, self.start) }
149 }
150
151 /// Returns the `Span`'s end `Position`.
152 ///
153 /// # Examples
154 ///
155 /// ```
156 /// # use pest::Position;
157 /// let input = "ab";
158 /// let start = Position::from_start(input);
159 /// let end = start.clone();
160 /// let span = start.span(&end);
161 ///
162 /// assert_eq!(span.end_pos(), end);
163 /// ```
164 #[inline]
end_pos(&self) -> position::Position<'i>165 pub fn end_pos(&self) -> position::Position<'i> {
166 // Span's end position is always a UTF-8 border.
167 unsafe { position::Position::new_unchecked(self.input, self.end) }
168 }
169
170 /// Splits the `Span` into a pair of `Position`s.
171 ///
172 /// # Examples
173 ///
174 /// ```
175 /// # use pest::Position;
176 /// let input = "ab";
177 /// let start = Position::from_start(input);
178 /// let end = start.clone();
179 /// let span = start.clone().span(&end);
180 ///
181 /// assert_eq!(span.split(), (start, end));
182 /// ```
183 #[inline]
split(self) -> (position::Position<'i>, position::Position<'i>)184 pub fn split(self) -> (position::Position<'i>, position::Position<'i>) {
185 // Span's start and end positions are always a UTF-8 borders.
186 let pos1 = unsafe { position::Position::new_unchecked(self.input, self.start) };
187 let pos2 = unsafe { position::Position::new_unchecked(self.input, self.end) };
188
189 (pos1, pos2)
190 }
191
192 /// Captures a slice from the `&str` defined by the `Span`.
193 ///
194 /// # Examples
195 ///
196 /// ```
197 /// # use pest;
198 /// # #[allow(non_camel_case_types)]
199 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
200 /// enum Rule {}
201 ///
202 /// let input = "abc";
203 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(1).unwrap();
204 /// let start_pos = state.position().clone();
205 /// state = state.match_string("b").unwrap();
206 /// let span = start_pos.span(&state.position().clone());
207 /// assert_eq!(span.as_str(), "b");
208 /// ```
209 #[inline]
as_str(&self) -> &'i str210 pub fn as_str(&self) -> &'i str {
211 // Span's start and end positions are always a UTF-8 borders.
212 &self.input[self.start..self.end]
213 }
214
215 /// Returns the input string of the `Span`.
216 ///
217 /// This function returns the input string of the `Span` as a `&str`. This is the source string
218 /// from which the `Span` was created. The returned `&str` can be used to examine the contents of
219 /// the `Span` or to perform further processing on the string.
220 ///
221 /// # Examples
222 ///
223 /// ```
224 /// # use pest;
225 /// # use pest::Span;
226 ///
227 /// // Example: Get input string from a span
228 /// let input = "abc\ndef\nghi";
229 /// let span = Span::new(input, 1, 7).unwrap();
230 /// assert_eq!(span.get_input(), input);
231 /// ```
get_input(&self) -> &'i str232 pub fn get_input(&self) -> &'i str {
233 self.input
234 }
235
236 /// Iterates over all lines (partially) covered by this span. Yielding a `&str` for each line.
237 ///
238 /// # Examples
239 ///
240 /// ```
241 /// # use pest;
242 /// # #[allow(non_camel_case_types)]
243 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
244 /// enum Rule {}
245 ///
246 /// let input = "a\nb\nc";
247 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap();
248 /// let start_pos = state.position().clone();
249 /// state = state.match_string("b\nc").unwrap();
250 /// let span = start_pos.span(&state.position().clone());
251 /// assert_eq!(span.lines().collect::<Vec<_>>(), vec!["b\n", "c"]);
252 /// ```
253 #[inline]
lines(&self) -> Lines<'_>254 pub fn lines(&self) -> Lines<'_> {
255 Lines {
256 inner: self.lines_span(),
257 }
258 }
259
260 /// Iterates over all lines (partially) covered by this span. Yielding a `Span` for each line.
261 ///
262 /// # Examples
263 ///
264 /// ```
265 /// # use pest;
266 /// # use pest::Span;
267 /// # #[allow(non_camel_case_types)]
268 /// # #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
269 /// enum Rule {}
270 ///
271 /// let input = "a\nb\nc";
272 /// let mut state: Box<pest::ParserState<'_, Rule>> = pest::ParserState::new(input).skip(2).unwrap();
273 /// let start_pos = state.position().clone();
274 /// state = state.match_string("b\nc").unwrap();
275 /// let span = start_pos.span(&state.position().clone());
276 /// assert_eq!(span.lines_span().collect::<Vec<_>>(), vec![Span::new(input, 2, 4).unwrap(), Span::new(input, 4, 5).unwrap()]);
277 /// ```
lines_span(&self) -> LinesSpan<'_>278 pub fn lines_span(&self) -> LinesSpan<'_> {
279 LinesSpan {
280 span: self,
281 pos: self.start,
282 }
283 }
284 }
285
286 impl<'i> fmt::Debug for Span<'i> {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result287 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
288 f.debug_struct("Span")
289 .field("str", &self.as_str())
290 .field("start", &self.start)
291 .field("end", &self.end)
292 .finish()
293 }
294 }
295
296 impl<'i> PartialEq for Span<'i> {
eq(&self, other: &Span<'i>) -> bool297 fn eq(&self, other: &Span<'i>) -> bool {
298 ptr::eq(self.input, other.input) && self.start == other.start && self.end == other.end
299 }
300 }
301
302 impl<'i> Eq for Span<'i> {}
303
304 impl<'i> Hash for Span<'i> {
hash<H: Hasher>(&self, state: &mut H)305 fn hash<H: Hasher>(&self, state: &mut H) {
306 (self.input as *const str).hash(state);
307 self.start.hash(state);
308 self.end.hash(state);
309 }
310 }
311
312 /// Merges two spans into one.
313 ///
314 /// This function merges two spans that are contiguous or overlapping into a single span
315 /// that covers the entire range of the two input spans. This is useful when you want to
316 /// aggregate information from multiple spans into a single entity.
317 ///
318 /// The function checks if the input spans are overlapping or contiguous by comparing their
319 /// start and end positions. If they are, a new span is created with the minimum start position
320 /// and the maximum end position of the two input spans.
321 ///
322 /// If the input spans are neither overlapping nor contiguous, the function returns None,
323 /// indicating that a merge operation was not possible.
324 ///
325 /// # Examples
326 ///
327 /// ```
328 /// # use pest;
329 /// # use pest::Span;
330 /// # use pest::merge_spans;
331 ///
332 /// // Example 1: Contiguous spans
333 /// let input = "abc\ndef\nghi";
334 /// let span1 = Span::new(input, 1, 7).unwrap();
335 /// let span2 = Span::new(input, 7, 11).unwrap();
336 /// let merged = merge_spans(&span1, &span2).unwrap();
337 /// assert_eq!(merged, Span::new(input, 1, 11).unwrap());
338 ///
339 /// // Example 2: Overlapping spans
340 /// let input = "abc\ndef\nghi";
341 /// let span1 = Span::new(input, 1, 7).unwrap();
342 /// let span2 = Span::new(input, 5, 11).unwrap();
343 /// let merged = merge_spans(&span1, &span2).unwrap();
344 /// assert_eq!(merged, Span::new(input, 1, 11).unwrap());
345 ///
346 /// // Example 3: Non-contiguous spans
347 /// let input = "abc\ndef\nghi";
348 /// let span1 = Span::new(input, 1, 7).unwrap();
349 /// let span2 = Span::new(input, 8, 11).unwrap();
350 /// let merged = merge_spans(&span1, &span2);
351 /// assert!(merged.is_none());
352 /// ```
merge_spans<'i>(a: &Span<'i>, b: &Span<'i>) -> Option<Span<'i>>353 pub fn merge_spans<'i>(a: &Span<'i>, b: &Span<'i>) -> Option<Span<'i>> {
354 if a.end() >= b.start() && a.start() <= b.end() {
355 // The spans overlap or are contiguous, so they can be merged.
356 Span::new(
357 a.get_input(),
358 core::cmp::min(a.start(), b.start()),
359 core::cmp::max(a.end(), b.end()),
360 )
361 } else {
362 // The spans don't overlap and aren't contiguous, so they can't be merged.
363 None
364 }
365 }
366
/// Line iterator for Spans, created by [`Span::lines_span()`].
///
/// Iterates all lines that are at least _partially_ covered by the span. Yielding a `Span` for each.
///
/// [`Span::lines_span()`]: struct.Span.html#method.lines_span
pub struct LinesSpan<'i> {
    /// The span whose lines are being iterated.
    span: &'i Span<'i>,
    /// Byte offset into the span's input at which the next line is looked up; it starts at the
    /// span's start and moves to the end of each yielded line.
    pos: usize,
}
376
377 impl<'i> Iterator for LinesSpan<'i> {
378 type Item = Span<'i>;
next(&mut self) -> Option<Self::Item>379 fn next(&mut self) -> Option<Self::Item> {
380 if self.pos > self.span.end {
381 return None;
382 }
383 let pos = position::Position::new(self.span.input, self.pos)?;
384 if pos.at_end() {
385 return None;
386 }
387
388 let line_start = pos.find_line_start();
389 self.pos = pos.find_line_end();
390
391 Span::new(self.span.input, line_start, self.pos)
392 }
393 }
394
/// Line iterator for Spans, created by [`Span::lines()`].
///
/// Iterates all lines that are at least _partially_ covered by the span. Yielding a `&str` for each.
///
/// [`Span::lines()`]: struct.Span.html#method.lines
pub struct Lines<'i> {
    /// The span-yielding iterator this one wraps; each of its items is turned into a `&str`.
    inner: LinesSpan<'i>,
}
403
404 impl<'i> Iterator for Lines<'i> {
405 type Item = &'i str;
next(&mut self) -> Option<Self::Item>406 fn next(&mut self) -> Option<Self::Item> {
407 self.inner.next().map(|span| span.as_str())
408 }
409 }
410
// Unit tests for `Span`, its line iterators and `merge_spans`.
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::borrow::ToOwned;
    use alloc::vec::Vec;

    // Sub-spans obtained through `get` are relative to the span's start and keep referring to
    // the original input.
    #[test]
    fn get() {
        let input = "abc123abc";
        let span = Span::new(input, 3, input.len()).unwrap();
        assert_eq!(span.as_str(), "123abc");
        assert_eq!(span.input, input);

        let span1 = span.get(..=2);
        assert!(span1.is_some());
        assert_eq!(span1.unwrap().input, input);
        assert_eq!(span1.unwrap().as_str(), "123");

        let span2 = span.get(..);
        assert!(span2.is_some());
        assert_eq!(span2.unwrap().input, input);
        assert_eq!(span2.unwrap().as_str(), "123abc");

        let span3 = span.get(3..);
        assert!(span3.is_some());
        assert_eq!(span3.unwrap().input, input);
        assert_eq!(span3.unwrap().as_str(), "abc");

        // An empty range is a valid (empty) sub-span.
        let span4 = span.get(0..0);
        assert!(span4.is_some());
        assert_eq!(span4.unwrap().input, input);
        assert_eq!(span4.unwrap().as_str(), "");
    }

    // Ranges reaching past the end of the span are rejected.
    #[test]
    fn get_fails() {
        let input = "abc";
        let span = Span::new(input, 0, input.len()).unwrap();

        let span1 = span.get(0..100);
        assert!(span1.is_none());

        let span2 = span.get(100..200);
        assert!(span2.is_none());
    }

    // Out-of-bounds construction fails, and spans over different ranges of the same input are
    // not equal.
    #[test]
    fn span_comp() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let span2 = Span::new(input, 50, 51);
        assert!(span2.is_none());
        let span3 = Span::new(input, 0, 8).unwrap();
        assert!(span != span3);
    }

    // Splitting a span gives back the two positions it was built from.
    #[test]
    fn split() {
        let input = "a";
        let start = position::Position::from_start(input);
        let mut end = start;

        assert!(end.skip(1));

        let span = start.clone().span(&end.clone());

        assert_eq!(span.split(), (start, end));
    }

    // A span that starts and ends in the middle of a line yields the complete lines it touches.
    #[test]
    fn lines_mid() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let lines: Vec<_> = span.lines().collect();
        let lines_span: Vec<_> = span.lines_span().map(|span| span.as_str()).collect();

        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0], "abc\n".to_owned());
        assert_eq!(lines[1], "def\n".to_owned());
        assert_eq!(lines, lines_span) // Verify parity with lines_span()
    }

    // A span reaching the end of the input yields the last line without a trailing newline.
    #[test]
    fn lines_eof() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 5, 11).unwrap();
        assert!(span.end_pos().at_end());
        assert_eq!(span.end(), 11);
        let lines: Vec<_> = span.lines().collect();
        let lines_span: Vec<_> = span.lines_span().map(|span| span.as_str()).collect();

        assert_eq!(lines.len(), 2);
        assert_eq!(lines[0], "def\n".to_owned());
        assert_eq!(lines[1], "ghi".to_owned());
        assert_eq!(lines, lines_span) // Verify parity with lines_span()
    }

    // `lines_span` yields spans covering the complete lines and agrees with `lines`.
    #[test]
    fn lines_span() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();
        let lines_span: Vec<_> = span.lines_span().collect();
        let lines: Vec<_> = span.lines().collect();

        assert_eq!(lines_span.len(), 2);
        assert_eq!(lines_span[0], Span::new(input, 0, 4).unwrap());
        assert_eq!(lines_span[1], Span::new(input, 4, 8).unwrap());
        assert_eq!(
            lines_span
                .iter()
                .map(|span| span.as_str())
                .collect::<Vec<_>>(),
            lines
        );
    }

    // `get_input` returns the whole input, not only the covered part.
    #[test]
    fn get_input_of_span() {
        let input = "abc\ndef\nghi";
        let span = Span::new(input, 1, 7).unwrap();

        assert_eq!(span.get_input(), input);
    }

    // Spans that touch end-to-start merge into one span.
    #[test]
    fn merge_contiguous() {
        let input = "abc\ndef\nghi";
        let span1 = Span::new(input, 1, 7).unwrap();
        let span2 = Span::new(input, 7, 11).unwrap();
        let merged = merge_spans(&span1, &span2).unwrap();

        assert_eq!(merged, Span::new(input, 1, 11).unwrap());
    }

    // Overlapping spans merge into the smallest span covering both.
    #[test]
    fn merge_overlapping() {
        let input = "abc\ndef\nghi";
        let span1 = Span::new(input, 1, 7).unwrap();
        let span2 = Span::new(input, 5, 11).unwrap();
        let merged = merge_spans(&span1, &span2).unwrap();

        assert_eq!(merged, Span::new(input, 1, 11).unwrap());
    }

    // Spans separated by a gap cannot be merged.
    #[test]
    fn merge_non_contiguous() {
        let input = "abc\ndef\nghi";
        let span1 = Span::new(input, 1, 7).unwrap();
        let span2 = Span::new(input, 8, 11).unwrap();
        let merged = merge_spans(&span1, &span2);

        assert!(merged.is_none());
    }
}
565