1  // Copyright 2015-2016 Brian Smith.
2  //
3  // Permission to use, copy, modify, and/or distribute this software for any
4  // purpose with or without fee is hereby granted, provided that the above
5  // copyright notice and this permission notice appear in all copies.
6  //
7  // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
8  // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9  // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
10  // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11  // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12  // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13  // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14  
15  //! untrusted.rs: Safe, fast, zero-panic, zero-crashing, zero-allocation
16  //! parsing of untrusted inputs in Rust.
17  //!
18  //! <code>git clone https://github.com/briansmith/untrusted</code>
19  //!
20  //! untrusted.rs goes beyond Rust's normal safety guarantees by  also
21  //! guaranteeing that parsing will be panic-free, as long as
22  //! `untrusted::Input::as_slice_less_safe()` is not used. It avoids copying
23  //! data and heap allocation and strives to prevent common pitfalls such as
24  //! accidentally parsing input bytes multiple times. In order to meet these
25  //! goals, untrusted.rs is limited in functionality such that it works best for
26  //! input languages with a small fixed amount of lookahead such as ASN.1, TLS,
27  //! TCP/IP, and many other networking, IPC, and related protocols. Languages
28  //! that require more lookahead and/or backtracking require some significant
29  //! contortions to parse using this framework. It would not be realistic to use
30  //! it for parsing programming language code, for example.
31  //!
32  //! The overall pattern for using untrusted.rs is:
33  //!
34  //! 1. Write a recursive-descent-style parser for the input language, where the
35  //!    input data is given as a `&mut untrusted::Reader` parameter to each
36  //!    function. Each function should have a return type of `Result<V, E>` for
37  //!    some value type `V` and some error type `E`, either or both of which may
38  //!    be `()`. Functions for parsing the lowest-level language constructs
39  //!    should be defined. Those lowest-level functions will parse their inputs
40  //!    using `::read_byte()`, `Reader::peek()`, and similar functions.
41  //!    Higher-level language constructs are then parsed by calling the
42  //!    lower-level functions in sequence.
43  //!
44  //! 2. Wrap the top-most functions of your recursive-descent parser in
45  //!    functions that take their input data as an `untrusted::Input`. The
46  //!    wrapper functions should call the `Input`'s `read_all` (or a variant
47  //!    thereof) method. The wrapper functions are the only ones that should be
48  //!    exposed outside the parser's module.
49  //!
50  //! 3. After receiving the input data to parse, wrap it in an `untrusted::Input`
51  //!    using `untrusted::Input::from()` as early as possible. Pass the
52  //!    `untrusted::Input` to the wrapper functions when they need to be parsed.
53  //!
54  //! In general parsers built using `untrusted::Reader` do not need to explicitly
55  //! check for end-of-input unless they are parsing optional constructs, because
56  //! `Reader::read_byte()` will return `Err(EndOfInput)` on end-of-input.
57  //! Similarly, parsers using `untrusted::Reader` generally don't need to check
58  //! for extra junk at the end of the input as long as the parser's API uses the
59  //! pattern described above, as `read_all` and its variants automatically check
60  //! for trailing junk. `Reader::skip_to_end()` must be used when any remaining
61  //! unread input should be ignored without triggering an error.
62  //!
63  //! untrusted.rs works best when all processing of the input data is done
64  //! through the `untrusted::Input` and `untrusted::Reader` types. In
65  //! particular, avoid trying to parse input data using functions that take
66  //! byte slices. However, when you need to access a part of the input data as
67  //! a slice to use a function that isn't written using untrusted.rs,
68  //! `Input::as_slice_less_safe()` can be used.
69  //!
70  //! It is recommend to use `use untrusted;` and then `untrusted::Input`,
71  //! `untrusted::Reader`, etc., instead of using `use untrusted::*`. Qualifying
72  //! the names with `untrusted` helps remind the reader of the code that it is
73  //! dealing with *untrusted* input.
74  //!
75  //! # Examples
76  //!
77  //! [*ring*](https://github.com/briansmith/ring)'s parser for the subset of
78  //! ASN.1 DER it needs to understand,
79  //! [`ring::der`](https://github.com/briansmith/ring/blob/master/src/der.rs),
80  //! is built on top of untrusted.rs. *ring* also uses untrusted.rs to parse ECC
81  //! public keys, RSA PKCS#1 1.5 padding, and for all other parsing it does.
82  //!
83  //! All of [webpki](https://github.com/briansmith/webpki)'s parsing of X.509
84  //! certificates (also ASN.1 DER) is done using untrusted.rs.
85  
86  #![doc(html_root_url = "https://briansmith.org/rustdoc/")]
87  // `#[derive(...)]` uses `#[allow(unused_qualifications)]` internally.
88  #![deny(unused_qualifications)]
89  #![forbid(
90      anonymous_parameters,
91      box_pointers,
92      missing_docs,
93      trivial_casts,
94      trivial_numeric_casts,
95      unsafe_code,
96      unstable_features,
97      unused_extern_crates,
98      unused_import_braces,
99      unused_results,
100      variant_size_differences,
101      warnings
102  )]
103  // ANDROID: This lint is included in warnings and thus forbidden, but this version of the library
104  // is not compatible with it.
105  #![allow(rustdoc::bare_urls)]
106  #![no_std]
107  
108  // ANDROID: use std to allow building as a dylib.
109  #[cfg(android_dylib)]
110  extern crate std;
111  
112  /// A wrapper around `&'a [u8]` that helps in writing panic-free code.
113  ///
114  /// No methods of `Input` will ever panic.
115  #[derive(Clone, Copy, Debug, Eq)]
116  pub struct Input<'a> {
117      value: no_panic::Slice<'a>,
118  }
119  
120  impl<'a> Input<'a> {
121      /// Construct a new `Input` for the given input `bytes`.
from(bytes: &'a [u8]) -> Self122      pub const fn from(bytes: &'a [u8]) -> Self {
123          // This limit is important for avoiding integer overflow. In particular,
124          // `Reader` assumes that an `i + 1 > i` if `input.value.get(i)` does
125          // not return `None`. According to the Rust language reference, the
126          // maximum object size is `core::isize::MAX`, and in practice it is
127          // impossible to create an object of size `core::usize::MAX` or larger.
128          Self {
129              value: no_panic::Slice::new(bytes),
130          }
131      }
132  
133      /// Returns `true` if the input is empty and false otherwise.
134      #[inline]
is_empty(&self) -> bool135      pub fn is_empty(&self) -> bool { self.value.is_empty() }
136  
137      /// Returns the length of the `Input`.
138      #[inline]
len(&self) -> usize139      pub fn len(&self) -> usize { self.value.len() }
140  
141      /// Calls `read` with the given input as a `Reader`, ensuring that `read`
142      /// consumed the entire input. If `read` does not consume the entire input,
143      /// `incomplete_read` is returned.
read_all<F, R, E>(&self, incomplete_read: E, read: F) -> Result<R, E> where F: FnOnce(&mut Reader<'a>) -> Result<R, E>,144      pub fn read_all<F, R, E>(&self, incomplete_read: E, read: F) -> Result<R, E>
145      where
146          F: FnOnce(&mut Reader<'a>) -> Result<R, E>,
147      {
148          let mut input = Reader::new(*self);
149          let result = read(&mut input)?;
150          if input.at_end() {
151              Ok(result)
152          } else {
153              Err(incomplete_read)
154          }
155      }
156  
157      /// Access the input as a slice so it can be processed by functions that
158      /// are not written using the Input/Reader framework.
159      #[inline]
as_slice_less_safe(&self) -> &'a [u8]160      pub fn as_slice_less_safe(&self) -> &'a [u8] { self.value.as_slice_less_safe() }
161  }
162  
163  impl<'a> From<&'a [u8]> for Input<'a> {
164      #[inline]
from(value: &'a [u8]) -> Self165      fn from(value: &'a [u8]) -> Self { Self { value: no_panic::Slice::new(value)} }
166  }
167  
168  // #[derive(PartialEq)] would result in lifetime bounds that are
169  // unnecessarily restrictive; see
170  // https://github.com/rust-lang/rust/issues/26925.
171  impl PartialEq<Input<'_>> for Input<'_> {
172      #[inline]
eq(&self, other: &Input) -> bool173      fn eq(&self, other: &Input) -> bool {
174          self.as_slice_less_safe() == other.as_slice_less_safe()
175      }
176  }
177  
178  impl PartialEq<[u8]> for Input<'_> {
179      #[inline]
eq(&self, other: &[u8]) -> bool180      fn eq(&self, other: &[u8]) -> bool { self.as_slice_less_safe() == other }
181  }
182  
183  impl PartialEq<Input<'_>> for [u8] {
184      #[inline]
eq(&self, other: &Input) -> bool185      fn eq(&self, other: &Input) -> bool { other.as_slice_less_safe() == self }
186  }
187  
188  /// Calls `read` with the given input as a `Reader`, ensuring that `read`
189  /// consumed the entire input. When `input` is `None`, `read` will be
190  /// called with `None`.
read_all_optional<'a, F, R, E>( input: Option<Input<'a>>, incomplete_read: E, read: F, ) -> Result<R, E> where F: FnOnce(Option<&mut Reader<'a>>) -> Result<R, E>,191  pub fn read_all_optional<'a, F, R, E>(
192      input: Option<Input<'a>>, incomplete_read: E, read: F,
193  ) -> Result<R, E>
194  where
195      F: FnOnce(Option<&mut Reader<'a>>) -> Result<R, E>,
196  {
197      match input {
198          Some(input) => {
199              let mut input = Reader::new(input);
200              let result = read(Some(&mut input))?;
201              if input.at_end() {
202                  Ok(result)
203              } else {
204                  Err(incomplete_read)
205              }
206          },
207          None => read(None),
208      }
209  }
210  
211  /// A read-only, forward-only* cursor into the data in an `Input`.
212  ///
213  /// Using `Reader` to parse input helps to ensure that no byte of the input
214  /// will be accidentally processed more than once. Using `Reader` in
215  /// conjunction with `read_all` and `read_all_optional` helps ensure that no
216  /// byte of the input is accidentally left unprocessed. The methods of `Reader`
217  /// never panic, so `Reader` also assists the writing of panic-free code.
218  ///
219  /// \* `Reader` is not strictly forward-only because of the method
220  /// `get_input_between_marks`, which is provided mainly to support calculating
221  /// digests over parsed data.
222  #[derive(Debug)]
223  pub struct Reader<'a> {
224      input: no_panic::Slice<'a>,
225      i: usize,
226  }
227  
228  /// An index into the already-parsed input of a `Reader`.
229  pub struct Mark {
230      i: usize,
231  }
232  
233  impl<'a> Reader<'a> {
234      /// Construct a new Reader for the given input. Use `read_all` or
235      /// `read_all_optional` instead of `Reader::new` whenever possible.
236      #[inline]
new(input: Input<'a>) -> Self237      pub fn new(input: Input<'a>) -> Self {
238          Self {
239              input: input.value,
240              i: 0,
241          }
242      }
243  
244      /// Returns `true` if the reader is at the end of the input, and `false`
245      /// otherwise.
246      #[inline]
at_end(&self) -> bool247      pub fn at_end(&self) -> bool { self.i == self.input.len() }
248  
249      /// Returns an `Input` for already-parsed input that has had its boundaries
250      /// marked using `mark`.
251      #[inline]
get_input_between_marks( &self, mark1: Mark, mark2: Mark, ) -> Result<Input<'a>, EndOfInput>252      pub fn get_input_between_marks(
253          &self, mark1: Mark, mark2: Mark,
254      ) -> Result<Input<'a>, EndOfInput> {
255          self.input
256              .subslice(mark1.i..mark2.i)
257              .map(|subslice| Input { value: subslice })
258              .ok_or(EndOfInput)
259      }
260  
261      /// Return the current position of the `Reader` for future use in a call
262      /// to `get_input_between_marks`.
263      #[inline]
mark(&self) -> Mark264      pub fn mark(&self) -> Mark { Mark { i: self.i } }
265  
266      /// Returns `true` if there is at least one more byte in the input and that
267      /// byte is equal to `b`, and false otherwise.
268      #[inline]
peek(&self, b: u8) -> bool269      pub fn peek(&self, b: u8) -> bool {
270          match self.input.get(self.i) {
271              Some(actual_b) => b == *actual_b,
272              None => false,
273          }
274      }
275  
276      /// Reads the next input byte.
277      ///
278      /// Returns `Ok(b)` where `b` is the next input byte, or `Err(EndOfInput)`
279      /// if the `Reader` is at the end of the input.
280      #[inline]
read_byte(&mut self) -> Result<u8, EndOfInput>281      pub fn read_byte(&mut self) -> Result<u8, EndOfInput> {
282          match self.input.get(self.i) {
283              Some(b) => {
284                  self.i += 1; // safe from overflow; see Input::from().
285                  Ok(*b)
286              },
287              None => Err(EndOfInput),
288          }
289      }
290  
291      /// Skips `num_bytes` of the input, returning the skipped input as an
292      /// `Input`.
293      ///
294      /// Returns `Ok(i)` if there are at least `num_bytes` of input remaining,
295      /// and `Err(EndOfInput)` otherwise.
296      #[inline]
read_bytes(&mut self, num_bytes: usize) -> Result<Input<'a>, EndOfInput>297      pub fn read_bytes(&mut self, num_bytes: usize) -> Result<Input<'a>, EndOfInput> {
298          let new_i = self.i.checked_add(num_bytes).ok_or(EndOfInput)?;
299          let ret = self
300              .input
301              .subslice(self.i..new_i)
302              .map(|subslice| Input { value: subslice })
303              .ok_or(EndOfInput)?;
304          self.i = new_i;
305          Ok(ret)
306      }
307  
308      /// Skips the reader to the end of the input, returning the skipped input
309      /// as an `Input`.
310      #[inline]
read_bytes_to_end(&mut self) -> Input<'a>311      pub fn read_bytes_to_end(&mut self) -> Input<'a> {
312          let to_skip = self.input.len() - self.i;
313          self.read_bytes(to_skip).unwrap()
314      }
315  
316      /// Calls `read()` with the given input as a `Reader`. On success, returns a
317      /// pair `(bytes_read, r)` where `bytes_read` is what `read()` consumed and
318      /// `r` is `read()`'s return value.
read_partial<F, R, E>(&mut self, read: F) -> Result<(Input<'a>, R), E> where F: FnOnce(&mut Reader<'a>) -> Result<R, E>,319      pub fn read_partial<F, R, E>(&mut self, read: F) -> Result<(Input<'a>, R), E>
320      where
321          F: FnOnce(&mut Reader<'a>) -> Result<R, E>,
322      {
323          let start = self.i;
324          let r = read(self)?;
325          let bytes_read = Input {
326              value: self.input.subslice(start..self.i).unwrap()
327          };
328          Ok((bytes_read, r))
329      }
330  
331      /// Skips `num_bytes` of the input.
332      ///
333      /// Returns `Ok(i)` if there are at least `num_bytes` of input remaining,
334      /// and `Err(EndOfInput)` otherwise.
335      #[inline]
skip(&mut self, num_bytes: usize) -> Result<(), EndOfInput>336      pub fn skip(&mut self, num_bytes: usize) -> Result<(), EndOfInput> {
337          self.read_bytes(num_bytes).map(|_| ())
338      }
339  
340      /// Skips the reader to the end of the input.
341      #[inline]
skip_to_end(&mut self) -> ()342      pub fn skip_to_end(&mut self) -> () { let _ = self.read_bytes_to_end(); }
343  }
344  
345  /// The error type used to indicate the end of the input was reached before the
346  /// operation could be completed.
347  #[derive(Clone, Copy, Debug, Eq, PartialEq)]
348  pub struct EndOfInput;
349  
350  mod no_panic {
351      use core;
352  
353      /// A wrapper around a slice that exposes no functions that can panic.
354      #[derive(Clone, Copy, Debug, Eq, PartialEq)]
355      pub struct Slice<'a> {
356          bytes: &'a [u8],
357      }
358  
359      impl<'a> Slice<'a> {
360          #[inline]
new(bytes: &'a [u8]) -> Self361          pub const fn new(bytes: &'a [u8]) -> Self { Self { bytes } }
362  
363          #[inline]
get(&self, i: usize) -> Option<&u8>364          pub fn get(&self, i: usize) -> Option<&u8> { self.bytes.get(i) }
365  
366          #[inline]
subslice(&self, r: core::ops::Range<usize>) -> Option<Self>367          pub fn subslice(&self, r: core::ops::Range<usize>) -> Option<Self> {
368              self.bytes.get(r).map(|bytes| Self { bytes })
369          }
370  
371          #[inline]
is_empty(&self) -> bool372          pub fn is_empty(&self) -> bool { self.bytes.is_empty() }
373  
374          #[inline]
len(&self) -> usize375          pub fn len(&self) -> usize { self.bytes.len() }
376  
377          #[inline]
as_slice_less_safe(&self) -> &'a [u8]378          pub fn as_slice_less_safe(&self) -> &'a [u8] { self.bytes }
379      }
380  
381  } // mod no_panic
382