1 // Copyright 2015-2016 Brian Smith. 2 // 3 // Permission to use, copy, modify, and/or distribute this software for any 4 // purpose with or without fee is hereby granted, provided that the above 5 // copyright notice and this permission notice appear in all copies. 6 // 7 // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 8 // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 10 // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 15 //! untrusted.rs: Safe, fast, zero-panic, zero-crashing, zero-allocation 16 //! parsing of untrusted inputs in Rust. 17 //! 18 //! <code>git clone https://github.com/briansmith/untrusted</code> 19 //! 20 //! untrusted.rs goes beyond Rust's normal safety guarantees by also 21 //! guaranteeing that parsing will be panic-free, as long as 22 //! `untrusted::Input::as_slice_less_safe()` is not used. It avoids copying 23 //! data and heap allocation and strives to prevent common pitfalls such as 24 //! accidentally parsing input bytes multiple times. In order to meet these 25 //! goals, untrusted.rs is limited in functionality such that it works best for 26 //! input languages with a small fixed amount of lookahead such as ASN.1, TLS, 27 //! TCP/IP, and many other networking, IPC, and related protocols. Languages 28 //! that require more lookahead and/or backtracking require some significant 29 //! contortions to parse using this framework. It would not be realistic to use 30 //! it for parsing programming language code, for example. 31 //! 32 //! The overall pattern for using untrusted.rs is: 33 //! 34 //! 1. Write a recursive-descent-style parser for the input language, where the 35 //! input data is given as a `&mut untrusted::Reader` parameter to each 36 //! function. Each function should have a return type of `Result<V, E>` for 37 //! some value type `V` and some error type `E`, either or both of which may 38 //! be `()`. Functions for parsing the lowest-level language constructs 39 //! should be defined. Those lowest-level functions will parse their inputs 40 //! using `::read_byte()`, `Reader::peek()`, and similar functions. 41 //! Higher-level language constructs are then parsed by calling the 42 //! lower-level functions in sequence. 43 //! 44 //! 2. Wrap the top-most functions of your recursive-descent parser in 45 //! functions that take their input data as an `untrusted::Input`. The 46 //! wrapper functions should call the `Input`'s `read_all` (or a variant 47 //! thereof) method. The wrapper functions are the only ones that should be 48 //! exposed outside the parser's module. 49 //! 50 //! 3. After receiving the input data to parse, wrap it in an `untrusted::Input` 51 //! using `untrusted::Input::from()` as early as possible. Pass the 52 //! `untrusted::Input` to the wrapper functions when they need to be parsed. 53 //! 54 //! In general parsers built using `untrusted::Reader` do not need to explicitly 55 //! check for end-of-input unless they are parsing optional constructs, because 56 //! `Reader::read_byte()` will return `Err(EndOfInput)` on end-of-input. 57 //! Similarly, parsers using `untrusted::Reader` generally don't need to check 58 //! for extra junk at the end of the input as long as the parser's API uses the 59 //! pattern described above, as `read_all` and its variants automatically check 60 //! for trailing junk. `Reader::skip_to_end()` must be used when any remaining 61 //! unread input should be ignored without triggering an error. 62 //! 63 //! untrusted.rs works best when all processing of the input data is done 64 //! through the `untrusted::Input` and `untrusted::Reader` types. In 65 //! particular, avoid trying to parse input data using functions that take 66 //! byte slices. However, when you need to access a part of the input data as 67 //! a slice to use a function that isn't written using untrusted.rs, 68 //! `Input::as_slice_less_safe()` can be used. 69 //! 70 //! It is recommend to use `use untrusted;` and then `untrusted::Input`, 71 //! `untrusted::Reader`, etc., instead of using `use untrusted::*`. Qualifying 72 //! the names with `untrusted` helps remind the reader of the code that it is 73 //! dealing with *untrusted* input. 74 //! 75 //! # Examples 76 //! 77 //! [*ring*](https://github.com/briansmith/ring)'s parser for the subset of 78 //! ASN.1 DER it needs to understand, 79 //! [`ring::der`](https://github.com/briansmith/ring/blob/master/src/der.rs), 80 //! is built on top of untrusted.rs. *ring* also uses untrusted.rs to parse ECC 81 //! public keys, RSA PKCS#1 1.5 padding, and for all other parsing it does. 82 //! 83 //! All of [webpki](https://github.com/briansmith/webpki)'s parsing of X.509 84 //! certificates (also ASN.1 DER) is done using untrusted.rs. 85 86 #![doc(html_root_url = "https://briansmith.org/rustdoc/")] 87 // `#[derive(...)]` uses `#[allow(unused_qualifications)]` internally. 88 #![deny(unused_qualifications)] 89 #![forbid( 90 anonymous_parameters, 91 box_pointers, 92 missing_docs, 93 trivial_casts, 94 trivial_numeric_casts, 95 unsafe_code, 96 unstable_features, 97 unused_extern_crates, 98 unused_import_braces, 99 unused_results, 100 variant_size_differences, 101 warnings 102 )] 103 // ANDROID: This lint is included in warnings and thus forbidden, but this version of the library 104 // is not compatible with it. 105 #![allow(rustdoc::bare_urls)] 106 #![no_std] 107 108 // ANDROID: use std to allow building as a dylib. 109 #[cfg(android_dylib)] 110 extern crate std; 111 112 /// A wrapper around `&'a [u8]` that helps in writing panic-free code. 113 /// 114 /// No methods of `Input` will ever panic. 115 #[derive(Clone, Copy, Debug, Eq)] 116 pub struct Input<'a> { 117 value: no_panic::Slice<'a>, 118 } 119 120 impl<'a> Input<'a> { 121 /// Construct a new `Input` for the given input `bytes`. from(bytes: &'a [u8]) -> Self122 pub const fn from(bytes: &'a [u8]) -> Self { 123 // This limit is important for avoiding integer overflow. In particular, 124 // `Reader` assumes that an `i + 1 > i` if `input.value.get(i)` does 125 // not return `None`. According to the Rust language reference, the 126 // maximum object size is `core::isize::MAX`, and in practice it is 127 // impossible to create an object of size `core::usize::MAX` or larger. 128 Self { 129 value: no_panic::Slice::new(bytes), 130 } 131 } 132 133 /// Returns `true` if the input is empty and false otherwise. 134 #[inline] is_empty(&self) -> bool135 pub fn is_empty(&self) -> bool { self.value.is_empty() } 136 137 /// Returns the length of the `Input`. 138 #[inline] len(&self) -> usize139 pub fn len(&self) -> usize { self.value.len() } 140 141 /// Calls `read` with the given input as a `Reader`, ensuring that `read` 142 /// consumed the entire input. If `read` does not consume the entire input, 143 /// `incomplete_read` is returned. read_all<F, R, E>(&self, incomplete_read: E, read: F) -> Result<R, E> where F: FnOnce(&mut Reader<'a>) -> Result<R, E>,144 pub fn read_all<F, R, E>(&self, incomplete_read: E, read: F) -> Result<R, E> 145 where 146 F: FnOnce(&mut Reader<'a>) -> Result<R, E>, 147 { 148 let mut input = Reader::new(*self); 149 let result = read(&mut input)?; 150 if input.at_end() { 151 Ok(result) 152 } else { 153 Err(incomplete_read) 154 } 155 } 156 157 /// Access the input as a slice so it can be processed by functions that 158 /// are not written using the Input/Reader framework. 159 #[inline] as_slice_less_safe(&self) -> &'a [u8]160 pub fn as_slice_less_safe(&self) -> &'a [u8] { self.value.as_slice_less_safe() } 161 } 162 163 impl<'a> From<&'a [u8]> for Input<'a> { 164 #[inline] from(value: &'a [u8]) -> Self165 fn from(value: &'a [u8]) -> Self { Self { value: no_panic::Slice::new(value)} } 166 } 167 168 // #[derive(PartialEq)] would result in lifetime bounds that are 169 // unnecessarily restrictive; see 170 // https://github.com/rust-lang/rust/issues/26925. 171 impl PartialEq<Input<'_>> for Input<'_> { 172 #[inline] eq(&self, other: &Input) -> bool173 fn eq(&self, other: &Input) -> bool { 174 self.as_slice_less_safe() == other.as_slice_less_safe() 175 } 176 } 177 178 impl PartialEq<[u8]> for Input<'_> { 179 #[inline] eq(&self, other: &[u8]) -> bool180 fn eq(&self, other: &[u8]) -> bool { self.as_slice_less_safe() == other } 181 } 182 183 impl PartialEq<Input<'_>> for [u8] { 184 #[inline] eq(&self, other: &Input) -> bool185 fn eq(&self, other: &Input) -> bool { other.as_slice_less_safe() == self } 186 } 187 188 /// Calls `read` with the given input as a `Reader`, ensuring that `read` 189 /// consumed the entire input. When `input` is `None`, `read` will be 190 /// called with `None`. read_all_optional<'a, F, R, E>( input: Option<Input<'a>>, incomplete_read: E, read: F, ) -> Result<R, E> where F: FnOnce(Option<&mut Reader<'a>>) -> Result<R, E>,191 pub fn read_all_optional<'a, F, R, E>( 192 input: Option<Input<'a>>, incomplete_read: E, read: F, 193 ) -> Result<R, E> 194 where 195 F: FnOnce(Option<&mut Reader<'a>>) -> Result<R, E>, 196 { 197 match input { 198 Some(input) => { 199 let mut input = Reader::new(input); 200 let result = read(Some(&mut input))?; 201 if input.at_end() { 202 Ok(result) 203 } else { 204 Err(incomplete_read) 205 } 206 }, 207 None => read(None), 208 } 209 } 210 211 /// A read-only, forward-only* cursor into the data in an `Input`. 212 /// 213 /// Using `Reader` to parse input helps to ensure that no byte of the input 214 /// will be accidentally processed more than once. Using `Reader` in 215 /// conjunction with `read_all` and `read_all_optional` helps ensure that no 216 /// byte of the input is accidentally left unprocessed. The methods of `Reader` 217 /// never panic, so `Reader` also assists the writing of panic-free code. 218 /// 219 /// \* `Reader` is not strictly forward-only because of the method 220 /// `get_input_between_marks`, which is provided mainly to support calculating 221 /// digests over parsed data. 222 #[derive(Debug)] 223 pub struct Reader<'a> { 224 input: no_panic::Slice<'a>, 225 i: usize, 226 } 227 228 /// An index into the already-parsed input of a `Reader`. 229 pub struct Mark { 230 i: usize, 231 } 232 233 impl<'a> Reader<'a> { 234 /// Construct a new Reader for the given input. Use `read_all` or 235 /// `read_all_optional` instead of `Reader::new` whenever possible. 236 #[inline] new(input: Input<'a>) -> Self237 pub fn new(input: Input<'a>) -> Self { 238 Self { 239 input: input.value, 240 i: 0, 241 } 242 } 243 244 /// Returns `true` if the reader is at the end of the input, and `false` 245 /// otherwise. 246 #[inline] at_end(&self) -> bool247 pub fn at_end(&self) -> bool { self.i == self.input.len() } 248 249 /// Returns an `Input` for already-parsed input that has had its boundaries 250 /// marked using `mark`. 251 #[inline] get_input_between_marks( &self, mark1: Mark, mark2: Mark, ) -> Result<Input<'a>, EndOfInput>252 pub fn get_input_between_marks( 253 &self, mark1: Mark, mark2: Mark, 254 ) -> Result<Input<'a>, EndOfInput> { 255 self.input 256 .subslice(mark1.i..mark2.i) 257 .map(|subslice| Input { value: subslice }) 258 .ok_or(EndOfInput) 259 } 260 261 /// Return the current position of the `Reader` for future use in a call 262 /// to `get_input_between_marks`. 263 #[inline] mark(&self) -> Mark264 pub fn mark(&self) -> Mark { Mark { i: self.i } } 265 266 /// Returns `true` if there is at least one more byte in the input and that 267 /// byte is equal to `b`, and false otherwise. 268 #[inline] peek(&self, b: u8) -> bool269 pub fn peek(&self, b: u8) -> bool { 270 match self.input.get(self.i) { 271 Some(actual_b) => b == *actual_b, 272 None => false, 273 } 274 } 275 276 /// Reads the next input byte. 277 /// 278 /// Returns `Ok(b)` where `b` is the next input byte, or `Err(EndOfInput)` 279 /// if the `Reader` is at the end of the input. 280 #[inline] read_byte(&mut self) -> Result<u8, EndOfInput>281 pub fn read_byte(&mut self) -> Result<u8, EndOfInput> { 282 match self.input.get(self.i) { 283 Some(b) => { 284 self.i += 1; // safe from overflow; see Input::from(). 285 Ok(*b) 286 }, 287 None => Err(EndOfInput), 288 } 289 } 290 291 /// Skips `num_bytes` of the input, returning the skipped input as an 292 /// `Input`. 293 /// 294 /// Returns `Ok(i)` if there are at least `num_bytes` of input remaining, 295 /// and `Err(EndOfInput)` otherwise. 296 #[inline] read_bytes(&mut self, num_bytes: usize) -> Result<Input<'a>, EndOfInput>297 pub fn read_bytes(&mut self, num_bytes: usize) -> Result<Input<'a>, EndOfInput> { 298 let new_i = self.i.checked_add(num_bytes).ok_or(EndOfInput)?; 299 let ret = self 300 .input 301 .subslice(self.i..new_i) 302 .map(|subslice| Input { value: subslice }) 303 .ok_or(EndOfInput)?; 304 self.i = new_i; 305 Ok(ret) 306 } 307 308 /// Skips the reader to the end of the input, returning the skipped input 309 /// as an `Input`. 310 #[inline] read_bytes_to_end(&mut self) -> Input<'a>311 pub fn read_bytes_to_end(&mut self) -> Input<'a> { 312 let to_skip = self.input.len() - self.i; 313 self.read_bytes(to_skip).unwrap() 314 } 315 316 /// Calls `read()` with the given input as a `Reader`. On success, returns a 317 /// pair `(bytes_read, r)` where `bytes_read` is what `read()` consumed and 318 /// `r` is `read()`'s return value. read_partial<F, R, E>(&mut self, read: F) -> Result<(Input<'a>, R), E> where F: FnOnce(&mut Reader<'a>) -> Result<R, E>,319 pub fn read_partial<F, R, E>(&mut self, read: F) -> Result<(Input<'a>, R), E> 320 where 321 F: FnOnce(&mut Reader<'a>) -> Result<R, E>, 322 { 323 let start = self.i; 324 let r = read(self)?; 325 let bytes_read = Input { 326 value: self.input.subslice(start..self.i).unwrap() 327 }; 328 Ok((bytes_read, r)) 329 } 330 331 /// Skips `num_bytes` of the input. 332 /// 333 /// Returns `Ok(i)` if there are at least `num_bytes` of input remaining, 334 /// and `Err(EndOfInput)` otherwise. 335 #[inline] skip(&mut self, num_bytes: usize) -> Result<(), EndOfInput>336 pub fn skip(&mut self, num_bytes: usize) -> Result<(), EndOfInput> { 337 self.read_bytes(num_bytes).map(|_| ()) 338 } 339 340 /// Skips the reader to the end of the input. 341 #[inline] skip_to_end(&mut self) -> ()342 pub fn skip_to_end(&mut self) -> () { let _ = self.read_bytes_to_end(); } 343 } 344 345 /// The error type used to indicate the end of the input was reached before the 346 /// operation could be completed. 347 #[derive(Clone, Copy, Debug, Eq, PartialEq)] 348 pub struct EndOfInput; 349 350 mod no_panic { 351 use core; 352 353 /// A wrapper around a slice that exposes no functions that can panic. 354 #[derive(Clone, Copy, Debug, Eq, PartialEq)] 355 pub struct Slice<'a> { 356 bytes: &'a [u8], 357 } 358 359 impl<'a> Slice<'a> { 360 #[inline] new(bytes: &'a [u8]) -> Self361 pub const fn new(bytes: &'a [u8]) -> Self { Self { bytes } } 362 363 #[inline] get(&self, i: usize) -> Option<&u8>364 pub fn get(&self, i: usize) -> Option<&u8> { self.bytes.get(i) } 365 366 #[inline] subslice(&self, r: core::ops::Range<usize>) -> Option<Self>367 pub fn subslice(&self, r: core::ops::Range<usize>) -> Option<Self> { 368 self.bytes.get(r).map(|bytes| Self { bytes }) 369 } 370 371 #[inline] is_empty(&self) -> bool372 pub fn is_empty(&self) -> bool { self.bytes.is_empty() } 373 374 #[inline] len(&self) -> usize375 pub fn len(&self) -> usize { self.bytes.len() } 376 377 #[inline] as_slice_less_safe(&self) -> &'a [u8]378 pub fn as_slice_less_safe(&self) -> &'a [u8] { self.bytes } 379 } 380 381 } // mod no_panic 382