1 // Copyright 2015 Nicholas Allegra (comex).
2 // Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or
3 // the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be
4 // copied, modified, or distributed except according to those terms.
5 
6 //! Parse strings like, and escape strings for, POSIX shells.
7 //!
8 //! Same idea as (but implementation not directly based on) the Python shlex module.
9 //!
10 //! Disabling the `std` feature (which is enabled by default) will allow the crate to work in
11 //! `no_std` environments, where the `alloc` crate, and a global allocator, are available.
12 //!
13 //! ## <span style="color:red">Warning</span>
14 //!
15 //! The [`try_quote`]/[`try_join`] family of APIs does not quote control characters (because they
16 //! cannot be quoted portably).
17 //!
18 //! This is fully safe in noninteractive contexts, like shell scripts and `sh -c` arguments (or
19 //! even scripts `source`d from interactive shells).
20 //!
21 //! But if you are quoting for human consumption, you should keep in mind that ugly inputs produce
22 //! ugly outputs (which may not be copy-pastable).
23 //!
24 //! And if by chance you are piping the output of [`try_quote`]/[`try_join`] directly to the stdin
25 //! of an interactive shell, you should stop, because control characters can lead to arbitrary
26 //! command injection.
27 //!
28 //! For more information, and for information about more minor issues, please see [quoting_warning].
29 //!
30 //! ## Compatibility
31 //!
32 //! This crate's quoting functionality tries to be compatible with **any POSIX-compatible shell**;
33 //! it's tested against `bash`, `zsh`, `dash`, Busybox `ash`, and `mksh`, plus `fish` (which is not
34 //! POSIX-compatible but close enough).
35 //!
36 //! It also aims to be compatible with Python `shlex` and C `wordexp`.
37 
38 #![cfg_attr(not(feature = "std"), no_std)]
39 
40 extern crate alloc;
41 use alloc::vec::Vec;
42 use alloc::borrow::Cow;
43 use alloc::string::String;
44 #[cfg(test)]
45 use alloc::vec;
46 #[cfg(test)]
47 use alloc::borrow::ToOwned;
48 
49 pub mod bytes;
50 #[cfg(all(doc, not(doctest)))]
51 #[path = "quoting_warning.md"]
52 pub mod quoting_warning;
53 
54 /// An iterator that takes an input string and splits it into the words using the same syntax as
55 /// the POSIX shell.
56 ///
57 /// See [`bytes::Shlex`].
58 pub struct Shlex<'a>(bytes::Shlex<'a>);
59 
60 impl<'a> Shlex<'a> {
new(in_str: &'a str) -> Self61     pub fn new(in_str: &'a str) -> Self {
62         Self(bytes::Shlex::new(in_str.as_bytes()))
63     }
64 }
65 
66 impl<'a> Iterator for Shlex<'a> {
67     type Item = String;
next(&mut self) -> Option<String>68     fn next(&mut self) -> Option<String> {
69         self.0.next().map(|byte_word| {
70             // Safety: given valid UTF-8, bytes::Shlex will always return valid UTF-8.
71             unsafe { String::from_utf8_unchecked(byte_word) }
72         })
73     }
74 }
75 
76 impl<'a> core::ops::Deref for Shlex<'a> {
77     type Target = bytes::Shlex<'a>;
78 
deref(&self) -> &Self::Target79     fn deref(&self) -> &Self::Target {
80         &self.0
81     }
82 }
83 
84 impl<'a> core::ops::DerefMut for Shlex<'a> {
deref_mut(&mut self) -> &mut Self::Target85     fn deref_mut(&mut self) -> &mut Self::Target {
86         &mut self.0
87     }
88 }
89 
90 /// Convenience function that consumes the whole string at once.  Returns None if the input was
91 /// erroneous.
split(in_str: &str) -> Option<Vec<String>>92 pub fn split(in_str: &str) -> Option<Vec<String>> {
93     let mut shl = Shlex::new(in_str);
94     let res = shl.by_ref().collect();
95     if shl.had_error { None } else { Some(res) }
96 }
97 
/// The error type of [`Quoter::quote`], [`Quoter::join`], etc. (and of their [`bytes`]
/// counterparts).
///
/// With default settings, [`QuoteError::Nul`] is the only error that can be returned, and after
/// calling `allow_nul(true)` no error can be returned at all.  Variants added in the future will
/// be opt-in: they will only be produced after enabling a corresponding non-default [`Quoter`]
/// option.
///
/// ...In theory.  Should further classes of inputs be discovered that, like nul bytes, are
/// fundamentally unsafe to quote even for non-interactive shells (which is unlikely), the risk
/// will be mitigated by adding corresponding [`QuoteError`] variants that *are* enabled by
/// default.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum QuoteError {
    /// A nul byte was found in the input.  In most cases, shells fundamentally [cannot handle
    /// strings containing nul bytes](quoting_warning#nul-bytes), regardless of how they are
    /// quoted.  If you are sure that you can handle nul bytes, call `allow_nul(true)` on the
    /// `Quoter` to let them pass through.
    Nul,
}
118 
119 impl core::fmt::Display for QuoteError {
fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result120     fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
121         match self {
122             QuoteError::Nul => f.write_str("cannot shell-quote string containing nul byte"),
123         }
124     }
125 }
126 
// `std::error::Error` needs the standard library, so this impl only exists when the `std` feature
// is enabled.  The empty body means every trait method keeps its default implementation.
#[cfg(feature = "std")]
impl std::error::Error for QuoteError {}
129 
130 /// A more configurable interface to quote strings.  If you only want the default settings you can
131 /// use the convenience functions [`try_quote`] and [`try_join`].
132 ///
133 /// The bytes equivalent is [`bytes::Quoter`].
134 #[derive(Default, Debug, Clone)]
135 pub struct Quoter {
136     inner: bytes::Quoter,
137 }
138 
139 impl Quoter {
140     /// Create a new [`Quoter`] with default settings.
141     #[inline]
new() -> Self142     pub fn new() -> Self {
143         Self::default()
144     }
145 
146     /// Set whether to allow [nul bytes](quoting_warning#nul-bytes).  By default they are not
147     /// allowed and will result in an error of [`QuoteError::Nul`].
148     #[inline]
allow_nul(mut self, allow: bool) -> Self149     pub fn allow_nul(mut self, allow: bool) -> Self {
150         self.inner = self.inner.allow_nul(allow);
151         self
152     }
153 
154     /// Convenience function that consumes an iterable of words and turns it into a single string,
155     /// quoting words when necessary. Consecutive words will be separated by a single space.
join<'a, I: IntoIterator<Item = &'a str>>(&self, words: I) -> Result<String, QuoteError>156     pub fn join<'a, I: IntoIterator<Item = &'a str>>(&self, words: I) -> Result<String, QuoteError> {
157         // Safety: given valid UTF-8, bytes::join() will always return valid UTF-8.
158         self.inner.join(words.into_iter().map(|s| s.as_bytes()))
159             .map(|bytes| unsafe { String::from_utf8_unchecked(bytes) })
160     }
161 
162     /// Given a single word, return a string suitable to encode it as a shell argument.
quote<'a>(&self, in_str: &'a str) -> Result<Cow<'a, str>, QuoteError>163     pub fn quote<'a>(&self, in_str: &'a str) -> Result<Cow<'a, str>, QuoteError> {
164         Ok(match self.inner.quote(in_str.as_bytes())? {
165             Cow::Borrowed(out) => {
166                 // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8.
167                 unsafe { core::str::from_utf8_unchecked(out) }.into()
168             }
169             Cow::Owned(out) => {
170                 // Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8.
171                 unsafe { String::from_utf8_unchecked(out) }.into()
172             }
173         })
174     }
175 }
176 
177 impl From<bytes::Quoter> for Quoter {
from(inner: bytes::Quoter) -> Quoter178     fn from(inner: bytes::Quoter) -> Quoter {
179         Quoter { inner }
180     }
181 }
182 
183 impl From<Quoter> for bytes::Quoter {
from(quoter: Quoter) -> bytes::Quoter184     fn from(quoter: Quoter) -> bytes::Quoter {
185         quoter.inner
186     }
187 }
188 
189 /// Convenience function that consumes an iterable of words and turns it into a single string,
190 /// quoting words when necessary. Consecutive words will be separated by a single space.
191 ///
192 /// Uses default settings except that nul bytes are passed through, which [may be
193 /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated.
194 ///
195 /// Equivalent to [`Quoter::new().allow_nul(true).join(words).unwrap()`](Quoter).
196 ///
197 /// (That configuration never returns `Err`, so this function does not panic.)
198 ///
199 /// The bytes equivalent is [bytes::join].
200 #[deprecated(since = "1.3.0", note = "replace with `try_join(words)?` to avoid nul byte danger")]
join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String201 pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String {
202     Quoter::new().allow_nul(true).join(words).unwrap()
203 }
204 
205 /// Convenience function that consumes an iterable of words and turns it into a single string,
206 /// quoting words when necessary. Consecutive words will be separated by a single space.
207 ///
208 /// Uses default settings.  The only error that can be returned is [`QuoteError::Nul`].
209 ///
210 /// Equivalent to [`Quoter::new().join(words)`](Quoter).
211 ///
212 /// The bytes equivalent is [bytes::try_join].
try_join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> Result<String, QuoteError>213 pub fn try_join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> Result<String, QuoteError> {
214     Quoter::new().join(words)
215 }
216 
217 /// Given a single word, return a string suitable to encode it as a shell argument.
218 ///
219 /// Uses default settings except that nul bytes are passed through, which [may be
220 /// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated.
221 ///
222 /// Equivalent to [`Quoter::new().allow_nul(true).quote(in_str).unwrap()`](Quoter).
223 ///
224 /// (That configuration never returns `Err`, so this function does not panic.)
225 ///
226 /// The bytes equivalent is [bytes::quote].
227 #[deprecated(since = "1.3.0", note = "replace with `try_quote(str)?` to avoid nul byte danger")]
quote(in_str: &str) -> Cow<str>228 pub fn quote(in_str: &str) -> Cow<str> {
229     Quoter::new().allow_nul(true).quote(in_str).unwrap()
230 }
231 
232 /// Given a single word, return a string suitable to encode it as a shell argument.
233 ///
234 /// Uses default settings.  The only error that can be returned is [`QuoteError::Nul`].
235 ///
236 /// Equivalent to [`Quoter::new().quote(in_str)`](Quoter).
237 ///
238 /// (That configuration never returns `Err`, so this function does not panic.)
239 ///
240 /// The bytes equivalent is [bytes::try_quote].
try_quote(in_str: &str) -> Result<Cow<str>, QuoteError>241 pub fn try_quote(in_str: &str) -> Result<Cow<str>, QuoteError> {
242     Quoter::new().quote(in_str)
243 }
244 
// Table of `(input, expected)` pairs for `test_split`: `Some(words)` lists the words the input
// should split into, and `None` marks an input for which `split` should return `None`.
#[cfg(test)]
static SPLIT_TEST_ITEMS: &[(&str, Option<&[&str]>)] = &[
    ("foo$baz", Some(&["foo$baz"])),
    ("foo baz", Some(&["foo", "baz"])),
    ("foo\"bar\"baz", Some(&["foobarbaz"])),
    ("foo \"bar\"baz", Some(&["foo", "barbaz"])),
    ("   foo \nbar", Some(&["foo", "bar"])),
    ("foo\\\nbar", Some(&["foobar"])),
    ("\"foo\\\nbar\"", Some(&["foobar"])),
    ("'baz\\$b'", Some(&["baz\\$b"])),
    ("'baz\\\''", None),
    ("\\", None),
    ("\"\\", None),
    ("'\\", None),
    ("\"", None),
    ("'", None),
    ("foo #bar\nbaz", Some(&["foo", "baz"])),
    ("foo #bar", Some(&["foo"])),
    ("foo#bar", Some(&["foo#bar"])),
    ("foo\"#bar", None),
    ("'\\n'", Some(&["\\n"])),
    ("'\\\\n'", Some(&["\\\\n"])),
];
268 
// Checks `split` against every row of `SPLIT_TEST_ITEMS`.
#[test]
fn test_split() {
    for &(input, output) in SPLIT_TEST_ITEMS {
        // Turn the table's borrowed expectation into the owned form that `split` returns.
        let expected: Option<Vec<String>> =
            output.map(|words| words.iter().map(|&word| word.to_owned()).collect());
        assert_eq!(split(input), expected);
    }
}
275 
// Checks the lexer's `line_no` at the point where the word on the third input line is yielded.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new("\nfoo\nbar");
    let mut saw_bar = false;
    // `while let` rather than `for`: `sh` has to stay usable inside the loop to read `line_no`.
    while let Some(word) = sh.next() {
        if word == "bar" {
            saw_bar = true;
            assert_eq!(sh.line_no, 3);
        }
    }
    // Without this, the test would pass vacuously if "bar" were never yielded.
    assert!(saw_bar, "expected the lexer to yield the word \"bar\"");
}
285 
// Checks `try_quote` against a table of (unquoted, quoted) pairs.
#[test]
#[cfg_attr(not(feature = "std"), allow(unreachable_code, unused_mut))]
fn test_quote() {
    // This is a list of (unquoted, quoted) pairs.
    // But it's using a single long (raw) string literal with an ad-hoc format, just because it's
    // hard to read if we have to put the test strings through Rust escaping on top of the escaping
    // being tested.  (Even raw string literals are noisy for short strings).
    // Ad-hoc: "NL" is replaced with a literal newline; no other escape sequences.
    // Each side is wrapped in `<...>` so that leading/trailing whitespace is unambiguous.
    let tests = r#"
        <>                => <''>
        <foobar>          => <foobar>
        <foo bar>         => <'foo bar'>
        <"foo bar'">      => <"\"foo bar'\"">
        <'foo bar'>       => <"'foo bar'">
        <">               => <'"'>
        <"'>              => <"\"'">
        <hello!world>     => <'hello!world'>
        <'hello!world>    => <"'hello"'!world'>
        <'hello!>         => <"'hello"'!'>
        <hello ^ world>   => <'hello ''^ world'>
        <hello^>          => <hello'^'>
        <!world'>         => <'!world'"'">
        <{a, b}>          => <'{a, b}'>
        <NL>              => <'NL'>
        <^>               => <'^'>
        <foo^bar>         => <foo'^bar'>
        <NLx^>            => <'NLx''^'>
        <NL^x>            => <'NL''^x'>
        <NL ^x>           => <'NL ''^x'>
        <{a,b}>           => <'{a,b}'>
        <a,b>             => <'a,b'>
        <a..b>            => <a..b>
        <'$>              => <"'"'$'>
        <"^>              => <'"''^'>
    "#;
    let mut ok = true;
    for test in tests.trim().split('\n') {
        // Expand the "NL" placeholder first, then split the row and strip the `<...>` wrappers.
        let parts: Vec<String> = test
            .replace("NL", "\n")
            .split("=>")
            .map(|part| part.trim().trim_start_matches('<').trim_end_matches('>').to_owned())
            .collect();
        assert_eq!(parts.len(), 2, "malformed test row: {:?}", test);
        let unquoted = &*parts[0];
        let quoted_expected = &*parts[1];
        let quoted_actual = try_quote(unquoted).unwrap();
        if quoted_expected != quoted_actual {
            // Without `std` there is no `println!`, so fail on the first mismatch; with `std`,
            // report every mismatch and fail once at the end.
            #[cfg(not(feature = "std"))]
            panic!("FAIL: for input <{}>, expected <{}>, got <{}>",
                     unquoted, quoted_expected, quoted_actual);
            #[cfg(feature = "std")]
            println!("FAIL: for input <{}>, expected <{}>, got <{}>",
                     unquoted, quoted_expected, quoted_actual);
            ok = false;
        }
    }
    assert!(ok);
}
344 
// Checks the deprecated `join` on an empty list, an empty word, plain words and a word that
// needs quoting.
#[test]
#[allow(deprecated)]
fn test_join() {
    let cases: Vec<(Vec<&str>, &str)> = vec![
        (vec![], ""),
        (vec![""], "''"),
        (vec!["a", "b"], "a b"),
        (vec!["foo bar", "baz"], "'foo bar' baz"),
    ];
    for (words, expected) in cases {
        assert_eq!(join(words), expected);
    }
}
353 
// With default settings, a nul byte must be rejected by both fallible entry points.
#[test]
fn test_fallible() {
    let nul = "\0";
    assert_eq!(try_join(vec![nul]), Err(QuoteError::Nul));
    assert_eq!(try_quote(nul), Err(QuoteError::Nul));
}
359