1 use std::collections::HashMap;
2 use std::str;
3 
4 use winnow::prelude::*;
5 use winnow::{
6     ascii::float,
7     combinator::alt,
8     combinator::{cut_err, rest},
9     combinator::{delimited, preceded, separated_pair, terminated},
10     combinator::{repeat, separated},
11     error::{AddContext, ParserError},
12     stream::Partial,
13     token::{any, none_of, take, take_while},
14 };
15 
16 use crate::json::JsonValue;
17 
18 pub type Stream<'i> = Partial<&'i str>;
19 
20 /// The root element of a JSON parser is any value
21 ///
22 /// A parser has the following signature:
23 /// `&mut Stream -> PResult<Output, InputError>`, with `PResult` defined as:
24 /// `type PResult<O, E = ErrorKind> = Result<O, ErrMode<E>>;`
25 ///
26 /// most of the times you can ignore the error type and use the default (but this
27 /// examples shows custom error types later on!)
28 ///
29 /// Here we use `&str` as input type, but parsers can be generic over
30 /// the input type, work directly with `&[u8]`, or any other type that
31 /// implements the required traits.
json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>32 pub fn json<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
33     input: &mut Stream<'i>,
34 ) -> PResult<JsonValue, E> {
35     delimited(ws, json_value, ws_or_eof).parse_next(input)
36 }
37 
38 /// `alt` is a combinator that tries multiple parsers one by one, until
39 /// one of them succeeds
json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>40 fn json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
41     input: &mut Stream<'i>,
42 ) -> PResult<JsonValue, E> {
43     // `alt` combines the each value parser. It returns the result of the first
44     // successful parser, or an error
45     alt((
46         null.value(JsonValue::Null),
47         boolean.map(JsonValue::Boolean),
48         string.map(JsonValue::Str),
49         float.map(JsonValue::Num),
50         array.map(JsonValue::Array),
51         object.map(JsonValue::Object),
52     ))
53     .parse_next(input)
54 }
55 
56 /// `tag(string)` generates a parser that recognizes the argument string.
57 ///
58 /// This also shows returning a sub-slice of the original input
null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>59 fn null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
60     // This is a parser that returns `"null"` if it sees the string "null", and
61     // an error otherwise
62     "null".parse_next(input)
63 }
64 
65 /// We can combine `tag` with other functions, like `value` which returns a given constant value on
66 /// success.
boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E>67 fn boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> {
68     // This is a parser that returns `true` if it sees the string "true", and
69     // an error otherwise
70     let parse_true = "true".value(true);
71 
72     // This is a parser that returns `false` if it sees the string "false", and
73     // an error otherwise
74     let parse_false = "false".value(false);
75 
76     alt((parse_true, parse_false)).parse_next(input)
77 }
78 
79 /// This parser gathers all `char`s up into a `String`with a parse to recognize the double quote
80 /// character, before the string (using `preceded`) and after the string (using `terminated`).
string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<String, E>81 fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
82     input: &mut Stream<'i>,
83 ) -> PResult<String, E> {
84     preceded(
85         '\"',
86         // `cut_err` transforms an `ErrMode::Backtrack(e)` to `ErrMode::Cut(e)`, signaling to
87         // combinators like  `alt` that they should not try other parsers. We were in the
88         // right branch (since we found the `"` character) but encountered an error when
89         // parsing the string
90         cut_err(terminated(
91             repeat(0.., character).fold(String::new, |mut string, c| {
92                 string.push(c);
93                 string
94             }),
95             '\"',
96         )),
97     )
98     // `context` lets you add a static string to errors to provide more information in the
99     // error chain (to indicate which parser had an error)
100     .context("string")
101     .parse_next(input)
102 }
103 
104 /// You can mix the above declarative parsing with an imperative style to handle more unique cases,
105 /// like escaping
character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>106 fn character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
107     let c = none_of('\"').parse_next(input)?;
108     if c == '\\' {
109         alt((
110             any.verify_map(|c| {
111                 Some(match c {
112                     '"' | '\\' | '/' => c,
113                     'b' => '\x08',
114                     'f' => '\x0C',
115                     'n' => '\n',
116                     'r' => '\r',
117                     't' => '\t',
118                     _ => return None,
119                 })
120             }),
121             preceded('u', unicode_escape),
122         ))
123         .parse_next(input)
124     } else {
125         Ok(c)
126     }
127 }
128 
unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>129 fn unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
130     alt((
131         // Not a surrogate
132         u16_hex
133             .verify(|cp| !(0xD800..0xE000).contains(cp))
134             .map(|cp| cp as u32),
135         // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details
136         separated_pair(u16_hex, "\\u", u16_hex)
137             .verify(|(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low))
138             .map(|(high, low)| {
139                 let high_ten = (high as u32) - 0xD800;
140                 let low_ten = (low as u32) - 0xDC00;
141                 (high_ten << 10) + low_ten + 0x10000
142             }),
143     ))
144     .verify_map(
145         // Could be probably replaced with .unwrap() or _unchecked due to the verify checks
146         std::char::from_u32,
147     )
148     .parse_next(input)
149 }
150 
u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E>151 fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E> {
152     take(4usize)
153         .verify_map(|s| u16::from_str_radix(s, 16).ok())
154         .parse_next(input)
155 }
156 
157 /// Some combinators, like `separated` or `repeat`, will call a parser repeatedly,
158 /// accumulating results in a `Vec`, until it encounters an error.
159 /// If you want more control on the parser application, check out the `iterator`
160 /// combinator (cf `examples/iterator.rs`)
array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<Vec<JsonValue>, E>161 fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
162     input: &mut Stream<'i>,
163 ) -> PResult<Vec<JsonValue>, E> {
164     preceded(
165         ('[', ws),
166         cut_err(terminated(
167             separated(0.., json_value, (ws, ',', ws)),
168             (ws, ']'),
169         )),
170     )
171     .context("array")
172     .parse_next(input)
173 }
174 
object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<HashMap<String, JsonValue>, E>175 fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
176     input: &mut Stream<'i>,
177 ) -> PResult<HashMap<String, JsonValue>, E> {
178     preceded(
179         ('{', ws),
180         cut_err(terminated(
181             separated(0.., key_value, (ws, ',', ws)),
182             (ws, '}'),
183         )),
184     )
185     .context("object")
186     .parse_next(input)
187 }
188 
key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<(String, JsonValue), E>189 fn key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
190     input: &mut Stream<'i>,
191 ) -> PResult<(String, JsonValue), E> {
192     separated_pair(string, cut_err((ws, ':', ws)), json_value).parse_next(input)
193 }
194 
195 /// Parser combinators are constructed from the bottom up:
196 /// first we write parsers for the smallest elements (here a space character),
197 /// then we'll combine them in larger parsers
ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>198 fn ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
199     // Combinators like `take_while` return a function. That function is the
200     // parser,to which we can pass the input
201     take_while(0.., WS).parse_next(input)
202 }
203 
ws_or_eof<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>204 fn ws_or_eof<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
205     rest.verify(|s: &str| s.chars().all(|c| WS.contains(&c)))
206         .parse_next(input)
207 }
208 
/// The characters this parser treats as whitespace between JSON tokens.
const WS: &[char] = &[
    ' ',  // space
    '\t', // horizontal tab
    '\r', // carriage return
    '\n', // line feed
];
210 
/// Unit tests for the JSON parsers, run against `Partial` string input.
#[cfg(test)]
mod test {
    #[allow(clippy::useless_attribute)]
    #[allow(dead_code)] // its dead for benches
    use super::*;

    #[allow(clippy::useless_attribute)]
    #[allow(dead_code)] // its dead for benches
    type Error<'i> = winnow::error::InputError<Partial<&'i str>>;

    /// Valid strings (empty, plain, escaped, surrogate pair) and malformed ones.
    #[test]
    fn json_string() {
        assert_eq!(
            string::<Error<'_>>.parse_peek(Partial::new("\"\"")),
            Ok((Partial::new(""), "".to_string()))
        );
        assert_eq!(
            string::<Error<'_>>.parse_peek(Partial::new("\"abc\"")),
            Ok((Partial::new(""), "abc".to_string()))
        );
        assert_eq!(
            string::<Error<'_>>.parse_peek(Partial::new(
                "\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""
            )),
            Ok((
                Partial::new(""),
                "abc\"\\/\x08\x0C\n\r\t\x01——def".to_string()
            )),
        );
        // The surrogate pair D83D/DE10 decodes to U+1F610. The expected value is
        // written as an escape so that it survives re-encoding of this file.
        assert_eq!(
            string::<Error<'_>>.parse_peek(Partial::new("\"\\uD83D\\uDE10\"")),
            Ok((Partial::new(""), "\u{1F610}".to_string()))
        );

        // Unterminated strings.
        assert!(string::<Error<'_>>.parse_peek(Partial::new("\"")).is_err());
        assert!(string::<Error<'_>>
            .parse_peek(Partial::new("\"abc"))
            .is_err());
        assert!(string::<Error<'_>>
            .parse_peek(Partial::new("\"\\\""))
            .is_err());
        // Too few hex digits in a `\u` escape.
        assert!(string::<Error<'_>>
            .parse_peek(Partial::new("\"\\u123\""))
            .is_err());
        // Lone or mismatched surrogates.
        assert!(string::<Error<'_>>
            .parse_peek(Partial::new("\"\\uD800\""))
            .is_err());
        assert!(string::<Error<'_>>
            .parse_peek(Partial::new("\"\\uD800\\uD800\""))
            .is_err());
        assert!(string::<Error<'_>>
            .parse_peek(Partial::new("\"\\uDC00\""))
            .is_err());
    }

    /// A compact object with a number member and a string member.
    #[test]
    fn json_object() {
        use JsonValue::{Num, Object, Str};

        let input = r#"{"a":42,"b":"x"}"#;

        let expected = Object(
            vec![
                ("a".to_string(), Num(42.0)),
                ("b".to_string(), Str("x".to_string())),
            ]
            .into_iter()
            .collect(),
        );

        assert_eq!(
            json::<Error<'_>>.parse_peek(Partial::new(input)),
            Ok((Partial::new(""), expected))
        );
    }

    /// A compact array with a number element and a string element.
    #[test]
    fn json_array() {
        use JsonValue::{Array, Num, Str};

        let input = r#"[42,"x"]"#;

        let expected = Array(vec![Num(42.0), Str("x".to_string())]);

        assert_eq!(
            json::<Error<'_>>.parse_peek(Partial::new(input)),
            Ok((Partial::new(""), expected))
        );
    }

    /// Whitespace around every token, including inside empty containers.
    #[test]
    fn json_whitespace() {
        use JsonValue::{Array, Boolean, Null, Num, Object, Str};

        let input = r#"
  {
    "null" : null,
    "true"  :true ,
    "false":  false  ,
    "number" : 123e4 ,
    "string" : " abc 123 " ,
    "array" : [ false , 1 , "two" ] ,
    "object" : { "a" : 1.0 , "b" : "c" } ,
    "empty_array" : [  ] ,
    "empty_object" : {   }
  }
  "#;

        assert_eq!(
            json::<Error<'_>>.parse_peek(Partial::new(input)),
            Ok((
                Partial::new(""),
                Object(
                    vec![
                        ("null".to_string(), Null),
                        ("true".to_string(), Boolean(true)),
                        ("false".to_string(), Boolean(false)),
                        ("number".to_string(), Num(123e4)),
                        ("string".to_string(), Str(" abc 123 ".to_string())),
                        (
                            "array".to_string(),
                            Array(vec![Boolean(false), Num(1.0), Str("two".to_string())])
                        ),
                        (
                            "object".to_string(),
                            Object(
                                vec![
                                    ("a".to_string(), Num(1.0)),
                                    ("b".to_string(), Str("c".to_string())),
                                ]
                                .into_iter()
                                .collect()
                            )
                        ),
                        ("empty_array".to_string(), Array(vec![]),),
                        ("empty_object".to_string(), Object(HashMap::new()),),
                    ]
                    .into_iter()
                    .collect()
                )
            ))
        );
    }
}
355