1 use std::collections::HashMap;
2 use std::str;
3
4 use winnow::prelude::*;
5 use winnow::{
6 ascii::float,
7 ascii::line_ending,
8 combinator::alt,
9 combinator::cut_err,
10 combinator::{delimited, preceded, separated_pair, terminated},
11 combinator::{repeat, separated},
12 error::{AddContext, ParserError},
13 stream::Partial,
14 token::{any, none_of, take, take_while},
15 };
16
/// A parsed JSON value, as produced by the parsers in this file.
#[derive(Debug, PartialEq, Clone)]
pub enum JsonValue {
    /// The `null` literal.
    Null,
    /// The `true` or `false` literal.
    Boolean(bool),
    /// A string, with its escape sequences already decoded.
    Str(String),
    /// A number, stored as an `f64`.
    Num(f64),
    /// A list of values, in source order.
    Array(Vec<JsonValue>),
    /// Key/value members; held in a `HashMap`, so member order is not kept.
    Object(HashMap<String, JsonValue>),
}
26
/// Input type shared by every parser in this file: a `&str` wrapped in `Partial`.
///
/// Use `Partial` to cause `ErrMode::Incomplete` while parsing
pub type Stream<'i> = Partial<&'i str>;
29
ndjson<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<Option<JsonValue>, E>30 pub fn ndjson<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
31 input: &mut Stream<'i>,
32 ) -> PResult<Option<JsonValue>, E> {
33 alt((
34 terminated(delimited(ws, json_value, ws), line_ending).map(Some),
35 line_ending.value(None),
36 ))
37 .parse_next(input)
38 }
39
// -- Besides `WS`, same as a regular JSON parser ----------------------------
41
42 /// `alt` is a combinator that tries multiple parsers one by one, until
43 /// one of them succeeds
json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<JsonValue, E>44 fn json_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
45 input: &mut Stream<'i>,
46 ) -> PResult<JsonValue, E> {
47 // `alt` combines the each value parser. It returns the result of the first
48 // successful parser, or an error
49 alt((
50 null.value(JsonValue::Null),
51 boolean.map(JsonValue::Boolean),
52 string.map(JsonValue::Str),
53 float.map(JsonValue::Num),
54 array.map(JsonValue::Array),
55 object.map(JsonValue::Object),
56 ))
57 .parse_next(input)
58 }
59
60 /// `tag(string)` generates a parser that recognizes the argument string.
61 ///
62 /// This also shows returning a sub-slice of the original input
null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>63 fn null<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
64 // This is a parser that returns `"null"` if it sees the string "null", and
65 // an error otherwise
66 "null".parse_next(input)
67 }
68
69 /// We can combine `tag` with other functions, like `value` which returns a given constant value on
70 /// success.
boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E>71 fn boolean<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<bool, E> {
72 // This is a parser that returns `true` if it sees the string "true", and
73 // an error otherwise
74 let parse_true = "true".value(true);
75
76 // This is a parser that returns `false` if it sees the string "false", and
77 // an error otherwise
78 let parse_false = "false".value(false);
79
80 alt((parse_true, parse_false)).parse_next(input)
81 }
82
83 /// This parser gathers all `char`s up into a `String`with a parse to recognize the double quote
84 /// character, before the string (using `preceded`) and after the string (using `terminated`).
string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<String, E>85 fn string<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
86 input: &mut Stream<'i>,
87 ) -> PResult<String, E> {
88 preceded(
89 '\"',
90 // `cut_err` transforms an `ErrMode::Backtrack(e)` to `ErrMode::Cut(e)`, signaling to
91 // combinators like `alt` that they should not try other parsers. We were in the
92 // right branch (since we found the `"` character) but encountered an error when
93 // parsing the string
94 cut_err(terminated(
95 repeat(0.., character).fold(String::new, |mut string, c| {
96 string.push(c);
97 string
98 }),
99 '\"',
100 )),
101 )
102 // `context` lets you add a static string to errors to provide more information in the
103 // error chain (to indicate which parser had an error)
104 .context("string")
105 .parse_next(input)
106 }
107
108 /// You can mix the above declarative parsing with an imperative style to handle more unique cases,
109 /// like escaping
character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>110 fn character<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
111 let c = none_of('"').parse_next(input)?;
112 if c == '\\' {
113 alt((
114 any.verify_map(|c| {
115 Some(match c {
116 '"' | '\\' | '/' => c,
117 'b' => '\x08',
118 'f' => '\x0C',
119 'n' => '\n',
120 'r' => '\r',
121 't' => '\t',
122 _ => return None,
123 })
124 }),
125 preceded('u', unicode_escape),
126 ))
127 .parse_next(input)
128 } else {
129 Ok(c)
130 }
131 }
132
unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E>133 fn unicode_escape<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<char, E> {
134 alt((
135 // Not a surrogate
136 u16_hex
137 .verify(|cp| !(0xD800..0xE000).contains(cp))
138 .map(|cp| cp as u32),
139 // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details
140 separated_pair(u16_hex, "\\u", u16_hex)
141 .verify(|(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low))
142 .map(|(high, low)| {
143 let high_ten = (high as u32) - 0xD800;
144 let low_ten = (low as u32) - 0xDC00;
145 (high_ten << 10) + low_ten + 0x10000
146 }),
147 ))
148 .verify_map(
149 // Could be probably replaced with .unwrap() or _unchecked due to the verify checks
150 std::char::from_u32,
151 )
152 .parse_next(input)
153 }
154
u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E>155 fn u16_hex<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<u16, E> {
156 take(4usize)
157 .verify_map(|s| u16::from_str_radix(s, 16).ok())
158 .parse_next(input)
159 }
160
161 /// Some combinators, like `separated` or `repeat`, will call a parser repeatedly,
162 /// accumulating results in a `Vec`, until it encounters an error.
163 /// If you want more control on the parser application, check out the `iterator`
164 /// combinator (cf `examples/iterator.rs`)
array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<Vec<JsonValue>, E>165 fn array<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
166 input: &mut Stream<'i>,
167 ) -> PResult<Vec<JsonValue>, E> {
168 preceded(
169 ('[', ws),
170 cut_err(terminated(
171 separated(0.., json_value, (ws, ',', ws)),
172 (ws, ']'),
173 )),
174 )
175 .context("array")
176 .parse_next(input)
177 }
178
object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<HashMap<String, JsonValue>, E>179 fn object<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
180 input: &mut Stream<'i>,
181 ) -> PResult<HashMap<String, JsonValue>, E> {
182 preceded(
183 ('{', ws),
184 cut_err(terminated(
185 separated(0.., key_value, (ws, ',', ws)),
186 (ws, '}'),
187 )),
188 )
189 .context("object")
190 .parse_next(input)
191 }
192
key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>( input: &mut Stream<'i>, ) -> PResult<(String, JsonValue), E>193 fn key_value<'i, E: ParserError<Stream<'i>> + AddContext<Stream<'i>, &'static str>>(
194 input: &mut Stream<'i>,
195 ) -> PResult<(String, JsonValue), E> {
196 separated_pair(string, cut_err((ws, ':', ws)), json_value).parse_next(input)
197 }
198
199 /// Parser combinators are constructed from the bottom up:
200 /// first we write parsers for the smallest elements (here a space character),
201 /// then we'll combine them in larger parsers
ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E>202 fn ws<'i, E: ParserError<Stream<'i>>>(input: &mut Stream<'i>) -> PResult<&'i str, E> {
203 // Combinators like `take_while` return a function. That function is the
204 // parser,to which we can pass the input
205 take_while(0.., WS).parse_next(input)
206 }
207
/// Characters skipped by `ws`: space and tab only. Line endings are not
/// included here; `ndjson` matches them separately with `line_ending`.
const WS: &[char] = &[' ', '\t'];
209
/// Unit tests for the string parser and for whole-line `ndjson` parsing.
#[cfg(test)]
mod test {
    #[allow(clippy::useless_attribute)]
    #[allow(dead_code)] // its dead for benches
    use super::*;

    /// Concrete error type used to instantiate the generic parsers in tests.
    #[allow(clippy::useless_attribute)]
    #[allow(dead_code)] // its dead for benches
    type Error<'i> = winnow::error::InputError<Partial<&'i str>>;

    /// Strings: empty, plain, every escape form, surrogate pairs, and malformed input.
    #[test]
    fn json_string() {
        assert_eq!(
            string::<Error<'_>>.parse_peek(Partial::new("\"\"")),
            Ok((Partial::new(""), "".to_string()))
        );
        assert_eq!(
            string::<Error<'_>>.parse_peek(Partial::new("\"abc\"")),
            Ok((Partial::new(""), "abc".to_string()))
        );
        // The expected em-dashes are written as escapes so the literal cannot
        // be damaged by a re-encoding of this file.
        assert_eq!(
            string::<Error<'_>>.parse_peek(Partial::new(
                "\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""
            )),
            Ok((
                Partial::new(""),
                "abc\"\\/\x08\x0C\n\r\t\x01\u{2014}\u{2014}def".to_string()
            )),
        );
        // The surrogate pair D83D/DE10 decodes to the single code point U+1F610.
        assert_eq!(
            string::<Error<'_>>.parse_peek(Partial::new("\"\\uD83D\\uDE10\"")),
            Ok((Partial::new(""), "\u{1F610}".to_string()))
        );

        // Unterminated strings.
        assert!(string::<Error<'_>>.parse_peek(Partial::new("\"")).is_err());
        assert!(string::<Error<'_>>
            .parse_peek(Partial::new("\"abc"))
            .is_err());
        assert!(string::<Error<'_>>
            .parse_peek(Partial::new("\"\\\""))
            .is_err());
        // Too few hex digits.
        assert!(string::<Error<'_>>
            .parse_peek(Partial::new("\"\\u123\""))
            .is_err());
        // Lone or mismatched surrogates.
        assert!(string::<Error<'_>>
            .parse_peek(Partial::new("\"\\uD800\""))
            .is_err());
        assert!(string::<Error<'_>>
            .parse_peek(Partial::new("\"\\uD800\\uD800\""))
            .is_err());
        assert!(string::<Error<'_>>
            .parse_peek(Partial::new("\"\\uDC00\""))
            .is_err());
    }

    /// A compact object on one line, terminated by a newline.
    #[test]
    fn json_object() {
        use JsonValue::{Num, Object, Str};

        // The closing `"#` must stay at column 0: the input has to end right
        // after the newline for the remaining input to be empty.
        let input = r#"{"a":42,"b":"x"}
"#;

        let expected = Object(
            vec![
                ("a".to_string(), Num(42.0)),
                ("b".to_string(), Str("x".to_string())),
            ]
            .into_iter()
            .collect(),
        );

        assert_eq!(
            ndjson::<Error<'_>>.parse_peek(Partial::new(input)),
            Ok((Partial::new(""), Some(expected)))
        );
    }

    /// A compact array on one line, terminated by a newline.
    #[test]
    fn json_array() {
        use JsonValue::{Array, Num, Str};

        // The closing `"#` must stay at column 0 (see `json_object`).
        let input = r#"[42,"x"]
"#;

        let expected = Array(vec![Num(42.0), Str("x".to_string())]);

        assert_eq!(
            ndjson::<Error<'_>>.parse_peek(Partial::new(input)),
            Ok((Partial::new(""), Some(expected)))
        );
    }

    /// Spaces are tolerated around every structural token.
    #[test]
    fn json_whitespace() {
        use JsonValue::{Array, Boolean, Null, Num, Object, Str};

        // The closing `"#` must stay at column 0 (see `json_object`).
        let input = r#" { "null" : null, "true" :true , "false": false , "number" : 123e4 , "string" : " abc 123 " , "array" : [ false , 1 , "two" ] , "object" : { "a" : 1.0 , "b" : "c" } , "empty_array" : [ ] , "empty_object" : { } }
"#;

        assert_eq!(
            ndjson::<Error<'_>>.parse_peek(Partial::new(input)),
            Ok((
                Partial::new(""),
                Some(Object(
                    vec![
                        ("null".to_string(), Null),
                        ("true".to_string(), Boolean(true)),
                        ("false".to_string(), Boolean(false)),
                        ("number".to_string(), Num(123e4)),
                        ("string".to_string(), Str(" abc 123 ".to_string())),
                        (
                            "array".to_string(),
                            Array(vec![Boolean(false), Num(1.0), Str("two".to_string())])
                        ),
                        (
                            "object".to_string(),
                            Object(
                                vec![
                                    ("a".to_string(), Num(1.0)),
                                    ("b".to_string(), Str("c".to_string())),
                                ]
                                .into_iter()
                                .collect()
                            )
                        ),
                        ("empty_array".to_string(), Array(vec![]),),
                        ("empty_object".to_string(), Object(HashMap::new()),),
                    ]
                    .into_iter()
                    .collect()
                ))
            ))
        );
    }
}
345