1 #include "Python.h"
2 #include "structmember.h"
3 #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
4 #define Py_TYPE(ob)     (((PyObject*)(ob))->ob_type)
5 #endif
6 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
7 typedef int Py_ssize_t;
8 #define PY_SSIZE_T_MAX INT_MAX
9 #define PY_SSIZE_T_MIN INT_MIN
10 #define PyInt_FromSsize_t PyInt_FromLong
11 #define PyInt_AsSsize_t PyInt_AsLong
12 #endif
13 #ifndef Py_IS_FINITE
14 #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
15 #endif
16 
17 #ifdef __GNUC__
18 #define UNUSED __attribute__((__unused__))
19 #else
20 #define UNUSED
21 #endif
22 
23 #define DEFAULT_ENCODING "utf-8"
24 
25 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
26 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
27 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
28 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
29 
30 static PyTypeObject PyScannerType;
31 static PyTypeObject PyEncoderType;
32 
33 typedef struct _PyScannerObject {
34     PyObject_HEAD
35     PyObject *encoding;
36     PyObject *strict;
37     PyObject *object_hook;
38     PyObject *pairs_hook;
39     PyObject *parse_float;
40     PyObject *parse_int;
41     PyObject *parse_constant;
42 } PyScannerObject;
43 
44 static PyMemberDef scanner_members[] = {
45     {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
46     {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
47     {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
48     {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
49     {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
50     {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
51     {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
52     {NULL}
53 };
54 
55 typedef struct _PyEncoderObject {
56     PyObject_HEAD
57     PyObject *markers;
58     PyObject *defaultfn;
59     PyObject *encoder;
60     PyObject *indent;
61     PyObject *key_separator;
62     PyObject *item_separator;
63     PyObject *sort_keys;
64     PyObject *skipkeys;
65     int fast_encode;
66     int allow_nan;
67 } PyEncoderObject;
68 
69 static PyMemberDef encoder_members[] = {
70     {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
71     {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
72     {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
73     {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
74     {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
75     {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
76     {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
77     {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
78     {NULL}
79 };
80 
81 static Py_ssize_t
82 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
83 static PyObject *
84 ascii_escape_unicode(PyObject *pystr);
85 static PyObject *
86 ascii_escape_str(PyObject *pystr);
87 static PyObject *
88 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
89 void init_json(void);
90 static PyObject *
91 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
92 static PyObject *
93 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
94 static PyObject *
95 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
96 static PyObject *
97 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
98 static void
99 scanner_dealloc(PyObject *self);
100 static int
101 scanner_clear(PyObject *self);
102 static PyObject *
103 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
104 static void
105 encoder_dealloc(PyObject *self);
106 static int
107 encoder_clear(PyObject *self);
108 static int
109 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
110 static int
111 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
112 static int
113 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
114 static PyObject *
115 _encoded_const(PyObject *obj);
116 static void
117 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
118 static PyObject *
119 encoder_encode_string(PyEncoderObject *s, PyObject *obj);
120 static int
121 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
122 static PyObject *
123 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
124 static PyObject *
125 encoder_encode_float(PyEncoderObject *s, PyObject *obj);
126 
/* True when c is a printable ASCII character that can be emitted unescaped
   inside a JSON string: ' ' through '~', excluding backslash and double
   quote. Every use of the argument is parenthesized so that an arbitrary
   expression (e.g. a ternary) is tested as a whole; the argument is still
   evaluated more than once, so it must be free of side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Output characters budgeted per input character by the escaping routines.
   MIN_EXPANSION is the length of one "\uXXXX" escape; on wide-unicode
   builds MAX_EXPANSION doubles it to fit an escaped surrogate pair
   "\uXXXX\uXXXX". */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
136 
137 static int
_convertPyInt_AsSsize_t(PyObject * o,Py_ssize_t * size_ptr)138 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
139 {
140     /* PyObject to Py_ssize_t converter */
141     *size_ptr = PyInt_AsSsize_t(o);
142     if (*size_ptr == -1 && PyErr_Occurred())
143         return 0;
144     return 1;
145 }
146 
147 static PyObject *
_convertPyInt_FromSsize_t(Py_ssize_t * size_ptr)148 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
149 {
150     /* Py_ssize_t to PyObject converter */
151     return PyInt_FromSsize_t(*size_ptr);
152 }
153 
154 static Py_ssize_t
ascii_escape_char(Py_UNICODE c,char * output,Py_ssize_t chars)155 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
156 {
157     /* Escape unicode code point c to ASCII escape sequences
158     in char *output. output must have at least 12 bytes unused to
159     accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
160     output[chars++] = '\\';
161     switch (c) {
162         case '\\': output[chars++] = (char)c; break;
163         case '"': output[chars++] = (char)c; break;
164         case '\b': output[chars++] = 'b'; break;
165         case '\f': output[chars++] = 'f'; break;
166         case '\n': output[chars++] = 'n'; break;
167         case '\r': output[chars++] = 'r'; break;
168         case '\t': output[chars++] = 't'; break;
169         default:
170 #ifdef Py_UNICODE_WIDE
171             if (c >= 0x10000) {
172                 /* UTF-16 surrogate pair */
173                 Py_UNICODE v = c - 0x10000;
174                 c = 0xd800 | ((v >> 10) & 0x3ff);
175                 output[chars++] = 'u';
176                 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
177                 output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
178                 output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
179                 output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
180                 c = 0xdc00 | (v & 0x3ff);
181                 output[chars++] = '\\';
182             }
183 #endif
184             output[chars++] = 'u';
185             output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
186             output[chars++] = "0123456789abcdef"[(c >>  8) & 0xf];
187             output[chars++] = "0123456789abcdef"[(c >>  4) & 0xf];
188             output[chars++] = "0123456789abcdef"[(c      ) & 0xf];
189     }
190     return chars;
191 }
192 
193 static PyObject *
ascii_escape_unicode(PyObject * pystr)194 ascii_escape_unicode(PyObject *pystr)
195 {
196     /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
197     Py_ssize_t i;
198     Py_ssize_t input_chars;
199     Py_ssize_t output_size;
200     Py_ssize_t max_output_size;
201     Py_ssize_t chars;
202     Py_ssize_t incr;
203     PyObject *rval;
204     char *output;
205     Py_UNICODE *input_unicode;
206 
207     input_chars = PyUnicode_GET_SIZE(pystr);
208     input_unicode = PyUnicode_AS_UNICODE(pystr);
209 
210     output_size = input_chars;
211     incr = 2; /* for quotes */
212     /* One char input can be up to 6 chars output, estimate 4 of these */
213     incr += MIN_EXPANSION * 4;
214     if (PY_SSIZE_T_MAX - incr < output_size) {
215         PyErr_NoMemory();
216         return NULL;
217     }
218     output_size += incr;
219     if (PY_SSIZE_T_MAX / MAX_EXPANSION < input_chars ||
220         PY_SSIZE_T_MAX - 2 < input_chars * MAX_EXPANSION)
221         max_output_size = PY_SSIZE_T_MAX;
222     else
223         max_output_size = 2 + (input_chars * MAX_EXPANSION);
224     rval = PyString_FromStringAndSize(NULL, output_size);
225     if (rval == NULL) {
226         return NULL;
227     }
228     output = PyString_AS_STRING(rval);
229     chars = 0;
230     output[chars++] = '"';
231     for (i = 0; i < input_chars; i++) {
232         Py_UNICODE c = input_unicode[i];
233         if (S_CHAR(c)) {
234             output[chars++] = (char)c;
235         }
236         else {
237             chars = ascii_escape_char(c, output, chars);
238         }
239         if (output_size - chars < (1 + MAX_EXPANSION)) {
240             if (output_size == PY_SSIZE_T_MAX) {
241                 Py_DECREF(rval);
242                 PyErr_NoMemory();
243                 return NULL;
244             }
245             /* There's more than four, so let's resize by a lot */
246             if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
247                 output_size *= 2;
248             else
249                 output_size = max_output_size;
250             if (_PyString_Resize(&rval, output_size) == -1) {
251                 return NULL;
252             }
253             output = PyString_AS_STRING(rval);
254         }
255     }
256     output[chars++] = '"';
257     if (_PyString_Resize(&rval, chars) == -1) {
258         return NULL;
259     }
260     return rval;
261 }
262 
263 static PyObject *
ascii_escape_str(PyObject * pystr)264 ascii_escape_str(PyObject *pystr)
265 {
266     /* Take a PyString pystr and return a new ASCII-only escaped PyString */
267     Py_ssize_t i;
268     Py_ssize_t input_chars;
269     Py_ssize_t output_size;
270     Py_ssize_t max_output_size;
271     Py_ssize_t chars;
272     Py_ssize_t incr;
273     PyObject *rval;
274     char *output;
275     char *input_str;
276 
277     input_chars = PyString_GET_SIZE(pystr);
278     input_str = PyString_AS_STRING(pystr);
279 
280     /* Fast path for a string that's already ASCII */
281     for (i = 0; i < input_chars; i++) {
282         Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
283         if (!S_CHAR(c)) {
284             /* If we have to escape something, scan the string for unicode */
285             Py_ssize_t j;
286             for (j = i; j < input_chars; j++) {
287                 c = (Py_UNICODE)(unsigned char)input_str[j];
288                 if (c > 0x7f) {
289                     /* We hit a non-ASCII character, bail to unicode mode */
290                     PyObject *uni;
291                     uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
292                     if (uni == NULL) {
293                         return NULL;
294                     }
295                     rval = ascii_escape_unicode(uni);
296                     Py_DECREF(uni);
297                     return rval;
298                 }
299             }
300             break;
301         }
302     }
303 
304     output_size = input_chars;
305     incr = 2; /* for quotes */
306     if (i != input_chars) {
307         /* One char input can be up to 6 chars output, estimate 4 of these */
308         incr += MIN_EXPANSION * 4;
309     }
310     if (PY_SSIZE_T_MAX - incr < output_size) {
311         PyErr_NoMemory();
312         return NULL;
313     }
314     output_size += incr;
315     if (PY_SSIZE_T_MAX / MIN_EXPANSION < input_chars ||
316         PY_SSIZE_T_MAX - 2 < input_chars * MIN_EXPANSION)
317         max_output_size = PY_SSIZE_T_MAX;
318     else
319         max_output_size = 2 + (input_chars * MIN_EXPANSION);
320     rval = PyString_FromStringAndSize(NULL, output_size);
321     if (rval == NULL) {
322         return NULL;
323     }
324     output = PyString_AS_STRING(rval);
325     output[0] = '"';
326 
327     /* We know that everything up to i is ASCII already */
328     chars = i + 1;
329     memcpy(&output[1], input_str, i);
330 
331     for (; i < input_chars; i++) {
332         Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
333         if (S_CHAR(c)) {
334             output[chars++] = (char)c;
335         }
336         else {
337             chars = ascii_escape_char(c, output, chars);
338         }
339         /* An ASCII char can't possibly expand to a surrogate! */
340         if (output_size - chars < (1 + MIN_EXPANSION)) {
341             if (output_size == PY_SSIZE_T_MAX) {
342                 Py_DECREF(rval);
343                 PyErr_NoMemory();
344                 return NULL;
345             }
346             /* There's more than four, so let's resize by a lot */
347             if (PY_SSIZE_T_MAX / 2 >= output_size && output_size * 2 < max_output_size)
348                 output_size *= 2;
349             else
350                 output_size = max_output_size;
351             if (_PyString_Resize(&rval, output_size) == -1) {
352                 return NULL;
353             }
354             output = PyString_AS_STRING(rval);
355         }
356     }
357     output[chars++] = '"';
358     if (_PyString_Resize(&rval, chars) == -1) {
359         return NULL;
360     }
361     return rval;
362 }
363 
364 static void
raise_errmsg(char * msg,PyObject * s,Py_ssize_t end)365 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
366 {
367     /* Use the Python function json.decoder.errmsg to raise a nice
368     looking ValueError exception */
369     static PyObject *errmsg_fn = NULL;
370     PyObject *pymsg;
371     if (errmsg_fn == NULL) {
372         PyObject *decoder = PyImport_ImportModule("json.decoder");
373         if (decoder == NULL)
374             return;
375         errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
376         Py_DECREF(decoder);
377         if (errmsg_fn == NULL)
378             return;
379     }
380     pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
381     if (pymsg) {
382         PyErr_SetObject(PyExc_ValueError, pymsg);
383         Py_DECREF(pymsg);
384     }
385 }
386 
387 static PyObject *
join_list_unicode(PyObject * lst)388 join_list_unicode(PyObject *lst)
389 {
390     /* return u''.join(lst) */
391     static PyObject *joinfn = NULL;
392     if (joinfn == NULL) {
393         PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
394         if (ustr == NULL)
395             return NULL;
396 
397         joinfn = PyObject_GetAttrString(ustr, "join");
398         Py_DECREF(ustr);
399         if (joinfn == NULL)
400             return NULL;
401     }
402     return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
403 }
404 
405 static PyObject *
_build_rval_index_tuple(PyObject * rval,Py_ssize_t idx)406 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
407     /* return (rval, idx) tuple, stealing reference to rval */
408     PyObject *tpl;
409     PyObject *pyidx;
410     /*
411     steal a reference to rval, returns (rval, idx)
412     */
413     if (rval == NULL) {
414         return NULL;
415     }
416     pyidx = PyInt_FromSsize_t(idx);
417     if (pyidx == NULL) {
418         Py_DECREF(rval);
419         return NULL;
420     }
421     tpl = PyTuple_New(2);
422     if (tpl == NULL) {
423         Py_DECREF(pyidx);
424         Py_DECREF(rval);
425         return NULL;
426     }
427     PyTuple_SET_ITEM(tpl, 0, rval);
428     PyTuple_SET_ITEM(tpl, 1, pyidx);
429     return tpl;
430 }
431 
432 static PyObject *
scanstring_str(PyObject * pystr,Py_ssize_t end,char * encoding,int strict,Py_ssize_t * next_end_ptr)433 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
434 {
435     /* Read the JSON string from PyString pystr.
436     end is the index of the first character after the quote.
437     encoding is the encoding of pystr (must be an ASCII superset)
438     if strict is zero then literal control characters are allowed
439     *next_end_ptr is a return-by-reference index of the character
440         after the end quote
441 
442     Return value is a new PyString (if ASCII-only) or PyUnicode
443     */
444     PyObject *rval;
445     Py_ssize_t len = PyString_GET_SIZE(pystr);
446     Py_ssize_t begin = end - 1;
447     Py_ssize_t next;
448     char *buf = PyString_AS_STRING(pystr);
449     PyObject *chunks = PyList_New(0);
450     if (chunks == NULL) {
451         goto bail;
452     }
453     if (end < 0 || len <= end) {
454         PyErr_SetString(PyExc_ValueError, "end is out of bounds");
455         goto bail;
456     }
457     while (1) {
458         /* Find the end of the string or the next escape */
459         Py_UNICODE c = 0;
460         PyObject *chunk = NULL;
461         for (next = end; next < len; next++) {
462             c = (unsigned char)buf[next];
463             if (c == '"' || c == '\\') {
464                 break;
465             }
466             else if (strict && c <= 0x1f) {
467                 raise_errmsg("Invalid control character at", pystr, next);
468                 goto bail;
469             }
470         }
471         if (!(c == '"' || c == '\\')) {
472             raise_errmsg("Unterminated string starting at", pystr, begin);
473             goto bail;
474         }
475         /* Pick up this chunk if it's not zero length */
476         if (next != end) {
477             PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end);
478             if (strchunk == NULL) {
479                 goto bail;
480             }
481             chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
482             Py_DECREF(strchunk);
483             if (chunk == NULL) {
484                 goto bail;
485             }
486             if (PyList_Append(chunks, chunk)) {
487                 Py_DECREF(chunk);
488                 goto bail;
489             }
490             Py_DECREF(chunk);
491         }
492         next++;
493         if (c == '"') {
494             end = next;
495             break;
496         }
497         if (next == len) {
498             raise_errmsg("Unterminated string starting at", pystr, begin);
499             goto bail;
500         }
501         c = buf[next];
502         if (c != 'u') {
503             /* Non-unicode backslash escapes */
504             end = next + 1;
505             switch (c) {
506                 case '"': break;
507                 case '\\': break;
508                 case '/': break;
509                 case 'b': c = '\b'; break;
510                 case 'f': c = '\f'; break;
511                 case 'n': c = '\n'; break;
512                 case 'r': c = '\r'; break;
513                 case 't': c = '\t'; break;
514                 default: c = 0;
515             }
516             if (c == 0) {
517                 raise_errmsg("Invalid \\escape", pystr, end - 2);
518                 goto bail;
519             }
520         }
521         else {
522             c = 0;
523             next++;
524             end = next + 4;
525             if (end >= len) {
526                 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
527                 goto bail;
528             }
529             /* Decode 4 hex digits */
530             for (; next < end; next++) {
531                 Py_UNICODE digit = buf[next];
532                 c <<= 4;
533                 switch (digit) {
534                     case '0': case '1': case '2': case '3': case '4':
535                     case '5': case '6': case '7': case '8': case '9':
536                         c |= (digit - '0'); break;
537                     case 'a': case 'b': case 'c': case 'd': case 'e':
538                     case 'f':
539                         c |= (digit - 'a' + 10); break;
540                     case 'A': case 'B': case 'C': case 'D': case 'E':
541                     case 'F':
542                         c |= (digit - 'A' + 10); break;
543                     default:
544                         raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
545                         goto bail;
546                 }
547             }
548 #ifdef Py_UNICODE_WIDE
549             /* Surrogate pair */
550             if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
551                 buf[next++] == '\\' &&
552                 buf[next++] == 'u') {
553                 Py_UNICODE c2 = 0;
554                 end += 6;
555                 /* Decode 4 hex digits */
556                 for (; next < end; next++) {
557                     Py_UNICODE digit = buf[next];
558                     c2 <<= 4;
559                     switch (digit) {
560                         case '0': case '1': case '2': case '3': case '4':
561                         case '5': case '6': case '7': case '8': case '9':
562                             c2 |= (digit - '0'); break;
563                         case 'a': case 'b': case 'c': case 'd': case 'e':
564                         case 'f':
565                             c2 |= (digit - 'a' + 10); break;
566                         case 'A': case 'B': case 'C': case 'D': case 'E':
567                         case 'F':
568                             c2 |= (digit - 'A' + 10); break;
569                         default:
570                             raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
571                             goto bail;
572                     }
573                 }
574                 if ((c2 & 0xfc00) == 0xdc00)
575                     c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
576                 else
577                     end -= 6;
578             }
579 #endif
580         }
581         chunk = PyUnicode_FromUnicode(&c, 1);
582         if (chunk == NULL) {
583             goto bail;
584         }
585         if (PyList_Append(chunks, chunk)) {
586             Py_DECREF(chunk);
587             goto bail;
588         }
589         Py_DECREF(chunk);
590     }
591 
592     rval = join_list_unicode(chunks);
593     if (rval == NULL) {
594         goto bail;
595     }
596     Py_CLEAR(chunks);
597     *next_end_ptr = end;
598     return rval;
599 bail:
600     *next_end_ptr = -1;
601     Py_XDECREF(chunks);
602     return NULL;
603 }
604 
605 
606 static PyObject *
scanstring_unicode(PyObject * pystr,Py_ssize_t end,int strict,Py_ssize_t * next_end_ptr)607 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
608 {
609     /* Read the JSON string from PyUnicode pystr.
610     end is the index of the first character after the quote.
611     if strict is zero then literal control characters are allowed
612     *next_end_ptr is a return-by-reference index of the character
613         after the end quote
614 
615     Return value is a new PyUnicode
616     */
617     PyObject *rval;
618     Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
619     Py_ssize_t begin = end - 1;
620     Py_ssize_t next;
621     const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
622     PyObject *chunks = PyList_New(0);
623     if (chunks == NULL) {
624         goto bail;
625     }
626     if (end < 0 || len <= end) {
627         PyErr_SetString(PyExc_ValueError, "end is out of bounds");
628         goto bail;
629     }
630     while (1) {
631         /* Find the end of the string or the next escape */
632         Py_UNICODE c = 0;
633         PyObject *chunk = NULL;
634         for (next = end; next < len; next++) {
635             c = buf[next];
636             if (c == '"' || c == '\\') {
637                 break;
638             }
639             else if (strict && c <= 0x1f) {
640                 raise_errmsg("Invalid control character at", pystr, next);
641                 goto bail;
642             }
643         }
644         if (!(c == '"' || c == '\\')) {
645             raise_errmsg("Unterminated string starting at", pystr, begin);
646             goto bail;
647         }
648         /* Pick up this chunk if it's not zero length */
649         if (next != end) {
650             chunk = PyUnicode_FromUnicode(&buf[end], next - end);
651             if (chunk == NULL) {
652                 goto bail;
653             }
654             if (PyList_Append(chunks, chunk)) {
655                 Py_DECREF(chunk);
656                 goto bail;
657             }
658             Py_DECREF(chunk);
659         }
660         next++;
661         if (c == '"') {
662             end = next;
663             break;
664         }
665         if (next == len) {
666             raise_errmsg("Unterminated string starting at", pystr, begin);
667             goto bail;
668         }
669         c = buf[next];
670         if (c != 'u') {
671             /* Non-unicode backslash escapes */
672             end = next + 1;
673             switch (c) {
674                 case '"': break;
675                 case '\\': break;
676                 case '/': break;
677                 case 'b': c = '\b'; break;
678                 case 'f': c = '\f'; break;
679                 case 'n': c = '\n'; break;
680                 case 'r': c = '\r'; break;
681                 case 't': c = '\t'; break;
682                 default: c = 0;
683             }
684             if (c == 0) {
685                 raise_errmsg("Invalid \\escape", pystr, end - 2);
686                 goto bail;
687             }
688         }
689         else {
690             c = 0;
691             next++;
692             end = next + 4;
693             if (end >= len) {
694                 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
695                 goto bail;
696             }
697             /* Decode 4 hex digits */
698             for (; next < end; next++) {
699                 Py_UNICODE digit = buf[next];
700                 c <<= 4;
701                 switch (digit) {
702                     case '0': case '1': case '2': case '3': case '4':
703                     case '5': case '6': case '7': case '8': case '9':
704                         c |= (digit - '0'); break;
705                     case 'a': case 'b': case 'c': case 'd': case 'e':
706                     case 'f':
707                         c |= (digit - 'a' + 10); break;
708                     case 'A': case 'B': case 'C': case 'D': case 'E':
709                     case 'F':
710                         c |= (digit - 'A' + 10); break;
711                     default:
712                         raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
713                         goto bail;
714                 }
715             }
716 #ifdef Py_UNICODE_WIDE
717             /* Surrogate pair */
718             if ((c & 0xfc00) == 0xd800 && end + 6 < len &&
719                 buf[next++] == '\\' && buf[next++] == 'u') {
720                 Py_UNICODE c2 = 0;
721                 end += 6;
722                 /* Decode 4 hex digits */
723                 for (; next < end; next++) {
724                     Py_UNICODE digit = buf[next];
725                     c2 <<= 4;
726                     switch (digit) {
727                         case '0': case '1': case '2': case '3': case '4':
728                         case '5': case '6': case '7': case '8': case '9':
729                             c2 |= (digit - '0'); break;
730                         case 'a': case 'b': case 'c': case 'd': case 'e':
731                         case 'f':
732                             c2 |= (digit - 'a' + 10); break;
733                         case 'A': case 'B': case 'C': case 'D': case 'E':
734                         case 'F':
735                             c2 |= (digit - 'A' + 10); break;
736                         default:
737                             raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
738                             goto bail;
739                     }
740                 }
741                 if ((c2 & 0xfc00) == 0xdc00)
742                     c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
743                 else
744                     end -= 6;
745             }
746 #endif
747         }
748         chunk = PyUnicode_FromUnicode(&c, 1);
749         if (chunk == NULL) {
750             goto bail;
751         }
752         if (PyList_Append(chunks, chunk)) {
753             Py_DECREF(chunk);
754             goto bail;
755         }
756         Py_DECREF(chunk);
757     }
758 
759     rval = join_list_unicode(chunks);
760     if (rval == NULL) {
761         goto bail;
762     }
763     Py_DECREF(chunks);
764     *next_end_ptr = end;
765     return rval;
766 bail:
767     *next_end_ptr = -1;
768     Py_XDECREF(chunks);
769     return NULL;
770 }
771 
772 PyDoc_STRVAR(pydoc_scanstring,
773     "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
774     "\n"
775     "Scan the string s for a JSON string. End is the index of the\n"
776     "character in s after the quote that started the JSON string.\n"
777     "Unescapes all valid JSON string escape sequences and raises ValueError\n"
778     "on attempt to decode an invalid string. If strict is False then literal\n"
779     "control characters are allowed in the string.\n"
780     "\n"
781     "Returns a tuple of the decoded string and the index of the character in s\n"
782     "after the end quote."
783 );
784 
785 static PyObject *
py_scanstring(PyObject * self UNUSED,PyObject * args)786 py_scanstring(PyObject* self UNUSED, PyObject *args)
787 {
788     PyObject *pystr;
789     PyObject *rval;
790     Py_ssize_t end;
791     Py_ssize_t next_end = -1;
792     char *encoding = NULL;
793     int strict = 1;
794     if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
795         return NULL;
796     }
797     if (encoding == NULL) {
798         encoding = DEFAULT_ENCODING;
799     }
800     if (PyString_Check(pystr)) {
801         rval = scanstring_str(pystr, end, encoding, strict, &next_end);
802     }
803     else if (PyUnicode_Check(pystr)) {
804         rval = scanstring_unicode(pystr, end, strict, &next_end);
805     }
806     else {
807         PyErr_Format(PyExc_TypeError,
808                      "first argument must be a string, not %.80s",
809                      Py_TYPE(pystr)->tp_name);
810         return NULL;
811     }
812     return _build_rval_index_tuple(rval, next_end);
813 }
814 
815 PyDoc_STRVAR(pydoc_encode_basestring_ascii,
816     "encode_basestring_ascii(basestring) -> str\n"
817     "\n"
818     "Return an ASCII-only JSON representation of a Python string"
819 );
820 
821 static PyObject *
py_encode_basestring_ascii(PyObject * self UNUSED,PyObject * pystr)822 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
823 {
824     /* Return an ASCII-only JSON representation of a Python string */
825     /* METH_O */
826     if (PyString_Check(pystr)) {
827         return ascii_escape_str(pystr);
828     }
829     else if (PyUnicode_Check(pystr)) {
830         return ascii_escape_unicode(pystr);
831     }
832     else {
833         PyErr_Format(PyExc_TypeError,
834                      "first argument must be a string, not %.80s",
835                      Py_TYPE(pystr)->tp_name);
836         return NULL;
837     }
838 }
839 
840 static void
scanner_dealloc(PyObject * self)841 scanner_dealloc(PyObject *self)
842 {
843     /* bpo-31095: UnTrack is needed before calling any callbacks */
844     PyObject_GC_UnTrack(self);
845     scanner_clear(self);
846     Py_TYPE(self)->tp_free(self);
847 }
848 
849 static int
scanner_traverse(PyObject * self,visitproc visit,void * arg)850 scanner_traverse(PyObject *self, visitproc visit, void *arg)
851 {
852     PyScannerObject *s;
853     assert(PyScanner_Check(self));
854     s = (PyScannerObject *)self;
855     Py_VISIT(s->encoding);
856     Py_VISIT(s->strict);
857     Py_VISIT(s->object_hook);
858     Py_VISIT(s->pairs_hook);
859     Py_VISIT(s->parse_float);
860     Py_VISIT(s->parse_int);
861     Py_VISIT(s->parse_constant);
862     return 0;
863 }
864 
865 static int
scanner_clear(PyObject * self)866 scanner_clear(PyObject *self)
867 {
868     PyScannerObject *s;
869     assert(PyScanner_Check(self));
870     s = (PyScannerObject *)self;
871     Py_CLEAR(s->encoding);
872     Py_CLEAR(s->strict);
873     Py_CLEAR(s->object_hook);
874     Py_CLEAR(s->pairs_hook);
875     Py_CLEAR(s->parse_float);
876     Py_CLEAR(s->parse_int);
877     Py_CLEAR(s->parse_constant);
878     return 0;
879 }
880 
881 static PyObject *
_parse_object_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)882 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
883     /* Read a JSON object from PyString pystr.
884     idx is the index of the first character after the opening curly brace.
885     *next_idx_ptr is a return-by-reference index to the first character after
886         the closing curly brace.
887 
888     Returns a new PyObject (usually a dict, but object_hook can change that)
889     */
890     char *str = PyString_AS_STRING(pystr);
891     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
892     PyObject *rval;
893     PyObject *pairs;
894     PyObject *item;
895     PyObject *key = NULL;
896     PyObject *val = NULL;
897     char *encoding = PyString_AS_STRING(s->encoding);
898     int strict = PyObject_IsTrue(s->strict);
899     Py_ssize_t next_idx;
900 
901     if (strict < 0)
902         return NULL;
903 
904     pairs = PyList_New(0);
905     if (pairs == NULL)
906         return NULL;
907 
908     /* skip whitespace after { */
909     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
910 
911     /* only loop if the object is non-empty */
912     if (idx <= end_idx && str[idx] != '}') {
913         while (idx <= end_idx) {
914             /* read key */
915             if (str[idx] != '"') {
916                 raise_errmsg("Expecting property name", pystr, idx);
917                 goto bail;
918             }
919             key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
920             if (key == NULL)
921                 goto bail;
922             idx = next_idx;
923 
924             /* skip whitespace between key and : delimiter, read :, skip whitespace */
925             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
926             if (idx > end_idx || str[idx] != ':') {
927                 raise_errmsg("Expecting : delimiter", pystr, idx);
928                 goto bail;
929             }
930             idx++;
931             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
932 
933             /* read any JSON data type */
934             val = scan_once_str(s, pystr, idx, &next_idx);
935             if (val == NULL)
936                 goto bail;
937 
938             item = PyTuple_Pack(2, key, val);
939             if (item == NULL)
940                 goto bail;
941             Py_CLEAR(key);
942             Py_CLEAR(val);
943             if (PyList_Append(pairs, item) == -1) {
944                 Py_DECREF(item);
945                 goto bail;
946             }
947             Py_DECREF(item);
948             idx = next_idx;
949 
950             /* skip whitespace before } or , */
951             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
952 
953             /* bail if the object is closed or we didn't get the , delimiter */
954             if (idx > end_idx) break;
955             if (str[idx] == '}') {
956                 break;
957             }
958             else if (str[idx] != ',') {
959                 raise_errmsg("Expecting , delimiter", pystr, idx);
960                 goto bail;
961             }
962             idx++;
963 
964             /* skip whitespace after , delimiter */
965             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
966         }
967     }
968     /* verify that idx < end_idx, str[idx] should be '}' */
969     if (idx > end_idx || str[idx] != '}') {
970         raise_errmsg("Expecting object", pystr, end_idx);
971         goto bail;
972     }
973 
974     /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
975     if (s->pairs_hook != Py_None) {
976         val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
977         if (val == NULL)
978             goto bail;
979         Py_DECREF(pairs);
980         *next_idx_ptr = idx + 1;
981         return val;
982     }
983 
984     rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
985                                          pairs, NULL);
986     if (rval == NULL)
987         goto bail;
988     Py_CLEAR(pairs);
989 
990     /* if object_hook is not None: rval = object_hook(rval) */
991     if (s->object_hook != Py_None) {
992         val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
993         if (val == NULL)
994             goto bail;
995         Py_DECREF(rval);
996         rval = val;
997         val = NULL;
998     }
999     *next_idx_ptr = idx + 1;
1000     return rval;
1001 bail:
1002     Py_XDECREF(key);
1003     Py_XDECREF(val);
1004     Py_XDECREF(pairs);
1005     return NULL;
1006 }
1007 
1008 static PyObject *
_parse_object_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1009 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1010     /* Read a JSON object from PyUnicode pystr.
1011     idx is the index of the first character after the opening curly brace.
1012     *next_idx_ptr is a return-by-reference index to the first character after
1013         the closing curly brace.
1014 
1015     Returns a new PyObject (usually a dict, but object_hook can change that)
1016     */
1017     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1018     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1019     PyObject *rval;
1020     PyObject *pairs;
1021     PyObject *item;
1022     PyObject *key = NULL;
1023     PyObject *val = NULL;
1024     int strict = PyObject_IsTrue(s->strict);
1025     Py_ssize_t next_idx;
1026 
1027     if (strict < 0)
1028         return NULL;
1029 
1030     pairs = PyList_New(0);
1031     if (pairs == NULL)
1032         return NULL;
1033 
1034     /* skip whitespace after { */
1035     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1036 
1037     /* only loop if the object is non-empty */
1038     if (idx <= end_idx && str[idx] != '}') {
1039         while (idx <= end_idx) {
1040             /* read key */
1041             if (str[idx] != '"') {
1042                 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
1043                 goto bail;
1044             }
1045             key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1046             if (key == NULL)
1047                 goto bail;
1048             idx = next_idx;
1049 
1050             /* skip whitespace between key and : delimiter, read :, skip whitespace */
1051             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1052             if (idx > end_idx || str[idx] != ':') {
1053                 raise_errmsg("Expecting ':' delimiter", pystr, idx);
1054                 goto bail;
1055             }
1056             idx++;
1057             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1058 
1059             /* read any JSON term */
1060             val = scan_once_unicode(s, pystr, idx, &next_idx);
1061             if (val == NULL)
1062                 goto bail;
1063 
1064             item = PyTuple_Pack(2, key, val);
1065             if (item == NULL)
1066                 goto bail;
1067             Py_CLEAR(key);
1068             Py_CLEAR(val);
1069             if (PyList_Append(pairs, item) == -1) {
1070                 Py_DECREF(item);
1071                 goto bail;
1072             }
1073             Py_DECREF(item);
1074             idx = next_idx;
1075 
1076             /* skip whitespace before } or , */
1077             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1078 
1079             /* bail if the object is closed or we didn't get the , delimiter */
1080             if (idx > end_idx) break;
1081             if (str[idx] == '}') {
1082                 break;
1083             }
1084             else if (str[idx] != ',') {
1085                 raise_errmsg("Expecting ',' delimiter", pystr, idx);
1086                 goto bail;
1087             }
1088             idx++;
1089 
1090             /* skip whitespace after , delimiter */
1091             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1092         }
1093     }
1094 
1095     /* verify that idx < end_idx, str[idx] should be '}' */
1096     if (idx > end_idx || str[idx] != '}') {
1097         raise_errmsg("Expecting object", pystr, end_idx);
1098         goto bail;
1099     }
1100 
1101     /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1102     if (s->pairs_hook != Py_None) {
1103         val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1104         if (val == NULL)
1105             goto bail;
1106         Py_DECREF(pairs);
1107         *next_idx_ptr = idx + 1;
1108         return val;
1109     }
1110 
1111     rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type),
1112                                          pairs, NULL);
1113     if (rval == NULL)
1114         goto bail;
1115     Py_CLEAR(pairs);
1116 
1117     /* if object_hook is not None: rval = object_hook(rval) */
1118     if (s->object_hook != Py_None) {
1119         val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1120         if (val == NULL)
1121             goto bail;
1122         Py_DECREF(rval);
1123         rval = val;
1124         val = NULL;
1125     }
1126     *next_idx_ptr = idx + 1;
1127     return rval;
1128 bail:
1129     Py_XDECREF(key);
1130     Py_XDECREF(val);
1131     Py_XDECREF(pairs);
1132     return NULL;
1133 }
1134 
1135 static PyObject *
_parse_array_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1136 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1137     /* Read a JSON array from PyString pystr.
1138     idx is the index of the first character after the opening brace.
1139     *next_idx_ptr is a return-by-reference index to the first character after
1140         the closing brace.
1141 
1142     Returns a new PyList
1143     */
1144     char *str = PyString_AS_STRING(pystr);
1145     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1146     PyObject *val = NULL;
1147     PyObject *rval = PyList_New(0);
1148     Py_ssize_t next_idx;
1149     if (rval == NULL)
1150         return NULL;
1151 
1152     /* skip whitespace after [ */
1153     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1154 
1155     /* only loop if the array is non-empty */
1156     if (idx <= end_idx && str[idx] != ']') {
1157         while (idx <= end_idx) {
1158 
1159             /* read any JSON term and de-tuplefy the (rval, idx) */
1160             val = scan_once_str(s, pystr, idx, &next_idx);
1161             if (val == NULL)
1162                 goto bail;
1163 
1164             if (PyList_Append(rval, val) == -1)
1165                 goto bail;
1166 
1167             Py_CLEAR(val);
1168             idx = next_idx;
1169 
1170             /* skip whitespace between term and , */
1171             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1172 
1173             /* bail if the array is closed or we didn't get the , delimiter */
1174             if (idx > end_idx) break;
1175             if (str[idx] == ']') {
1176                 break;
1177             }
1178             else if (str[idx] != ',') {
1179                 raise_errmsg("Expecting , delimiter", pystr, idx);
1180                 goto bail;
1181             }
1182             idx++;
1183 
1184             /* skip whitespace after , */
1185             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1186         }
1187     }
1188 
1189     /* verify that idx < end_idx, str[idx] should be ']' */
1190     if (idx > end_idx || str[idx] != ']') {
1191         raise_errmsg("Expecting object", pystr, end_idx);
1192         goto bail;
1193     }
1194     *next_idx_ptr = idx + 1;
1195     return rval;
1196 bail:
1197     Py_XDECREF(val);
1198     Py_DECREF(rval);
1199     return NULL;
1200 }
1201 
1202 static PyObject *
_parse_array_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1203 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1204     /* Read a JSON array from PyString pystr.
1205     idx is the index of the first character after the opening brace.
1206     *next_idx_ptr is a return-by-reference index to the first character after
1207         the closing brace.
1208 
1209     Returns a new PyList
1210     */
1211     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1212     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1213     PyObject *val = NULL;
1214     PyObject *rval = PyList_New(0);
1215     Py_ssize_t next_idx;
1216     if (rval == NULL)
1217         return NULL;
1218 
1219     /* skip whitespace after [ */
1220     while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1221 
1222     /* only loop if the array is non-empty */
1223     if (idx <= end_idx && str[idx] != ']') {
1224         while (idx <= end_idx) {
1225 
1226             /* read any JSON term  */
1227             val = scan_once_unicode(s, pystr, idx, &next_idx);
1228             if (val == NULL)
1229                 goto bail;
1230 
1231             if (PyList_Append(rval, val) == -1)
1232                 goto bail;
1233 
1234             Py_CLEAR(val);
1235             idx = next_idx;
1236 
1237             /* skip whitespace between term and , */
1238             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1239 
1240             /* bail if the array is closed or we didn't get the , delimiter */
1241             if (idx > end_idx) break;
1242             if (str[idx] == ']') {
1243                 break;
1244             }
1245             else if (str[idx] != ',') {
1246                 raise_errmsg("Expecting ',' delimiter", pystr, idx);
1247                 goto bail;
1248             }
1249             idx++;
1250 
1251             /* skip whitespace after , */
1252             while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1253         }
1254     }
1255 
1256     /* verify that idx < end_idx, str[idx] should be ']' */
1257     if (idx > end_idx || str[idx] != ']') {
1258         raise_errmsg("Expecting object", pystr, end_idx);
1259         goto bail;
1260     }
1261     *next_idx_ptr = idx + 1;
1262     return rval;
1263 bail:
1264     Py_XDECREF(val);
1265     Py_DECREF(rval);
1266     return NULL;
1267 }
1268 
1269 static PyObject *
_parse_constant(PyScannerObject * s,char * constant,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1270 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1271     /* Read a JSON constant from PyString pystr.
1272     constant is the constant string that was found
1273         ("NaN", "Infinity", "-Infinity").
1274     idx is the index of the first character of the constant
1275     *next_idx_ptr is a return-by-reference index to the first character after
1276         the constant.
1277 
1278     Returns the result of parse_constant
1279     */
1280     PyObject *cstr;
1281     PyObject *rval;
1282     /* constant is "NaN", "Infinity", or "-Infinity" */
1283     cstr = PyString_InternFromString(constant);
1284     if (cstr == NULL)
1285         return NULL;
1286 
1287     /* rval = parse_constant(constant) */
1288     rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1289     idx += PyString_GET_SIZE(cstr);
1290     Py_DECREF(cstr);
1291     *next_idx_ptr = idx;
1292     return rval;
1293 }
1294 
1295 static PyObject *
_match_number_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t start,Py_ssize_t * next_idx_ptr)1296 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1297     /* Read a JSON number from PyString pystr.
1298     idx is the index of the first character of the number
1299     *next_idx_ptr is a return-by-reference index to the first character after
1300         the number.
1301 
1302     Returns a new PyObject representation of that number:
1303         PyInt, PyLong, or PyFloat.
1304         May return other types if parse_int or parse_float are set
1305     */
1306     char *str = PyString_AS_STRING(pystr);
1307     Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1308     Py_ssize_t idx = start;
1309     int is_float = 0;
1310     PyObject *rval;
1311     PyObject *numstr;
1312 
1313     /* read a sign if it's there, make sure it's not the end of the string */
1314     if (str[idx] == '-') {
1315         idx++;
1316         if (idx > end_idx) {
1317             PyErr_SetNone(PyExc_StopIteration);
1318             return NULL;
1319         }
1320     }
1321 
1322     /* read as many integer digits as we find as long as it doesn't start with 0 */
1323     if (str[idx] >= '1' && str[idx] <= '9') {
1324         idx++;
1325         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1326     }
1327     /* if it starts with 0 we only expect one integer digit */
1328     else if (str[idx] == '0') {
1329         idx++;
1330     }
1331     /* no integer digits, error */
1332     else {
1333         PyErr_SetNone(PyExc_StopIteration);
1334         return NULL;
1335     }
1336 
1337     /* if the next char is '.' followed by a digit then read all float digits */
1338     if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1339         is_float = 1;
1340         idx += 2;
1341         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1342     }
1343 
1344     /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1345     if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1346 
1347         /* save the index of the 'e' or 'E' just in case we need to backtrack */
1348         Py_ssize_t e_start = idx;
1349         idx++;
1350 
1351         /* read an exponent sign if present */
1352         if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1353 
1354         /* read all digits */
1355         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1356 
1357         /* if we got a digit, then parse as float. if not, backtrack */
1358         if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1359             is_float = 1;
1360         }
1361         else {
1362             idx = e_start;
1363         }
1364     }
1365 
1366     /* copy the section we determined to be a number */
1367     numstr = PyString_FromStringAndSize(&str[start], idx - start);
1368     if (numstr == NULL)
1369         return NULL;
1370     if (is_float) {
1371         /* parse as a float using a fast path if available, otherwise call user defined method */
1372         if (s->parse_float != (PyObject *)&PyFloat_Type) {
1373             rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1374         }
1375         else {
1376             double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1377                                              NULL, NULL);
1378             if (d == -1.0 && PyErr_Occurred())
1379                 return NULL;
1380             rval = PyFloat_FromDouble(d);
1381         }
1382     }
1383     else {
1384         /* parse as an int using a fast path if available, otherwise call user defined method */
1385         if (s->parse_int != (PyObject *)&PyInt_Type) {
1386             rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1387         }
1388         else {
1389             rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1390         }
1391     }
1392     Py_DECREF(numstr);
1393     *next_idx_ptr = idx;
1394     return rval;
1395 }
1396 
1397 static PyObject *
_match_number_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t start,Py_ssize_t * next_idx_ptr)1398 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1399     /* Read a JSON number from PyUnicode pystr.
1400     idx is the index of the first character of the number
1401     *next_idx_ptr is a return-by-reference index to the first character after
1402         the number.
1403 
1404     Returns a new PyObject representation of that number:
1405         PyInt, PyLong, or PyFloat.
1406         May return other types if parse_int or parse_float are set
1407     */
1408     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1409     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1410     Py_ssize_t idx = start;
1411     int is_float = 0;
1412     PyObject *rval;
1413     PyObject *numstr;
1414 
1415     /* read a sign if it's there, make sure it's not the end of the string */
1416     if (str[idx] == '-') {
1417         idx++;
1418         if (idx > end_idx) {
1419             PyErr_SetNone(PyExc_StopIteration);
1420             return NULL;
1421         }
1422     }
1423 
1424     /* read as many integer digits as we find as long as it doesn't start with 0 */
1425     if (str[idx] >= '1' && str[idx] <= '9') {
1426         idx++;
1427         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1428     }
1429     /* if it starts with 0 we only expect one integer digit */
1430     else if (str[idx] == '0') {
1431         idx++;
1432     }
1433     /* no integer digits, error */
1434     else {
1435         PyErr_SetNone(PyExc_StopIteration);
1436         return NULL;
1437     }
1438 
1439     /* if the next char is '.' followed by a digit then read all float digits */
1440     if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1441         is_float = 1;
1442         idx += 2;
1443         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1444     }
1445 
1446     /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1447     if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1448         Py_ssize_t e_start = idx;
1449         idx++;
1450 
1451         /* read an exponent sign if present */
1452         if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1453 
1454         /* read all digits */
1455         while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1456 
1457         /* if we got a digit, then parse as float. if not, backtrack */
1458         if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1459             is_float = 1;
1460         }
1461         else {
1462             idx = e_start;
1463         }
1464     }
1465 
1466     /* copy the section we determined to be a number */
1467     numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1468     if (numstr == NULL)
1469         return NULL;
1470     if (is_float) {
1471         /* parse as a float using a fast path if available, otherwise call user defined method */
1472         if (s->parse_float != (PyObject *)&PyFloat_Type) {
1473             rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1474         }
1475         else {
1476             rval = PyFloat_FromString(numstr, NULL);
1477         }
1478     }
1479     else {
1480         /* no fast path for unicode -> int, just call */
1481         rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1482     }
1483     Py_DECREF(numstr);
1484     *next_idx_ptr = idx;
1485     return rval;
1486 }
1487 
1488 static PyObject *
scan_once_str(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1489 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1490 {
1491     /* Read one JSON term (of any kind) from PyString pystr.
1492     idx is the index of the first character of the term
1493     *next_idx_ptr is a return-by-reference index to the first character after
1494         the number.
1495 
1496     Returns a new PyObject representation of the term.
1497     */
1498     PyObject *res;
1499     int strict;
1500     char *str = PyString_AS_STRING(pystr);
1501     Py_ssize_t length = PyString_GET_SIZE(pystr);
1502     if (idx < 0) {
1503         PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1504         return NULL;
1505     }
1506     if (idx >= length) {
1507         PyErr_SetNone(PyExc_StopIteration);
1508         return NULL;
1509     }
1510     switch (str[idx]) {
1511         case '"':
1512             /* string */
1513             strict = PyObject_IsTrue(s->strict);
1514             if (strict < 0)
1515                 return NULL;
1516             return scanstring_str(pystr, idx + 1,
1517                 PyString_AS_STRING(s->encoding), strict, next_idx_ptr);
1518         case '{':
1519             /* object */
1520             if (Py_EnterRecursiveCall(" while decoding a JSON object "
1521                                       "from a byte string"))
1522                 return NULL;
1523             res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1524             Py_LeaveRecursiveCall();
1525             return res;
1526         case '[':
1527             /* array */
1528             if (Py_EnterRecursiveCall(" while decoding a JSON array "
1529                                       "from a byte string"))
1530                 return NULL;
1531             res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1532             Py_LeaveRecursiveCall();
1533             return res;
1534         case 'n':
1535             /* null */
1536             if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1537                 Py_INCREF(Py_None);
1538                 *next_idx_ptr = idx + 4;
1539                 return Py_None;
1540             }
1541             break;
1542         case 't':
1543             /* true */
1544             if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1545                 Py_INCREF(Py_True);
1546                 *next_idx_ptr = idx + 4;
1547                 return Py_True;
1548             }
1549             break;
1550         case 'f':
1551             /* false */
1552             if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1553                 Py_INCREF(Py_False);
1554                 *next_idx_ptr = idx + 5;
1555                 return Py_False;
1556             }
1557             break;
1558         case 'N':
1559             /* NaN */
1560             if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1561                 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1562             }
1563             break;
1564         case 'I':
1565             /* Infinity */
1566             if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1567                 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1568             }
1569             break;
1570         case '-':
1571             /* -Infinity */
1572             if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1573                 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1574             }
1575             break;
1576     }
1577     /* Didn't find a string, object, array, or named constant. Look for a number. */
1578     return _match_number_str(s, pystr, idx, next_idx_ptr);
1579 }
1580 
1581 static PyObject *
scan_once_unicode(PyScannerObject * s,PyObject * pystr,Py_ssize_t idx,Py_ssize_t * next_idx_ptr)1582 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1583 {
1584     /* Read one JSON term (of any kind) from PyUnicode pystr.
1585     idx is the index of the first character of the term
1586     *next_idx_ptr is a return-by-reference index to the first character after
1587         the number.
1588 
1589     Returns a new PyObject representation of the term.
1590     */
1591     PyObject *res;
1592     int strict;
1593     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1594     Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1595     if (idx < 0) {
1596         PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1597         return NULL;
1598     }
1599     if (idx >= length) {
1600         PyErr_SetNone(PyExc_StopIteration);
1601         return NULL;
1602     }
1603     switch (str[idx]) {
1604         case '"':
1605             /* string */
1606             strict = PyObject_IsTrue(s->strict);
1607             if (strict < 0)
1608                 return NULL;
1609             return scanstring_unicode(pystr, idx + 1, strict, next_idx_ptr);
1610         case '{':
1611             /* object */
1612             if (Py_EnterRecursiveCall(" while decoding a JSON object "
1613                                       "from a unicode string"))
1614                 return NULL;
1615             res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1616             Py_LeaveRecursiveCall();
1617             return res;
1618         case '[':
1619             /* array */
1620             if (Py_EnterRecursiveCall(" while decoding a JSON array "
1621                                       "from a unicode string"))
1622                 return NULL;
1623             res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1624             Py_LeaveRecursiveCall();
1625             return res;
1626         case 'n':
1627             /* null */
1628             if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1629                 Py_INCREF(Py_None);
1630                 *next_idx_ptr = idx + 4;
1631                 return Py_None;
1632             }
1633             break;
1634         case 't':
1635             /* true */
1636             if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1637                 Py_INCREF(Py_True);
1638                 *next_idx_ptr = idx + 4;
1639                 return Py_True;
1640             }
1641             break;
1642         case 'f':
1643             /* false */
1644             if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1645                 Py_INCREF(Py_False);
1646                 *next_idx_ptr = idx + 5;
1647                 return Py_False;
1648             }
1649             break;
1650         case 'N':
1651             /* NaN */
1652             if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1653                 return _parse_constant(s, "NaN", idx, next_idx_ptr);
1654             }
1655             break;
1656         case 'I':
1657             /* Infinity */
1658             if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1659                 return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1660             }
1661             break;
1662         case '-':
1663             /* -Infinity */
1664             if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1665                 return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1666             }
1667             break;
1668     }
1669     /* Didn't find a string, object, array, or named constant. Look for a number. */
1670     return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1671 }
1672 
1673 static PyObject *
scanner_call(PyObject * self,PyObject * args,PyObject * kwds)1674 scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1675 {
1676     /* Python callable interface to scan_once_{str,unicode} */
1677     PyObject *pystr;
1678     PyObject *rval;
1679     Py_ssize_t idx;
1680     Py_ssize_t next_idx = -1;
1681     static char *kwlist[] = {"string", "idx", NULL};
1682     PyScannerObject *s;
1683     assert(PyScanner_Check(self));
1684     s = (PyScannerObject *)self;
1685     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1686         return NULL;
1687 
1688     if (PyString_Check(pystr)) {
1689         rval = scan_once_str(s, pystr, idx, &next_idx);
1690     }
1691     else if (PyUnicode_Check(pystr)) {
1692         rval = scan_once_unicode(s, pystr, idx, &next_idx);
1693     }
1694     else {
1695         PyErr_Format(PyExc_TypeError,
1696                  "first argument must be a string, not %.80s",
1697                  Py_TYPE(pystr)->tp_name);
1698         return NULL;
1699     }
1700     return _build_rval_index_tuple(rval, next_idx);
1701 }
1702 
1703 static PyObject *
scanner_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1704 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1705 {
1706     PyScannerObject *s;
1707     PyObject *ctx;
1708     static char *kwlist[] = {"context", NULL};
1709 
1710     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1711         return NULL;
1712 
1713     s = (PyScannerObject *)type->tp_alloc(type, 0);
1714     if (s == NULL)
1715         return NULL;
1716 
1717     /* PyString_AS_STRING is used on encoding */
1718     s->encoding = PyObject_GetAttrString(ctx, "encoding");
1719     if (s->encoding == NULL)
1720         goto bail;
1721     if (s->encoding == Py_None) {
1722         Py_DECREF(Py_None);
1723         s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1724     }
1725     else if (PyUnicode_Check(s->encoding)) {
1726         PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1727         Py_SETREF(s->encoding, tmp);
1728     }
1729     if (s->encoding == NULL)
1730         goto bail;
1731     if (!PyString_Check(s->encoding)) {
1732 	PyErr_Format(PyExc_TypeError,
1733 		     "encoding must be a string, not %.80s",
1734 		     Py_TYPE(s->encoding)->tp_name);
1735 	goto bail;
1736     }
1737 
1738 
1739     /* All of these will fail "gracefully" so we don't need to verify them */
1740     s->strict = PyObject_GetAttrString(ctx, "strict");
1741     if (s->strict == NULL)
1742         goto bail;
1743     s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1744     if (s->object_hook == NULL)
1745         goto bail;
1746     s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1747     if (s->pairs_hook == NULL)
1748         goto bail;
1749     s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1750     if (s->parse_float == NULL)
1751         goto bail;
1752     s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1753     if (s->parse_int == NULL)
1754         goto bail;
1755     s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1756     if (s->parse_constant == NULL)
1757         goto bail;
1758 
1759     return (PyObject *)s;
1760 
1761 bail:
1762     Py_DECREF(s);
1763     return NULL;
1764 }
1765 
1766 PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1767 
1768 static
1769 PyTypeObject PyScannerType = {
1770     PyVarObject_HEAD_INIT(NULL, 0)
1771     "_json.Scanner",       /* tp_name */
1772     sizeof(PyScannerObject), /* tp_basicsize */
1773     0,                    /* tp_itemsize */
1774     scanner_dealloc, /* tp_dealloc */
1775     0,                    /* tp_print */
1776     0,                    /* tp_getattr */
1777     0,                    /* tp_setattr */
1778     0,                    /* tp_compare */
1779     0,                    /* tp_repr */
1780     0,                    /* tp_as_number */
1781     0,                    /* tp_as_sequence */
1782     0,                    /* tp_as_mapping */
1783     0,                    /* tp_hash */
1784     scanner_call,         /* tp_call */
1785     0,                    /* tp_str */
1786     0,/* PyObject_GenericGetAttr, */                    /* tp_getattro */
1787     0,/* PyObject_GenericSetAttr, */                    /* tp_setattro */
1788     0,                    /* tp_as_buffer */
1789     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
1790     scanner_doc,          /* tp_doc */
1791     scanner_traverse,                    /* tp_traverse */
1792     scanner_clear,                    /* tp_clear */
1793     0,                    /* tp_richcompare */
1794     0,                    /* tp_weaklistoffset */
1795     0,                    /* tp_iter */
1796     0,                    /* tp_iternext */
1797     0,                    /* tp_methods */
1798     scanner_members,                    /* tp_members */
1799     0,                    /* tp_getset */
1800     0,                    /* tp_base */
1801     0,                    /* tp_dict */
1802     0,                    /* tp_descr_get */
1803     0,                    /* tp_descr_set */
1804     0,                    /* tp_dictoffset */
1805     0,                    /* tp_init */
1806     0,/* PyType_GenericAlloc, */        /* tp_alloc */
1807     scanner_new,          /* tp_new */
1808     0,/* PyObject_GC_Del, */              /* tp_free */
1809 };
1810 
1811 static PyObject *
encoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1812 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1813 {
1814     static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1815 
1816     PyEncoderObject *s;
1817     PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1818     PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan_obj;
1819     int allow_nan;
1820 
1821     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist,
1822         &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
1823         &sort_keys, &skipkeys, &allow_nan_obj))
1824         return NULL;
1825 
1826     allow_nan = PyObject_IsTrue(allow_nan_obj);
1827     if (allow_nan < 0)
1828         return NULL;
1829 
1830     if (markers != Py_None && !PyDict_Check(markers)) {
1831         PyErr_Format(PyExc_TypeError,
1832                      "make_encoder() argument 1 must be dict or None, "
1833                      "not %.200s", Py_TYPE(markers)->tp_name);
1834         return NULL;
1835     }
1836 
1837     s = (PyEncoderObject *)type->tp_alloc(type, 0);
1838     if (s == NULL)
1839         return NULL;
1840 
1841     s->markers = markers;
1842     s->defaultfn = defaultfn;
1843     s->encoder = encoder;
1844     s->indent = indent;
1845     s->key_separator = key_separator;
1846     s->item_separator = item_separator;
1847     s->sort_keys = sort_keys;
1848     s->skipkeys = skipkeys;
1849     s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
1850     s->allow_nan = allow_nan;
1851 
1852     Py_INCREF(s->markers);
1853     Py_INCREF(s->defaultfn);
1854     Py_INCREF(s->encoder);
1855     Py_INCREF(s->indent);
1856     Py_INCREF(s->key_separator);
1857     Py_INCREF(s->item_separator);
1858     Py_INCREF(s->sort_keys);
1859     Py_INCREF(s->skipkeys);
1860     return (PyObject *)s;
1861 }
1862 
1863 static PyObject *
encoder_call(PyObject * self,PyObject * args,PyObject * kwds)1864 encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
1865 {
1866     /* Python callable interface to encode_listencode_obj */
1867     static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1868     PyObject *obj;
1869     PyObject *rval;
1870     Py_ssize_t indent_level;
1871     PyEncoderObject *s;
1872     assert(PyEncoder_Check(self));
1873     s = (PyEncoderObject *)self;
1874     if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
1875         &obj, _convertPyInt_AsSsize_t, &indent_level))
1876         return NULL;
1877     rval = PyList_New(0);
1878     if (rval == NULL)
1879         return NULL;
1880     if (encoder_listencode_obj(s, rval, obj, indent_level)) {
1881         Py_DECREF(rval);
1882         return NULL;
1883     }
1884     return rval;
1885 }
1886 
1887 static PyObject *
_encoded_const(PyObject * obj)1888 _encoded_const(PyObject *obj)
1889 {
1890     /* Return the JSON string representation of None, True, False */
1891     if (obj == Py_None) {
1892         static PyObject *s_null = NULL;
1893         if (s_null == NULL) {
1894             s_null = PyString_InternFromString("null");
1895         }
1896         Py_XINCREF(s_null);
1897         return s_null;
1898     }
1899     else if (obj == Py_True) {
1900         static PyObject *s_true = NULL;
1901         if (s_true == NULL) {
1902             s_true = PyString_InternFromString("true");
1903         }
1904         Py_XINCREF(s_true);
1905         return s_true;
1906     }
1907     else if (obj == Py_False) {
1908         static PyObject *s_false = NULL;
1909         if (s_false == NULL) {
1910             s_false = PyString_InternFromString("false");
1911         }
1912         Py_XINCREF(s_false);
1913         return s_false;
1914     }
1915     else {
1916         PyErr_SetString(PyExc_ValueError, "not a const");
1917         return NULL;
1918     }
1919 }
1920 
1921 static PyObject *
encoder_encode_float(PyEncoderObject * s,PyObject * obj)1922 encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1923 {
1924     /* Return the JSON representation of a PyFloat */
1925     double i = PyFloat_AS_DOUBLE(obj);
1926     if (!Py_IS_FINITE(i)) {
1927         if (!s->allow_nan) {
1928             PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
1929             return NULL;
1930         }
1931         if (i > 0) {
1932             return PyString_FromString("Infinity");
1933         }
1934         else if (i < 0) {
1935             return PyString_FromString("-Infinity");
1936         }
1937         else {
1938             return PyString_FromString("NaN");
1939         }
1940     }
1941     /* Make sure to use the base float class repr method */
1942     return PyFloat_Type.tp_repr(obj);
1943 }
1944 
1945 static PyObject *
encoder_encode_string(PyEncoderObject * s,PyObject * obj)1946 encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1947 {
1948     /* Return the JSON representation of a string */
1949     if (s->fast_encode)
1950         return py_encode_basestring_ascii(NULL, obj);
1951     else
1952         return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
1953 }
1954 
1955 static int
_steal_list_append(PyObject * lst,PyObject * stolen)1956 _steal_list_append(PyObject *lst, PyObject *stolen)
1957 {
1958     /* Append stolen and then decrement its reference count */
1959     int rval = PyList_Append(lst, stolen);
1960     Py_DECREF(stolen);
1961     return rval;
1962 }
1963 
1964 static int
encoder_listencode_obj(PyEncoderObject * s,PyObject * rval,PyObject * obj,Py_ssize_t indent_level)1965 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
1966 {
1967     /* Encode Python object obj to a JSON term, rval is a PyList */
1968     PyObject *newobj;
1969     int rv;
1970 
1971     if (obj == Py_None || obj == Py_True || obj == Py_False) {
1972         PyObject *cstr = _encoded_const(obj);
1973         if (cstr == NULL)
1974             return -1;
1975         return _steal_list_append(rval, cstr);
1976     }
1977     else if (PyString_Check(obj) || PyUnicode_Check(obj))
1978     {
1979         PyObject *encoded = encoder_encode_string(s, obj);
1980         if (encoded == NULL)
1981             return -1;
1982         return _steal_list_append(rval, encoded);
1983     }
1984     else if (_PyAnyInt_Check(obj)) {
1985         PyObject *encoded = PyObject_Str(obj);
1986         if (encoded == NULL)
1987             return -1;
1988         return _steal_list_append(rval, encoded);
1989     }
1990     else if (PyFloat_Check(obj)) {
1991         PyObject *encoded = encoder_encode_float(s, obj);
1992         if (encoded == NULL)
1993             return -1;
1994         return _steal_list_append(rval, encoded);
1995     }
1996     else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1997         if (Py_EnterRecursiveCall(" while encoding a JSON object"))
1998             return -1;
1999         rv = encoder_listencode_list(s, rval, obj, indent_level);
2000         Py_LeaveRecursiveCall();
2001         return rv;
2002     }
2003     else if (PyDict_Check(obj)) {
2004         if (Py_EnterRecursiveCall(" while encoding a JSON object"))
2005             return -1;
2006         rv = encoder_listencode_dict(s, rval, obj, indent_level);
2007         Py_LeaveRecursiveCall();
2008         return rv;
2009     }
2010     else {
2011         PyObject *ident = NULL;
2012         if (s->markers != Py_None) {
2013             int has_key;
2014             ident = PyLong_FromVoidPtr(obj);
2015             if (ident == NULL)
2016                 return -1;
2017             has_key = PyDict_Contains(s->markers, ident);
2018             if (has_key) {
2019                 if (has_key != -1)
2020                     PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2021                 Py_DECREF(ident);
2022                 return -1;
2023             }
2024             if (PyDict_SetItem(s->markers, ident, obj)) {
2025                 Py_DECREF(ident);
2026                 return -1;
2027             }
2028         }
2029         newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2030         if (newobj == NULL) {
2031             Py_XDECREF(ident);
2032             return -1;
2033         }
2034 
2035         if (Py_EnterRecursiveCall(" while encoding a JSON object")) {
2036             Py_DECREF(newobj);
2037             Py_XDECREF(ident);
2038             return -1;
2039         }
2040         rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2041         Py_LeaveRecursiveCall();
2042 
2043         Py_DECREF(newobj);
2044         if (rv) {
2045             Py_XDECREF(ident);
2046             return -1;
2047         }
2048         if (ident != NULL) {
2049             if (PyDict_DelItem(s->markers, ident)) {
2050                 Py_XDECREF(ident);
2051                 return -1;
2052             }
2053             Py_XDECREF(ident);
2054         }
2055         return rv;
2056     }
2057 }
2058 
2059 static int
encoder_listencode_dict(PyEncoderObject * s,PyObject * rval,PyObject * dct,Py_ssize_t indent_level)2060 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2061 {
2062     /* Encode Python dict dct a JSON term, rval is a PyList */
2063     static PyObject *open_dict = NULL;
2064     static PyObject *close_dict = NULL;
2065     static PyObject *empty_dict = NULL;
2066     PyObject *kstr = NULL;
2067     PyObject *ident = NULL;
2068     PyObject *key = NULL;
2069     PyObject *value = NULL;
2070     PyObject *it = NULL;
2071     int skipkeys;
2072     Py_ssize_t idx;
2073 
2074     if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) {
2075         open_dict = PyString_InternFromString("{");
2076         close_dict = PyString_InternFromString("}");
2077         empty_dict = PyString_InternFromString("{}");
2078         if (open_dict == NULL || close_dict == NULL || empty_dict == NULL)
2079             return -1;
2080     }
2081     if (Py_SIZE(dct) == 0)
2082         return PyList_Append(rval, empty_dict);
2083 
2084     if (s->markers != Py_None) {
2085         int has_key;
2086         ident = PyLong_FromVoidPtr(dct);
2087         if (ident == NULL)
2088             goto bail;
2089         has_key = PyDict_Contains(s->markers, ident);
2090         if (has_key) {
2091             if (has_key != -1)
2092                 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2093             goto bail;
2094         }
2095         if (PyDict_SetItem(s->markers, ident, dct)) {
2096             goto bail;
2097         }
2098     }
2099 
2100     if (PyList_Append(rval, open_dict))
2101         goto bail;
2102 
2103     if (s->indent != Py_None) {
2104         /* TODO: DOES NOT RUN */
2105         indent_level += 1;
2106         /*
2107             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2108             separator = _item_separator + newline_indent
2109             buf += newline_indent
2110         */
2111     }
2112 
2113     /* TODO: C speedup not implemented for sort_keys */
2114 
2115     it = PyObject_GetIter(dct);
2116     if (it == NULL)
2117         goto bail;
2118     skipkeys = PyObject_IsTrue(s->skipkeys);
2119     if (skipkeys < 0)
2120         goto bail;
2121     idx = 0;
2122     while ((key = PyIter_Next(it)) != NULL) {
2123         PyObject *encoded;
2124 
2125         if (PyString_Check(key) || PyUnicode_Check(key)) {
2126             Py_INCREF(key);
2127             kstr = key;
2128         }
2129         else if (PyFloat_Check(key)) {
2130             kstr = encoder_encode_float(s, key);
2131             if (kstr == NULL)
2132                 goto bail;
2133         }
2134         else if (_PyAnyInt_Check(key)) {
2135             kstr = PyObject_Str(key);
2136             if (kstr == NULL)
2137                 goto bail;
2138         }
2139         else if (key == Py_True || key == Py_False || key == Py_None) {
2140             kstr = _encoded_const(key);
2141             if (kstr == NULL)
2142                 goto bail;
2143         }
2144         else if (skipkeys) {
2145             Py_DECREF(key);
2146             continue;
2147         }
2148         else {
2149             /* TODO: include repr of key */
2150             PyErr_SetString(PyExc_TypeError, "keys must be a string");
2151             goto bail;
2152         }
2153 
2154         if (idx) {
2155             if (PyList_Append(rval, s->item_separator))
2156                 goto bail;
2157         }
2158 
2159         value = PyObject_GetItem(dct, key);
2160         if (value == NULL)
2161             goto bail;
2162 
2163         encoded = encoder_encode_string(s, kstr);
2164         Py_CLEAR(kstr);
2165         if (encoded == NULL)
2166             goto bail;
2167         if (PyList_Append(rval, encoded)) {
2168             Py_DECREF(encoded);
2169             goto bail;
2170         }
2171         Py_DECREF(encoded);
2172         if (PyList_Append(rval, s->key_separator))
2173             goto bail;
2174         if (encoder_listencode_obj(s, rval, value, indent_level))
2175             goto bail;
2176         idx += 1;
2177         Py_CLEAR(value);
2178         Py_DECREF(key);
2179     }
2180     if (PyErr_Occurred())
2181         goto bail;
2182     Py_CLEAR(it);
2183 
2184     if (ident != NULL) {
2185         if (PyDict_DelItem(s->markers, ident))
2186             goto bail;
2187         Py_CLEAR(ident);
2188     }
2189     if (s->indent != Py_None) {
2190         /* TODO: DOES NOT RUN */
2191         /*
2192             indent_level -= 1;
2193 
2194             yield '\n' + (' ' * (_indent * _current_indent_level))
2195         */
2196     }
2197     if (PyList_Append(rval, close_dict))
2198         goto bail;
2199     return 0;
2200 
2201 bail:
2202     Py_XDECREF(it);
2203     Py_XDECREF(key);
2204     Py_XDECREF(value);
2205     Py_XDECREF(kstr);
2206     Py_XDECREF(ident);
2207     return -1;
2208 }
2209 
2210 
2211 static int
encoder_listencode_list(PyEncoderObject * s,PyObject * rval,PyObject * seq,Py_ssize_t indent_level)2212 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2213 {
2214     /* Encode Python list seq to a JSON term, rval is a PyList */
2215     static PyObject *open_array = NULL;
2216     static PyObject *close_array = NULL;
2217     static PyObject *empty_array = NULL;
2218     PyObject *ident = NULL;
2219     PyObject *s_fast = NULL;
2220     Py_ssize_t i;
2221 
2222     if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2223         open_array = PyString_InternFromString("[");
2224         close_array = PyString_InternFromString("]");
2225         empty_array = PyString_InternFromString("[]");
2226         if (open_array == NULL || close_array == NULL || empty_array == NULL)
2227             return -1;
2228     }
2229     ident = NULL;
2230     s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
2231     if (s_fast == NULL)
2232         return -1;
2233     if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
2234         Py_DECREF(s_fast);
2235         return PyList_Append(rval, empty_array);
2236     }
2237 
2238     if (s->markers != Py_None) {
2239         int has_key;
2240         ident = PyLong_FromVoidPtr(seq);
2241         if (ident == NULL)
2242             goto bail;
2243         has_key = PyDict_Contains(s->markers, ident);
2244         if (has_key) {
2245             if (has_key != -1)
2246                 PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2247             goto bail;
2248         }
2249         if (PyDict_SetItem(s->markers, ident, seq)) {
2250             goto bail;
2251         }
2252     }
2253 
2254     if (PyList_Append(rval, open_array))
2255         goto bail;
2256     if (s->indent != Py_None) {
2257         /* TODO: DOES NOT RUN */
2258         indent_level += 1;
2259         /*
2260             newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
2261             separator = _item_separator + newline_indent
2262             buf += newline_indent
2263         */
2264     }
2265     for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
2266         PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
2267         if (i) {
2268             if (PyList_Append(rval, s->item_separator))
2269                 goto bail;
2270         }
2271         if (encoder_listencode_obj(s, rval, obj, indent_level))
2272             goto bail;
2273     }
2274     if (ident != NULL) {
2275         if (PyDict_DelItem(s->markers, ident))
2276             goto bail;
2277         Py_CLEAR(ident);
2278     }
2279     if (s->indent != Py_None) {
2280         /* TODO: DOES NOT RUN */
2281         /*
2282             indent_level -= 1;
2283 
2284             yield '\n' + (' ' * (_indent * _current_indent_level))
2285         */
2286     }
2287     if (PyList_Append(rval, close_array))
2288         goto bail;
2289     Py_DECREF(s_fast);
2290     return 0;
2291 
2292 bail:
2293     Py_XDECREF(ident);
2294     Py_DECREF(s_fast);
2295     return -1;
2296 }
2297 
2298 static void
encoder_dealloc(PyObject * self)2299 encoder_dealloc(PyObject *self)
2300 {
2301     /* bpo-31095: UnTrack is needed before calling any callbacks */
2302     PyObject_GC_UnTrack(self);
2303     encoder_clear(self);
2304     Py_TYPE(self)->tp_free(self);
2305 }
2306 
2307 static int
encoder_traverse(PyObject * self,visitproc visit,void * arg)2308 encoder_traverse(PyObject *self, visitproc visit, void *arg)
2309 {
2310     PyEncoderObject *s;
2311     assert(PyEncoder_Check(self));
2312     s = (PyEncoderObject *)self;
2313     Py_VISIT(s->markers);
2314     Py_VISIT(s->defaultfn);
2315     Py_VISIT(s->encoder);
2316     Py_VISIT(s->indent);
2317     Py_VISIT(s->key_separator);
2318     Py_VISIT(s->item_separator);
2319     Py_VISIT(s->sort_keys);
2320     Py_VISIT(s->skipkeys);
2321     return 0;
2322 }
2323 
2324 static int
encoder_clear(PyObject * self)2325 encoder_clear(PyObject *self)
2326 {
2327     /* Deallocate Encoder */
2328     PyEncoderObject *s;
2329     assert(PyEncoder_Check(self));
2330     s = (PyEncoderObject *)self;
2331     Py_CLEAR(s->markers);
2332     Py_CLEAR(s->defaultfn);
2333     Py_CLEAR(s->encoder);
2334     Py_CLEAR(s->indent);
2335     Py_CLEAR(s->key_separator);
2336     Py_CLEAR(s->item_separator);
2337     Py_CLEAR(s->sort_keys);
2338     Py_CLEAR(s->skipkeys);
2339     return 0;
2340 }
2341 
2342 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2343 
2344 static
2345 PyTypeObject PyEncoderType = {
2346     PyVarObject_HEAD_INIT(NULL, 0)
2347     "_json.Encoder",       /* tp_name */
2348     sizeof(PyEncoderObject), /* tp_basicsize */
2349     0,                    /* tp_itemsize */
2350     encoder_dealloc, /* tp_dealloc */
2351     0,                    /* tp_print */
2352     0,                    /* tp_getattr */
2353     0,                    /* tp_setattr */
2354     0,                    /* tp_compare */
2355     0,                    /* tp_repr */
2356     0,                    /* tp_as_number */
2357     0,                    /* tp_as_sequence */
2358     0,                    /* tp_as_mapping */
2359     0,                    /* tp_hash */
2360     encoder_call,         /* tp_call */
2361     0,                    /* tp_str */
2362     0,                    /* tp_getattro */
2363     0,                    /* tp_setattro */
2364     0,                    /* tp_as_buffer */
2365     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,   /* tp_flags */
2366     encoder_doc,          /* tp_doc */
2367     encoder_traverse,     /* tp_traverse */
2368     encoder_clear,        /* tp_clear */
2369     0,                    /* tp_richcompare */
2370     0,                    /* tp_weaklistoffset */
2371     0,                    /* tp_iter */
2372     0,                    /* tp_iternext */
2373     0,                    /* tp_methods */
2374     encoder_members,      /* tp_members */
2375     0,                    /* tp_getset */
2376     0,                    /* tp_base */
2377     0,                    /* tp_dict */
2378     0,                    /* tp_descr_get */
2379     0,                    /* tp_descr_set */
2380     0,                    /* tp_dictoffset */
2381     0,                    /* tp_init */
2382     0,                    /* tp_alloc */
2383     encoder_new,          /* tp_new */
2384     0,                    /* tp_free */
2385 };
2386 
2387 static PyMethodDef speedups_methods[] = {
2388     {"encode_basestring_ascii",
2389         (PyCFunction)py_encode_basestring_ascii,
2390         METH_O,
2391         pydoc_encode_basestring_ascii},
2392     {"scanstring",
2393         (PyCFunction)py_scanstring,
2394         METH_VARARGS,
2395         pydoc_scanstring},
2396     {NULL, NULL, 0, NULL}
2397 };
2398 
2399 PyDoc_STRVAR(module_doc,
2400 "json speedups\n");
2401 
2402 void
init_json(void)2403 init_json(void)
2404 {
2405     PyObject *m;
2406     if (PyType_Ready(&PyScannerType) < 0)
2407         return;
2408     if (PyType_Ready(&PyEncoderType) < 0)
2409         return;
2410     m = Py_InitModule3("_json", speedups_methods, module_doc);
2411     if (m == NULL)
2412         return;
2413     Py_INCREF((PyObject*)&PyScannerType);
2414     PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2415     Py_INCREF((PyObject*)&PyEncoderType);
2416     PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
2417 }
2418