json_tokener.c

  1 /*
  2  * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
  3  *
  4  * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
  5  * Michael Clark <michael@metaparadigm.com>
  6  *
  7  * This library is free software; you can redistribute it and/or modify
  8  * it under the terms of the MIT license. See COPYING for details.
  9  *
 10  *
 11  * Copyright (c) 2008-2009 Yahoo! Inc.  All rights reserved.
 12  * The copyrights to the contents of this file are licensed under the MIT License
 13  * (http://www.opensource.org/licenses/mit-license.php)
 14  */
 15 
 16 #include "config.h"
 17 
 18 #include "math_compat.h"
 19 #include <assert.h>
 20 #include <ctype.h>
 21 #include <limits.h>
 22 #include <math.h>
 23 #include <stddef.h>
 24 #include <stdio.h>
 25 #include <stdlib.h>
 26 #include <string.h>
 27 
 28 #include "debug.h"
 29 #include "json_inttypes.h"
 30 #include "json_object.h"
 31 #include "json_object_private.h"
 32 #include "json_tokener.h"
 33 #include "json_util.h"
 34 #include "printbuf.h"
 35 #include "strdup_compat.h"
 36 
 37 #ifdef HAVE_LOCALE_H
 38 #include <locale.h>
 39 #endif /* HAVE_LOCALE_H */
 40 #ifdef HAVE_XLOCALE_H
 41 #include <xlocale.h>
 42 #endif
 43 #ifdef HAVE_STRINGS_H
 44 #include <strings.h>
 45 #endif /* HAVE_STRINGS_H */
 46 
 47 #define jt_hexdigit(x) (((x) <= '9') ? (x) - '' : ((x)&7) + 9)
 48 
 49 #if !HAVE_STRNCASECMP && defined(_MSC_VER)
 50 /* MSC has the version as _strnicmp */
 51 #define strncasecmp _strnicmp
 52 #elif !HAVE_STRNCASECMP
 53 #error You do not have strncasecmp on your system.
 54 #endif /* HAVE_STRNCASECMP */
 55 
 56 /* Use C99 NAN by default; if not available, nan("") should work too. */
 57 #ifndef NAN
 58 #define NAN nan("")
 59 #endif /* !NAN */
 60 
 61 static const char json_null_str[] = "null";
 62 static const int json_null_str_len = sizeof(json_null_str) - 1;
 63 static const char json_inf_str[] = "Infinity";
 64 static const char json_inf_str_lower[] = "infinity";
 65 static const unsigned int json_inf_str_len = sizeof(json_inf_str) - 1;
 66 static const char json_nan_str[] = "NaN";
 67 static const int json_nan_str_len = sizeof(json_nan_str) - 1;
 68 static const char json_true_str[] = "true";
 69 static const int json_true_str_len = sizeof(json_true_str) - 1;
 70 static const char json_false_str[] = "false";
 71 static const int json_false_str_len = sizeof(json_false_str) - 1;
 72 
 73 /* clang-format off */
 74 static const char *json_tokener_errors[] = {
 75         "success",
 76         "continue",
 77         "nesting too deep",
 78         "unexpected end of data",
 79         "unexpected character",
 80         "null expected",
 81         "boolean expected",
 82         "number expected",
 83         "array value separator ',' expected",
 84         "quoted object property name expected",
 85         "object property name separator ':' expected",
 86         "object value separator ',' expected",
 87         "invalid string sequence",
 88         "expected comment",
 89         "invalid utf-8 string",
 90         "buffer size overflow"
 91 };
 92 /* clang-format on */
 93 
 94 /**
 95  * Validate the UTF-8 string in strict mode.
 96  * If the input is not valid UTF-8, return an error.
 97  */
 98 static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes);
 99 
100 static int json_tokener_parse_double(const char *buf, int len, double *retval);
101 
102 const char *json_tokener_error_desc(enum json_tokener_error jerr)
103 {
104         int jerr_int = (int)jerr;
105         if (jerr_int < 0 ||
106             jerr_int >= (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0])))
107                 return "Unknown error, "
108                        "invalid json_tokener_error value passed to json_tokener_error_desc()";
109         return json_tokener_errors[jerr];
110 }
111 
112 enum json_tokener_error json_tokener_get_error(struct json_tokener *tok)
113 {
114         return tok->err;
115 }
116 
/*
 * Helpers for decoding \uNNNN escapes that form UTF-16 surrogate pairs.
 *   IS_HIGH_SURROGATE: bits 10-15 of uc match 0xD800 (0xD800..0xDBFF for
 *                      16-bit values).
 *   IS_LOW_SURROGATE:  bits 10-15 of uc match 0xDC00 (0xDC00..0xDFFF for
 *                      16-bit values).
 *   DECODE_SURROGATE_PAIR: combine the low 10 bits of each half and add
 *                      0x10000 to obtain the code point.
 */
#define IS_HIGH_SURROGATE(uc) (0xD800 == ((uc) & 0xFC00))
#define IS_LOW_SURROGATE(uc) (0xDC00 == ((uc) & 0xFC00))
#define DECODE_SURROGATE_PAIR(hi, lo) (0x10000 + ((lo) & 0x3FF) + (((hi) & 0x3FF) << 10))

/* UTF-8 encoding of U+FFFD, emitted in place of an unpaired high surrogate. */
static unsigned char utf8_replacement_char[3] = {0xEF, 0xBF, 0xBD};
122 
123 struct json_tokener *json_tokener_new_ex(int depth)
124 {
125         struct json_tokener *tok;
126 
127         tok = (struct json_tokener *)calloc(1, sizeof(struct json_tokener));
128         if (!tok)
129                 return NULL;
130         tok->stack = (struct json_tokener_srec *)calloc(depth, sizeof(struct json_tokener_srec));
131         if (!tok->stack)
132         {
133                 free(tok);
134                 return NULL;
135         }
136         tok->pb = printbuf_new();
137         tok->max_depth = depth;
138         json_tokener_reset(tok);
139         return tok;
140 }
141 
142 struct json_tokener *json_tokener_new(void)
143 {
144         return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
145 }
146 
147 void json_tokener_free(struct json_tokener *tok)
148 {
149         json_tokener_reset(tok);
150         if (tok->pb)
151                 printbuf_free(tok->pb);
152         free(tok->stack);
153         free(tok);
154 }
155 
156 static void json_tokener_reset_level(struct json_tokener *tok, int depth)
157 {
158         tok->stack[depth].state = json_tokener_state_eatws;
159         tok->stack[depth].saved_state = json_tokener_state_start;
160         json_object_put(tok->stack[depth].current);
161         tok->stack[depth].current = NULL;
162         free(tok->stack[depth].obj_field_name);
163         tok->stack[depth].obj_field_name = NULL;
164 }
165 
166 void json_tokener_reset(struct json_tokener *tok)
167 {
168         int i;
169         if (!tok)
170                 return;
171 
172         for (i = tok->depth; i >= 0; i--)
173                 json_tokener_reset_level(tok, i);
174         tok->depth = 0;
175         tok->err = json_tokener_success;
176 }
177 
178 struct json_object *json_tokener_parse(const char *str)
179 {
180         enum json_tokener_error jerr_ignored;
181         struct json_object *obj;
182         obj = json_tokener_parse_verbose(str, &jerr_ignored);
183         return obj;
184 }
185 
186 struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
187 {
188         struct json_tokener *tok;
189         struct json_object *obj;
190 
191         tok = json_tokener_new();
192         if (!tok)
193                 return NULL;
194         obj = json_tokener_parse_ex(tok, str, -1);
195         *error = tok->err;
196         if (tok->err != json_tokener_success
197 #if 0
198                 /* This would be a more sensible default, and cause parsing
199                  * things like "null123" to fail when the caller can't know
200                  * where the parsing left off, but starting to fail would
201                  * be a notable behaviour change.  Save for a 1.0 release.
202                  */
203             || json_tokener_get_parse_end(tok) != strlen(str)
204 #endif
205         )
206 
207         {
208                 if (obj != NULL)
209                         json_object_put(obj);
210                 obj = NULL;
211         }
212 
213         json_tokener_free(tok);
214         return obj;
215 }
216 
/*
 * Shorthand for the fields of the stack record at the current depth
 * (tok->stack[tok->depth]).  Each expands to an lvalue built from a variable
 * literally named `tok`, so they are only usable where such a variable is in
 * scope, and they follow tok->depth as it changes.
 */
#define state tok->stack[tok->depth].state
#define saved_state tok->stack[tok->depth].saved_state
#define current tok->stack[tok->depth].current
#define obj_field_name tok->stack[tok->depth].obj_field_name
221 
222 /* Optimization:
223  * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
224  * iterating character by character.  A large performance boost is
225  * achieved by using tighter loops to locally handle units such as
226  * comments and strings.  Loops that handle an entire token within
227  * their scope also gather entire strings and pass them to
228  * printbuf_memappend() in a single call, rather than calling
229  * printbuf_memappend() one char at a time.
230  *
231  * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is
232  * common to both the main loop and the tighter loops.
233  */
234 
/* PEEK_CHAR(dest, tok) macro:
 *   Peeks at the current char (*str) and stores it in dest.
 *   Evaluates to 1 on success.  Evaluates to 0, with tok->err set, when:
 *     - tok->char_offset has reached len.  err is json_tokener_success if
 *       depth is 0, state is eatws and saved_state is finish; otherwise it
 *       is json_tokener_continue.
 *     - JSON_TOKENER_VALIDATE_UTF8 is set in tok->flags and
 *       json_tokener_validate_utf8() rejects the char.  err is
 *       json_tokener_error_parse_utf8_string.
 *   dest is left untouched whenever the macro evaluates to 0.
 *   Implicit inputs:  str, len, nBytesp vars.  The state and saved_state
 *   macros used here expand in terms of a variable literally named tok.
 */
#define PEEK_CHAR(dest, tok)                                                   \
        (((tok)->char_offset == len)                                           \
             ? (((tok)->depth == 0 && state == json_tokener_state_eatws &&     \
                 saved_state == json_tokener_state_finish)                     \
                    ? (((tok)->err = json_tokener_success), 0)                 \
                    : (((tok)->err = json_tokener_continue), 0))               \
             : ((((tok)->flags & JSON_TOKENER_VALIDATE_UTF8) &&                \
                 (!json_tokener_validate_utf8(*str, nBytesp)))                 \
                    ? (((tok)->err = json_tokener_error_parse_utf8_string), 0) \
                    : (((dest) = *str), 1)))
250 
/* ADVANCE_CHAR(str, tok) macro:
 *   Steps str forward by one char and bumps (tok)->char_offset to match.
 *   The expression evaluates to c, which the macro itself never modifies, so
 *   callers can test the char that was just stepped over (0 at end of string).
 *   Implicit inputs:  c var
 */
#define ADVANCE_CHAR(str, tok) ((str)++, (tok)->char_offset++, c)

/* End optimization macro defs */
257 
258 /* End optimization macro defs */
259 
260 struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str, int len)
261 {
262         struct json_object *obj = NULL;
263         char c = '\1';
264         unsigned int nBytes = 0;
265         unsigned int *nBytesp = &nBytes;
266 
267 #ifdef HAVE_USELOCALE
268         locale_t oldlocale = uselocale(NULL);
269         locale_t newloc;
270 #elif defined(HAVE_SETLOCALE)
271         char *oldlocale = NULL;
272 #endif
273 
274         tok->char_offset = 0;
275         tok->err = json_tokener_success;
276 
277         /* this interface is presently not 64-bit clean due to the int len argument
278          * and the internal printbuf interface that takes 32-bit int len arguments
279          * so the function limits the maximum string size to INT32_MAX (2GB).
280          * If the function is called with len == -1 then strlen is called to check
281          * the string length is less than INT32_MAX (2GB)
282          */
283         if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX))
284         {
285                 tok->err = json_tokener_error_size;
286                 return NULL;
287         }
288 
289 #ifdef HAVE_USELOCALE
290         {
291                 locale_t duploc = duplocale(oldlocale);
292                 newloc = newlocale(LC_NUMERIC_MASK, "C", duploc);
293                 if (newloc == NULL)
294                 {
295                         freelocale(duploc);
296                         return NULL;
297                 }
298                 uselocale(newloc);
299         }
300 #elif defined(HAVE_SETLOCALE)
301         {
302                 char *tmplocale;
303                 tmplocale = setlocale(LC_NUMERIC, NULL);
304                 if (tmplocale)
305                         oldlocale = strdup(tmplocale);
306                 setlocale(LC_NUMERIC, "C");
307         }
308 #endif
309 
310         while (PEEK_CHAR(c, tok)) // Note: c might be '\0' !
311         {
312 
313         redo_char:
314                 switch (state)
315                 {
316 
317                 case json_tokener_state_eatws:
318                         /* Advance until we change state */
319                         while (isspace((unsigned char)c))
320                         {
321                                 if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
322                                         goto out;
323                         }
324                         if (c == '/' && !(tok->flags & JSON_TOKENER_STRICT))
325                         {
326                                 printbuf_reset(tok->pb);
327                                 printbuf_memappend_fast(tok->pb, &c, 1);
328                                 state = json_tokener_state_comment_start;
329                         }
330                         else
331                         {
332                                 state = saved_state;
333                                 goto redo_char;
334                         }
335                         break;
336 
337                 case json_tokener_state_start:
338                         switch (c)
339                         {
340                         case '{':
341                                 state = json_tokener_state_eatws;
342                                 saved_state = json_tokener_state_object_field_start;
343                                 current = json_object_new_object();
344                                 if (current == NULL)
345                                         goto out;
346                                 break;
347                         case '[':
348                                 state = json_tokener_state_eatws;
349                                 saved_state = json_tokener_state_array;
350                                 current = json_object_new_array();
351                                 if (current == NULL)
352                                         goto out;
353                                 break;
354                         case 'I':
355                         case 'i':
356                                 state = json_tokener_state_inf;
357                                 printbuf_reset(tok->pb);
358                                 tok->st_pos = 0;
359                                 goto redo_char;
360                         case 'N':
361                         case 'n':
362                                 state = json_tokener_state_null; // or NaN
363                                 printbuf_reset(tok->pb);
364                                 tok->st_pos = 0;
365                                 goto redo_char;
366                         case '\'':
367                                 if (tok->flags & JSON_TOKENER_STRICT)
368                                 {
369                                         /* in STRICT mode only double-quote are allowed */
370                                         tok->err = json_tokener_error_parse_unexpected;
371                                         goto out;
372                                 }
373                                 /* FALLTHRU */
374                         case '"':
375                                 state = json_tokener_state_string;
376                                 printbuf_reset(tok->pb);
377                                 tok->quote_char = c;
378                                 break;
379                         case 'T':
380                         case 't':
381                         case 'F':
382                         case 'f':
383                                 state = json_tokener_state_boolean;
384                                 printbuf_reset(tok->pb);
385                                 tok->st_pos = 0;
386                                 goto redo_char;
387                         case '':
388                         case '1':
389                         case '2':
390                         case '3':
391                         case '4':
392                         case '5':
393                         case '6':
394                         case '7':
395                         case '8':
396                         case '9':
397                         case '-':
398                                 state = json_tokener_state_number;
399                                 printbuf_reset(tok->pb);
400                                 tok->is_double = 0;
401                                 goto redo_char;
402                         default: tok->err = json_tokener_error_parse_unexpected; goto out;
403                         }
404                         break;
405 
406                 case json_tokener_state_finish:
407                         if (tok->depth == 0)
408                                 goto out;
409                         obj = json_object_get(current);
410                         json_tokener_reset_level(tok, tok->depth);
411                         tok->depth--;
412                         goto redo_char;
413 
414                 case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */
415                 {
416                         /* If we were guaranteed to have len set, then we could (usually) handle
417                          * the entire "Infinity" check in a single strncmp (strncasecmp), but
418                          * since len might be -1 (i.e. "read until \0"), we need to check it
419                          * a character at a time.
420                          * Trying to handle it both ways would make this code considerably more
421                          * complicated with likely little performance benefit.
422                          */
423                         int is_negative = 0;
424                         const char *_json_inf_str = json_inf_str;
425                         if (!(tok->flags & JSON_TOKENER_STRICT))
426                                 _json_inf_str = json_inf_str_lower;
427 
428                         /* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */
429                         while (tok->st_pos < (int)json_inf_str_len)
430                         {
431                                 char inf_char = *str;
432                                 if (!(tok->flags & JSON_TOKENER_STRICT))
433                                         inf_char = tolower((unsigned char)*str);
434                                 if (inf_char != _json_inf_str[tok->st_pos])
435                                 {
436                                         tok->err = json_tokener_error_parse_unexpected;
437                                         goto out;
438                                 }
439                                 tok->st_pos++;
440                                 (void)ADVANCE_CHAR(str, tok);
441                                 if (!PEEK_CHAR(c, tok))
442                                 {
443                                         /* out of input chars, for now at least */
444                                         goto out;
445                                 }
446                         }
447                         /* We checked the full length of "Infinity", so create the object.
448                          * When handling -Infinity, the number parsing code will have dropped
449                          * the "-" into tok->pb for us, so check it now.
450                          */
451                         if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-')
452                         {
453                                 is_negative = 1;
454                         }
455                         current = json_object_new_double(is_negative ? -INFINITY : INFINITY);
456                         if (current == NULL)
457                                 goto out;
458                         saved_state = json_tokener_state_finish;
459                         state = json_tokener_state_eatws;
460                         goto redo_char;
461                 }
462                 break;
463                 case json_tokener_state_null: /* aka starts with 'n' */
464                 {
465                         int size;
466                         int size_nan;
467                         printbuf_memappend_fast(tok->pb, &c, 1);
468                         size = json_min(tok->st_pos + 1, json_null_str_len);
469                         size_nan = json_min(tok->st_pos + 1, json_nan_str_len);
470                         if ((!(tok->flags & JSON_TOKENER_STRICT) &&
471                              strncasecmp(json_null_str, tok->pb->buf, size) == 0) ||
472                             (strncmp(json_null_str, tok->pb->buf, size) == 0))
473                         {
474                                 if (tok->st_pos == json_null_str_len)
475                                 {
476                                         current = NULL;
477                                         saved_state = json_tokener_state_finish;
478                                         state = json_tokener_state_eatws;
479                                         goto redo_char;
480                                 }
481                         }
482                         else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
483                                   strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) ||
484                                  (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0))
485                         {
486                                 if (tok->st_pos == json_nan_str_len)
487                                 {
488                                         current = json_object_new_double(NAN);
489                                         if (current == NULL)
490                                                 goto out;
491                                         saved_state = json_tokener_state_finish;
492                                         state = json_tokener_state_eatws;
493                                         goto redo_char;
494                                 }
495                         }
496                         else
497                         {
498                                 tok->err = json_tokener_error_parse_null;
499                                 goto out;
500                         }
501                         tok->st_pos++;
502                 }
503                 break;
504 
505                 case json_tokener_state_comment_start:
506                         if (c == '*')
507                         {
508                                 state = json_tokener_state_comment;
509                         }
510                         else if (c == '/')
511                         {
512                                 state = json_tokener_state_comment_eol;
513                         }
514                         else
515                         {
516                                 tok->err = json_tokener_error_parse_comment;
517                                 goto out;
518                         }
519                         printbuf_memappend_fast(tok->pb, &c, 1);
520                         break;
521 
522                 case json_tokener_state_comment:
523                 {
524                         /* Advance until we change state */
525                         const char *case_start = str;
526                         while (c != '*')
527                         {
528                                 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
529                                 {
530                                         printbuf_memappend_fast(tok->pb, case_start,
531                                                                 str - case_start);
532                                         goto out;
533                                 }
534                         }
535                         printbuf_memappend_fast(tok->pb, case_start, 1 + str - case_start);
536                         state = json_tokener_state_comment_end;
537                 }
538                 break;
539 
540                 case json_tokener_state_comment_eol:
541                 {
542                         /* Advance until we change state */
543                         const char *case_start = str;
544                         while (c != '\n')
545                         {
546                                 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
547                                 {
548                                         printbuf_memappend_fast(tok->pb, case_start,
549                                                                 str - case_start);
550                                         goto out;
551                                 }
552                         }
553                         printbuf_memappend_fast(tok->pb, case_start, str - case_start);
554                         MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
555                         state = json_tokener_state_eatws;
556                 }
557                 break;
558 
559                 case json_tokener_state_comment_end:
560                         printbuf_memappend_fast(tok->pb, &c, 1);
561                         if (c == '/')
562                         {
563                                 MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
564                                 state = json_tokener_state_eatws;
565                         }
566                         else
567                         {
568                                 state = json_tokener_state_comment;
569                         }
570                         break;
571 
572                 case json_tokener_state_string:
573                 {
574                         /* Advance until we change state */
575                         const char *case_start = str;
576                         while (1)
577                         {
578                                 if (c == tok->quote_char)
579                                 {
580                                         printbuf_memappend_fast(tok->pb, case_start,
581                                                                 str - case_start);
582                                         current =
583                                             json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
584                                         if (current == NULL)
585                                                 goto out;
586                                         saved_state = json_tokener_state_finish;
587                                         state = json_tokener_state_eatws;
588                                         break;
589                                 }
590                                 else if (c == '\\')
591                                 {
592                                         printbuf_memappend_fast(tok->pb, case_start,
593                                                                 str - case_start);
594                                         saved_state = json_tokener_state_string;
595                                         state = json_tokener_state_string_escape;
596                                         break;
597                                 }
598                                 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
599                                 {
600                                         printbuf_memappend_fast(tok->pb, case_start,
601                                                                 str - case_start);
602                                         goto out;
603                                 }
604                         }
605                 }
606                 break;
607 
608                 case json_tokener_state_string_escape:
609                         switch (c)
610                         {
611                         case '"':
612                         case '\\':
613                         case '/':
614                                 printbuf_memappend_fast(tok->pb, &c, 1);
615                                 state = saved_state;
616                                 break;
617                         case 'b':
618                         case 'n':
619                         case 'r':
620                         case 't':
621                         case 'f':
622                                 if (c == 'b')
623                                         printbuf_memappend_fast(tok->pb, "\b", 1);
624                                 else if (c == 'n')
625                                         printbuf_memappend_fast(tok->pb, "\n", 1);
626                                 else if (c == 'r')
627                                         printbuf_memappend_fast(tok->pb, "\r", 1);
628                                 else if (c == 't')
629                                         printbuf_memappend_fast(tok->pb, "\t", 1);
630                                 else if (c == 'f')
631                                         printbuf_memappend_fast(tok->pb, "\f", 1);
632                                 state = saved_state;
633                                 break;
634                         case 'u':
635                                 tok->ucs_char = 0;
636                                 tok->st_pos = 0;
637                                 state = json_tokener_state_escape_unicode;
638                                 break;
639                         default: tok->err = json_tokener_error_parse_string; goto out;
640                         }
641                         break;
642 
643                         // ===================================================
644 
645                 case json_tokener_state_escape_unicode:
646                 {
647                         /* Handle a 4-byte \uNNNN sequence, or two sequences if a surrogate pair */
648                         while (1)
649                         {
650                                 if (!c || !strchr(json_hex_chars, c))
651                                 {
652                                         tok->err = json_tokener_error_parse_string;
653                                         goto out;
654                                 }
655                                 tok->ucs_char |=
656                                     ((unsigned int)jt_hexdigit(c) << ((3 - tok->st_pos) * 4));
657                                 tok->st_pos++;
658                                 if (tok->st_pos >= 4)
659                                         break;
660 
661                                 (void)ADVANCE_CHAR(str, tok);
662                                 if (!PEEK_CHAR(c, tok))
663                                 {
664                                         /*
665                                          * We're out of characters in the current call to
666                                          * json_tokener_parse(), but a subsequent call might
667                                          * provide us with more, so leave our current state
668                                          * as-is (including tok->high_surrogate) and return.
669                                          */
670                                         goto out;
671                                 }
672                         }
673                         tok->st_pos = 0;
674 
675                         /* Now, we have a full \uNNNN sequence in tok->ucs_char */
676 
677                         /* If the *previous* sequence was a high surrogate ... */
678                         if (tok->high_surrogate)
679                         {
680                                 if (IS_LOW_SURROGATE(tok->ucs_char))
681                                 {
682                                         /* Recalculate the ucs_char, then fall thru to process normally */
683                                         tok->ucs_char = DECODE_SURROGATE_PAIR(tok->high_surrogate,
684                                                                               tok->ucs_char);
685                                 }
686                                 else
687                                 {
688                                         /* High surrogate was not followed by a low surrogate
689                                          * Replace the high and process the rest normally
690                                          */
691                                         printbuf_memappend_fast(tok->pb,
692                                                                 (char *)utf8_replacement_char, 3);
693                                 }
694                                 tok->high_surrogate = 0;
695                         }
696 
697                         if (tok->ucs_char < 0x80)
698                         {
699                                 unsigned char unescaped_utf[1];
700                                 unescaped_utf[0] = tok->ucs_char;
701                                 printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 1);
702                         }
703                         else if (tok->ucs_char < 0x800)
704                         {
705                                 unsigned char unescaped_utf[2];
706                                 unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
707                                 unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
708                                 printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 2);
709                         }
710                         else if (IS_HIGH_SURROGATE(tok->ucs_char))
711                         {
712                                 /*
713                                  * The next two characters should be \u, HOWEVER,
714                                  * we can't simply peek ahead here, because the
715                                  * characters we need might not be passed to us
716                                  * until a subsequent call to json_tokener_parse.
717                                  * Instead, transition through a couple of states.
718                                  * (now):
719                                  *   _escape_unicode => _unicode_need_escape
720                                  * (see a '\\' char):
721                                  *   _unicode_need_escape => _unicode_need_u
722                                  * (see a 'u' char):
723                                  *   _unicode_need_u => _escape_unicode
724                                  *      ...and we'll end up back around here.
725                                  */
726                                 tok->high_surrogate = tok->ucs_char;
727                                 tok->ucs_char = 0;
728                                 state = json_tokener_state_escape_unicode_need_escape;
729                                 break;
730                         }
731                         else if (IS_LOW_SURROGATE(tok->ucs_char))
732                         {
733                                 /* Got a low surrogate not preceded by a high */
734                                 printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
735                         }
736                         else if (tok->ucs_char < 0x10000)
737                         {
738                                 unsigned char unescaped_utf[3];
739                                 unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
740                                 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
741                                 unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
742                                 printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 3);
743                         }
744                         else if (tok->ucs_char < 0x110000)
745                         {
746                                 unsigned char unescaped_utf[4];
747                                 unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
748                                 unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
749                                 unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
750                                 unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
751                                 printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 4);
752                         }
753                         else
754                         {
755                                 /* Don't know what we got--insert the replacement char */
756                                 printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
757                         }
758                         state = saved_state; // i.e. _state_string or _state_object_field
759                 }
760                 break;
761 
762                 case json_tokener_state_escape_unicode_need_escape:
763                         // We get here after processing a high_surrogate
764                         // require a '\\' char
765                         if (!c || c != '\\')
766                         {
767                                 /* Got a high surrogate without another sequence following
768                                  * it.  Put a replacement char in for the high surrogate
769                                  * and pop back up to _state_string or _state_object_field.
770                                  */
771                                 printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
772                                 tok->high_surrogate = 0;
773                                 tok->ucs_char = 0;
774                                 tok->st_pos = 0;
775                                 state = saved_state;
776                                 goto redo_char;
777                         }
778                         state = json_tokener_state_escape_unicode_need_u;
779                         break;
780 
781                 case json_tokener_state_escape_unicode_need_u:
782                         /* We already had a \ char, check that it's \u */
783                         if (!c || c != 'u')
784                         {
785                                 /* Got a high surrogate with some non-unicode escape
786                                  * sequence following it.
787                                  * Put a replacement char in for the high surrogate
788                                  * and handle the escape sequence normally.
789                                  */
790                                 printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
791                                 tok->high_surrogate = 0;
792                                 tok->ucs_char = 0;
793                                 tok->st_pos = 0;
794                                 state = json_tokener_state_string_escape;
795                                 goto redo_char;
796                         }
797                         state = json_tokener_state_escape_unicode;
798                         break;
799 
800                         // ===================================================
801 
802                 case json_tokener_state_boolean:
803                 {
804                         int size1, size2;
805                         printbuf_memappend_fast(tok->pb, &c, 1);
806                         size1 = json_min(tok->st_pos + 1, json_true_str_len);
807                         size2 = json_min(tok->st_pos + 1, json_false_str_len);
808                         if ((!(tok->flags & JSON_TOKENER_STRICT) &&
809                              strncasecmp(json_true_str, tok->pb->buf, size1) == 0) ||
810                             (strncmp(json_true_str, tok->pb->buf, size1) == 0))
811                         {
812                                 if (tok->st_pos == json_true_str_len)
813                                 {
814                                         current = json_object_new_boolean(1);
815                                         if (current == NULL)
816                                                 goto out;
817                                         saved_state = json_tokener_state_finish;
818                                         state = json_tokener_state_eatws;
819                                         goto redo_char;
820                                 }
821                         }
822                         else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
823                                   strncasecmp(json_false_str, tok->pb->buf, size2) == 0) ||
824                                  (strncmp(json_false_str, tok->pb->buf, size2) == 0))
825                         {
826                                 if (tok->st_pos == json_false_str_len)
827                                 {
828                                         current = json_object_new_boolean(0);
829                                         if (current == NULL)
830                                                 goto out;
831                                         saved_state = json_tokener_state_finish;
832                                         state = json_tokener_state_eatws;
833                                         goto redo_char;
834                                 }
835                         }
836                         else
837                         {
838                                 tok->err = json_tokener_error_parse_boolean;
839                                 goto out;
840                         }
841                         tok->st_pos++;
842                 }
843                 break;
844 
845                 case json_tokener_state_number:
846                 {
847                         /* Advance until we change state */
848                         const char *case_start = str;
849                         int case_len = 0;
850                         int is_exponent = 0;
851                         int neg_sign_ok = 1;
852                         int pos_sign_ok = 0;
853                         if (printbuf_length(tok->pb) > 0)
854                         {
855                                 /* We don't save all state from the previous incremental parse
856                                    so we need to re-generate it based on the saved string so far.
857                                  */
858                                 char *e_loc = strchr(tok->pb->buf, 'e');
859                                 if (!e_loc)
860                                         e_loc = strchr(tok->pb->buf, 'E');
861                                 if (e_loc)
862                                 {
863                                         char *last_saved_char =
864                                             &tok->pb->buf[printbuf_length(tok->pb) - 1];
865                                         is_exponent = 1;
866                                         pos_sign_ok = neg_sign_ok = 1;
867                                         /* If the "e" isn't at the end, we can't start with a '-' */
868                                         if (e_loc != last_saved_char)
869                                         {
870                                                 neg_sign_ok = 0;
871                                                 pos_sign_ok = 0;
872                                         }
873                                         // else leave it set to 1, i.e. start of the new input
874                                 }
875                         }
876 
877                         while (c && ((c >= '' && c <= '9') ||
878                                      (!is_exponent && (c == 'e' || c == 'E')) ||
879                                      (neg_sign_ok && c == '-') || (pos_sign_ok && c == '+') ||
880                                      (!tok->is_double && c == '.')))
881                         {
882                                 pos_sign_ok = neg_sign_ok = 0;
883                                 ++case_len;
884 
885                                 /* non-digit characters checks */
886                                 /* note: since the main loop condition to get here was
887                                  * an input starting with 0-9 or '-', we are
888                                  * protected from input starting with '.' or
889                                  * e/E.
890                                  */
891                                 switch (c)
892                                 {
893                                 case '.':
894                                         tok->is_double = 1;
895                                         pos_sign_ok = 1;
896                                         neg_sign_ok = 1;
897                                         break;
898                                 case 'e': /* FALLTHRU */
899                                 case 'E':
900                                         is_exponent = 1;
901                                         tok->is_double = 1;
902                                         /* the exponent part can begin with a negative sign */
903                                         pos_sign_ok = neg_sign_ok = 1;
904                                         break;
905                                 default: break;
906                                 }
907 
908                                 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
909                                 {
910                                         printbuf_memappend_fast(tok->pb, case_start, case_len);
911                                         goto out;
912                                 }
913                         }
914                         /*
915                                 Now we know c isn't a valid number char, but check whether
916                                 it might have been intended to be, and return a potentially
917                                 more understandable error right away.
918                                 However, if we're at the top-level, use the number as-is
919                             because c can be part of a new object to parse on the
920                                 next call to json_tokener_parse().
921                          */
922                         if (tok->depth > 0 && c != ',' && c != ']' && c != '}' && c != '/' &&
923                             c != 'I' && c != 'i' && !isspace((unsigned char)c))
924                         {
925                                 tok->err = json_tokener_error_parse_number;
926                                 goto out;
927                         }
928                         if (case_len > 0)
929                                 printbuf_memappend_fast(tok->pb, case_start, case_len);
930 
931                         // Check for -Infinity
932                         if (tok->pb->buf[0] == '-' && case_len <= 1 && (c == 'i' || c == 'I'))
933                         {
934                                 state = json_tokener_state_inf;
935                                 tok->st_pos = 0;
936                                 goto redo_char;
937                         }
938                         if (tok->is_double && !(tok->flags & JSON_TOKENER_STRICT))
939                         {
940                                 /* Trim some chars off the end, to allow things
941                                    like "123e+" to parse ok. */
942                                 while (printbuf_length(tok->pb) > 1)
943                                 {
944                                         char last_char = tok->pb->buf[printbuf_length(tok->pb) - 1];
945                                         if (last_char != 'e' && last_char != 'E' &&
946                                             last_char != '-' && last_char != '+')
947                                         {
948                                                 break;
949                                         }
950                                         tok->pb->buf[printbuf_length(tok->pb) - 1] = '\0';
951                                         printbuf_length(tok->pb)--;
952                                 }
953                         }
954                 }
955                         {
956                                 int64_t num64;
957                                 uint64_t numuint64;
958                                 double numd;
959                                 if (!tok->is_double && tok->pb->buf[0] == '-' &&
960                                     json_parse_int64(tok->pb->buf, &num64) == 0)
961                                 {
962                                         current = json_object_new_int64(num64);
963                                         if (current == NULL)
964                                                 goto out;
965                                 }
966                                 else if (!tok->is_double && tok->pb->buf[0] != '-' &&
967                                          json_parse_uint64(tok->pb->buf, &numuint64) == 0)
968                                 {
969                                         if (numuint64 && tok->pb->buf[0] == '' &&
970                                             (tok->flags & JSON_TOKENER_STRICT))
971                                         {
972                                                 tok->err = json_tokener_error_parse_number;
973                                                 goto out;
974                                         }
975                                         if (numuint64 <= INT64_MAX)
976                                         {
977                                                 num64 = (uint64_t)numuint64;
978                                                 current = json_object_new_int64(num64);
979                                                 if (current == NULL)
980                                                         goto out;
981                                         }
982                                         else
983                                         {
984                                                 current = json_object_new_uint64(numuint64);
985                                                 if (current == NULL)
986                                                         goto out;
987                                         }
988                                 }
989                                 else if (tok->is_double &&
990                                          json_tokener_parse_double(
991                                              tok->pb->buf, printbuf_length(tok->pb), &numd) == 0)
992                                 {
993                                         current = json_object_new_double_s(numd, tok->pb->buf);
994                                         if (current == NULL)
995                                                 goto out;
996                                 }
997                                 else
998                                 {
999                                         tok->err = json_tokener_error_parse_number;
1000                                         goto out;
1001                                 }
1002                                 saved_state = json_tokener_state_finish;
1003                                 state = json_tokener_state_eatws;
1004                                 goto redo_char;
1005                         }
1006                         break;
1007 
1008                 case json_tokener_state_array_after_sep:
1009                 case json_tokener_state_array:
1010                         if (c == ']')
1011                         {
1012                                 // Minimize memory usage; assume parsed objs are unlikely to be changed
1013                                 json_object_array_shrink(current, 0);
1014 
1015                                 if (state == json_tokener_state_array_after_sep &&
1016                                     (tok->flags & JSON_TOKENER_STRICT))
1017                                 {
1018                                         tok->err = json_tokener_error_parse_unexpected;
1019                                         goto out;
1020                                 }
1021                                 saved_state = json_tokener_state_finish;
1022                                 state = json_tokener_state_eatws;
1023                         }
1024                         else
1025                         {
1026                                 if (tok->depth >= tok->max_depth - 1)
1027                                 {
1028                                         tok->err = json_tokener_error_depth;
1029                                         goto out;
1030                                 }
1031                                 state = json_tokener_state_array_add;
1032                                 tok->depth++;
1033                                 json_tokener_reset_level(tok, tok->depth);
1034                                 goto redo_char;
1035                         }
1036                         break;
1037 
1038                 case json_tokener_state_array_add:
1039                         if (json_object_array_add(current, obj) != 0)
1040                                 goto out;
1041                         saved_state = json_tokener_state_array_sep;
1042                         state = json_tokener_state_eatws;
1043                         goto redo_char;
1044 
1045                 case json_tokener_state_array_sep:
1046                         if (c == ']')
1047                         {
1048                                 // Minimize memory usage; assume parsed objs are unlikely to be changed
1049                                 json_object_array_shrink(current, 0);
1050 
1051                                 saved_state = json_tokener_state_finish;
1052                                 state = json_tokener_state_eatws;
1053                         }
1054                         else if (c == ',')
1055                         {
1056                                 saved_state = json_tokener_state_array_after_sep;
1057                                 state = json_tokener_state_eatws;
1058                         }
1059                         else
1060                         {
1061                                 tok->err = json_tokener_error_parse_array;
1062                                 goto out;
1063                         }
1064                         break;
1065 
1066                 case json_tokener_state_object_field_start:
1067                 case json_tokener_state_object_field_start_after_sep:
1068                         if (c == '}')
1069                         {
1070                                 if (state == json_tokener_state_object_field_start_after_sep &&
1071                                     (tok->flags & JSON_TOKENER_STRICT))
1072                                 {
1073                                         tok->err = json_tokener_error_parse_unexpected;
1074                                         goto out;
1075                                 }
1076                                 saved_state = json_tokener_state_finish;
1077                                 state = json_tokener_state_eatws;
1078                         }
1079                         else if (c == '"' || c == '\'')
1080                         {
1081                                 tok->quote_char = c;
1082                                 printbuf_reset(tok->pb);
1083                                 state = json_tokener_state_object_field;
1084                         }
1085                         else
1086                         {
1087                                 tok->err = json_tokener_error_parse_object_key_name;
1088                                 goto out;
1089                         }
1090                         break;
1091 
1092                 case json_tokener_state_object_field:
1093                 {
1094                         /* Advance until we change state */
1095                         const char *case_start = str;
1096                         while (1)
1097                         {
1098                                 if (c == tok->quote_char)
1099                                 {
1100                                         printbuf_memappend_fast(tok->pb, case_start,
1101                                                                 str - case_start);
1102                                         obj_field_name = strdup(tok->pb->buf);
1103                                         saved_state = json_tokener_state_object_field_end;
1104                                         state = json_tokener_state_eatws;
1105                                         break;
1106                                 }
1107                                 else if (c == '\\')
1108                                 {
1109                                         printbuf_memappend_fast(tok->pb, case_start,
1110                                                                 str - case_start);
1111                                         saved_state = json_tokener_state_object_field;
1112                                         state = json_tokener_state_string_escape;
1113                                         break;
1114                                 }
1115                                 if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
1116                                 {
1117                                         printbuf_memappend_fast(tok->pb, case_start,
1118                                                                 str - case_start);
1119                                         goto out;
1120                                 }
1121                         }
1122                 }
1123                 break;
1124 
1125                 case json_tokener_state_object_field_end:
1126                         if (c == ':')
1127                         {
1128                                 saved_state = json_tokener_state_object_value;
1129                                 state = json_tokener_state_eatws;
1130                         }
1131                         else
1132                         {
1133                                 tok->err = json_tokener_error_parse_object_key_sep;
1134                                 goto out;
1135                         }
1136                         break;
1137 
1138                 case json_tokener_state_object_value:
1139                         if (tok->depth >= tok->max_depth - 1)
1140                         {
1141                                 tok->err = json_tokener_error_depth;
1142                                 goto out;
1143                         }
1144                         state = json_tokener_state_object_value_add;
1145                         tok->depth++;
1146                         json_tokener_reset_level(tok, tok->depth);
1147                         goto redo_char;
1148 
1149                 case json_tokener_state_object_value_add:
1150                         json_object_object_add(current, obj_field_name, obj);
1151                         free(obj_field_name);
1152                         obj_field_name = NULL;
1153                         saved_state = json_tokener_state_object_sep;
1154                         state = json_tokener_state_eatws;
1155                         goto redo_char;
1156 
1157                 case json_tokener_state_object_sep:
1158                         /* { */
1159                         if (c == '}')
1160                         {
1161                                 saved_state = json_tokener_state_finish;
1162                                 state = json_tokener_state_eatws;
1163                         }
1164                         else if (c == ',')
1165                         {
1166                                 saved_state = json_tokener_state_object_field_start_after_sep;
1167                                 state = json_tokener_state_eatws;
1168                         }
1169                         else
1170                         {
1171                                 tok->err = json_tokener_error_parse_object_value_sep;
1172                                 goto out;
1173                         }
1174                         break;
1175                 }
1176                 (void)ADVANCE_CHAR(str, tok);
1177                 if (!c) // This is the char *before* advancing
1178                         break;
1179         } /* while(PEEK_CHAR) */
1180 
1181 out:
1182         if ((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && (nBytes != 0))
1183         {
1184                 tok->err = json_tokener_error_parse_utf8_string;
1185         }
1186         if (c && (state == json_tokener_state_finish) && (tok->depth == 0) &&
1187             (tok->flags & (JSON_TOKENER_STRICT | JSON_TOKENER_ALLOW_TRAILING_CHARS)) ==
1188                 JSON_TOKENER_STRICT)
1189         {
1190                 /* unexpected char after JSON data */
1191                 tok->err = json_tokener_error_parse_unexpected;
1192         }
1193         if (!c)
1194         {
1195                 /* We hit an eof char (0) */
1196                 if (state != json_tokener_state_finish && saved_state != json_tokener_state_finish)
1197                         tok->err = json_tokener_error_parse_eof;
1198         }
1199 
1200 #ifdef HAVE_USELOCALE
1201         uselocale(oldlocale);
1202         freelocale(newloc);
1203 #elif defined(HAVE_SETLOCALE)
1204         setlocale(LC_NUMERIC, oldlocale);
1205         free(oldlocale);
1206 #endif
1207 
1208         if (tok->err == json_tokener_success)
1209         {
1210                 json_object *ret = json_object_get(current);
1211                 int ii;
1212 
1213                 /* Partially reset, so we parse additional objects on subsequent calls. */
1214                 for (ii = tok->depth; ii >= 0; ii--)
1215                         json_tokener_reset_level(tok, ii);
1216                 return ret;
1217         }
1218 
1219         MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", json_tokener_errors[tok->err],
1220                  tok->char_offset);
1221         return NULL;
1222 }
1223 
/*
 * Incrementally validate one byte of a UTF-8 encoded stream.
 *
 * c      - the next byte of input.
 * nBytes - in/out state: the number of continuation bytes (10xxxxxx)
 *          still required to finish the sequence currently being read.
 *          It must be 0 before the first byte is fed in; it is 0 again
 *          whenever a sequence has been completed.
 *
 * Returns 1 if the byte is acceptable at this position, 0 if it is not.
 * On a 0 return *nBytes is left unchanged.
 *
 * Lead bytes that can never occur in well-formed UTF-8 are rejected:
 *   0xC0, 0xC1   - would only introduce an overlong encoding of ASCII
 *   0xF5 .. 0xFF - would encode a value above U+10FFFF, or are not
 *                  lead bytes at all
 *
 * Only the remaining-byte count is carried between calls, so range
 * restrictions on the second byte that depend on the lead byte
 * (e.g. 0xE0 followed by 0x80..0x9F, or 0xED followed by 0xA0..0xBF)
 * are not enforced here.
 */
static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes)
{
        /* Work on an unsigned copy so the comparisons below are well defined
         * regardless of whether plain char is signed.
         */
        unsigned char chr = c;

        if (*nBytes != 0)
        {
                /* In the middle of a sequence: must be a continuation byte */
                if ((chr & 0xC0) != 0x80)
                        return 0;
                (*nBytes)--;
                return 1;
        }

        /* Start of a new character */
        if (chr < 0x80)
                return 1; /* plain ASCII, nothing more to expect */

        /* Bytes that are never valid anywhere in UTF-8 */
        if (chr == 0xC0 || chr == 0xC1 || chr > 0xF4)
                return 0;

        if ((chr & 0xe0) == 0xc0)
                *nBytes = 1; /* 110xxxxx: one continuation byte follows */
        else if ((chr & 0xf0) == 0xe0)
                *nBytes = 2; /* 1110xxxx: two continuation bytes follow */
        else if ((chr & 0xf8) == 0xf0)
                *nBytes = 3; /* 11110xxx: three continuation bytes follow */
        else
                return 0; /* stray continuation byte (0x80..0xBF) */

        return 1;
}
1249 
1250 void json_tokener_set_flags(struct json_tokener *tok, int flags)
1251 {
1252         tok->flags = flags;
1253 }
1254 
1255 size_t json_tokener_get_parse_end(struct json_tokener *tok)
1256 {
1257         assert(tok->char_offset >= 0); /* Drop this line when char_offset becomes a size_t */
1258         return (size_t)tok->char_offset;
1259 }
1260 
/*
 * Convert the textual number in buf to a double.
 *
 * buf    - NUL-terminated string holding the candidate number.
 * len    - number of characters of buf that make up the number; the
 *          conversion only counts as successful if strtod() consumes
 *          exactly this many characters.
 * retval - receives whatever strtod() returned.  It is always written,
 *          even on failure.
 *
 * Returns 0 on success, 1 on failure.  Failure means strtod() converted
 * nothing at all (which includes an empty buf with len == 0), or it
 * stopped before or after the len-th character.
 *
 * Note that strtod() honours the current locale's decimal point.
 */
static int json_tokener_parse_double(const char *buf, int len, double *retval)
{
        char *end;

        *retval = strtod(buf, &end);

        /* No characters consumed: nothing here was a number.  Without this
         * check an empty input with len == 0 would be reported as a
         * successfully parsed 0.0.
         */
        if (end == buf)
                return 1;

        /* Compare consumed length rather than forming buf + len, which
         * would be an out-of-bounds pointer for a bogus (e.g. negative) len.
         */
        if ((end - buf) == len)
                return 0; /* It worked */

        return 1;
}
1269
This page was automatically generated by LXR 0.3.1. • OpenWrt
OpenWrt.org Cross Reference

Sources/json-c/json_tokener.c