Annotation of parser3/src/lib/json/JSON_parser.C, revision 1.2
1.1 misha 1: /*
2: Copyright (c) 2005 JSON.org
3:
4: Permission is hereby granted, free of charge, to any person obtaining a copy
5: of this software and associated documentation files (the "Software"), to deal
6: in the Software without restriction, including without limitation the rights
7: to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8: copies of the Software, and to permit persons to whom the Software is
9: furnished to do so, subject to the following conditions:
10:
11: The above copyright notice and this permission notice shall be included in all
12: copies or substantial portions of the Software.
13:
14: The Software shall be used for Good, not Evil.
15:
16: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17: IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18: FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19: AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20: LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21: OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22: SOFTWARE.
23: */
24:
25: /*
26: Callbacks, comments, Unicode handling by Jean Gressmann (jean@0x42.de), 2007-2010.
27:
28: Changelog:
29: 2010-05-07
30: Added error handling for memory allocation failure (sgbeal@googlemail.com).
31: Added diagnosis errors for invalid JSON.
32:
33: 2010-03-25
34: Fixed buffer overrun in grow_parse_buffer & cleaned up code.
35:
36: 2009-10-19
37: Replaced long double in JSON_value_struct with double after reports
38: of strtold being broken on some platforms (charles@transmissionbt.com).
39:
40: 2009-05-17
41: Incorporated benrudiak@googlemail.com fix for UTF16 decoding.
42:
43: 2009-05-14
44: Fixed float parsing bug related to a locale being set that didn't
45: use '.' as decimal point character (charles@transmissionbt.com).
46:
47: 2008-10-14
48: Renamed states.IN to states.IT to avoid a name clash with the IN macro
49: defined in windef.h (alexey.pelykh@gmail.com)
50:
51: 2008-07-19
52: Removed some duplicate code & debugging variable (charles@transmissionbt.com)
53:
54: 2008-05-28
55: Made JSON_value structure ANSI C compliant. This bug was reported by
56: trisk@acm.jhu.edu
57:
58: 2008-05-20
59: Fixed bug reported by charles@transmissionbt.com where the switching
60: from static to dynamic parse buffer did not copy the static parse
61: buffer's content.
62: */
63:
64:
65:
66: #include <assert.h>
67: #include <ctype.h>
68: #include <float.h>
69: #include <stddef.h>
70: #include <stdio.h>
71: #include <stdlib.h>
72: #include <string.h>
73: #include <locale.h>
74:
75: #include "JSON_parser.h"
76:
77: #ifdef _MSC_VER
78: # if _MSC_VER >= 1400 /* Visual Studio 2005 and up */
79: # pragma warning(disable:4996) // unsecure sscanf
80: # pragma warning(disable:4127) // conditional expression is constant
81: # endif
82: #endif
83:
84:
/* Local boolean constants used as return values throughout this file. */
#define false 0
#define true  1

/* Marks an invalid entry in the lookup tables below ("the universal error code"). */
#define __   -1

/*
    Sizes of the buffers embedded in the parser object. The defaults were
    chosen so that the object size is approximately one page (4K); both can
    be overridden from the build command line.
*/
#if !defined(JSON_PARSER_STACK_SIZE)
#   define JSON_PARSER_STACK_SIZE 128
#endif

#if !defined(JSON_PARSER_PARSE_BUFFER_SIZE)
#   define JSON_PARSER_PARSE_BUFFER_SIZE 3500
#endif

/*
    Allocation hooks. When JSON_PARSER_DEBUG_MALLOC is defined, allocations are
    routed to the JSON_parser_debug_* functions declared below; otherwise they
    map straight to malloc/free and the "reason" argument is discarded.
*/
#if defined(JSON_PARSER_DEBUG_MALLOC)
#   define JSON_parser_malloc JSON_parser_debug_malloc
#   define JSON_parser_free JSON_parser_debug_free
#else
#   define JSON_parser_malloc(bytes, reason) malloc(bytes)
#   define JSON_parser_free free
#endif

extern void* JSON_parser_debug_malloc(size_t bytes, const char* reason);
extern void JSON_parser_debug_free(void*);

/* Holds one UTF-16 code unit (used for a pending high surrogate). */
typedef unsigned short UTF16;
110:
111: struct JSON_parser_struct {
112: JSON_parser_callback callback;
113: void* ctx;
114: signed char state, before_comment_state, type, escaped, comment, allow_comments, handle_floats_manually, error;
115: char decimal_point;
116: UTF16 utf16_high_surrogate;
117: int current_char;
118: int depth;
119: int top;
120: int stack_capacity;
121: signed char* stack;
122: char* parse_buffer;
123: size_t parse_buffer_capacity;
124: size_t parse_buffer_count;
125: signed char static_stack[JSON_PARSER_STACK_SIZE];
126: char static_parse_buffer[JSON_PARSER_PARSE_BUFFER_SIZE];
127: };
128:
/*
    Number of elements of a real array (must not be used on a pointer).
    The argument is parenthesized before subscripting so that any array-valued
    expression can be passed safely.
*/
#define COUNTOF(x) (sizeof(x)/sizeof((x)[0]))
130:
/*
    Character classes. Every input character is first reduced to one of these
    classes, which keeps the state transition table small: it needs one column
    per class instead of one per character. The enumerator order defines the
    column order of state_transition_table and must not be changed.
*/
enum classes {
    C_SPACE = 0,    /* space */
    C_WHITE,        /* other whitespace */
    C_LCURB,        /* {  */
    C_RCURB,        /* }  */
    C_LSQRB,        /* [  */
    C_RSQRB,        /* ]  */
    C_COLON,        /* :  */
    C_COMMA,        /* ,  */
    C_QUOTE,        /* "  */
    C_BACKS,        /* \  */
    C_SLASH,        /* /  */
    C_PLUS,         /* +  */
    C_MINUS,        /* -  */
    C_POINT,        /* .  */
    C_ZERO,         /* 0  */
    C_DIGIT,        /* 123456789 */
    C_LOW_A,        /* a  */
    C_LOW_B,        /* b  */
    C_LOW_C,        /* c  */
    C_LOW_D,        /* d  */
    C_LOW_E,        /* e  */
    C_LOW_F,        /* f  */
    C_LOW_L,        /* l  */
    C_LOW_N,        /* n  */
    C_LOW_R,        /* r  */
    C_LOW_S,        /* s  */
    C_LOW_T,        /* t  */
    C_LOW_U,        /* u  */
    C_ABCDF,        /* ABCDF */
    C_E,            /* E  */
    C_ETC,          /* everything else */
    C_STAR,         /* *  */
    NR_CLASSES      /* number of classes, not a class itself */
};
173:
174: static signed char ascii_class[128] = {
175: /*
176: This array maps the 128 ASCII characters into character classes.
177: The remaining Unicode characters should be mapped to C_ETC.
178: Non-whitespace control characters are errors.
179: */
180: __, __, __, __, __, __, __, __,
181: __, C_WHITE, C_WHITE, __, __, C_WHITE, __, __,
182: __, __, __, __, __, __, __, __,
183: __, __, __, __, __, __, __, __,
184:
185: C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
186: C_ETC, C_ETC, C_STAR, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH,
187: C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
188: C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
189:
190: C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC,
191: C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
192: C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
193: C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC,
194:
195: C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC,
196: C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC,
197: C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC,
198: C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC
199: };
200:
201:
/*
    The state codes. Each state is a row of state_transition_table, so the
    enumerator order must match the row order of that table.
*/
enum states {
    GO = 0, /* start */
    OK,     /* ok */
    OB,     /* object */
    KE,     /* key */
    CO,     /* colon */
    VA,     /* value */
    AR,     /* array */
    ST,     /* string */
    ES,     /* escape */
    U1,     /* u1: first hex digit of \uXXXX expected */
    U2,     /* u2 */
    U3,     /* u3 */
    U4,     /* u4 */
    MI,     /* minus */
    ZE,     /* zero */
    IT,     /* integer */
    FR,     /* fraction */
    E1,     /* e */
    E2,     /* ex */
    E3,     /* exp */
    T1,     /* tr: 'r' of "true" expected */
    T2,     /* tru */
    T3,     /* true */
    F1,     /* fa: 'a' of "false" expected */
    F2,     /* fal */
    F3,     /* fals */
    F4,     /* false */
    N1,     /* nu: 'u' of "null" expected */
    N2,     /* nul */
    N3,     /* null */
    C1,     /* '/' read, '*' expected */
    C2,     /* inside a comment */
    C3,     /* '*' read inside a comment */
    FX,     /* *.* *eE* : directly after the decimal point */
    D1,     /* second UTF-16 character decoding started by \ */
    D2,     /* second UTF-16 character proceeded by u */
    NR_STATES   /* number of states, not a state itself */
};
244:
/*
    Action codes. They are stored in state_transition_table next to the state
    codes and are distinguished from them by being negative. The values
    -2 .. -9 are further actions that appear in the table as literal numbers.
*/
enum actions
{
    CB = -10,   /* comment begin */
    CE = -11,   /* comment end */
    FA = -12,   /* false */
    TR = -13,   /* true */
    NU = -14,   /* null */
    DE = -15,   /* double detected by exponent e E */
    DF = -16,   /* double detected by fraction . */
    SB = -17,   /* string begin */
    MX = -18,   /* integer detected by minus */
    ZX = -19,   /* integer detected by zero */
    IX = -20,   /* integer detected by 1-9 */
    EX = -21,   /* next char is escaped */
    UC = -22    /* Unicode character read */
};
261:
262:
263: static signed char state_transition_table[NR_STATES][NR_CLASSES] = {
264: /*
265: The state transition table takes the current state and the current symbol,
266: and returns either a new state or an action. An action is represented as a
267: negative number. A JSON text is accepted if at the end of the text the
268: state is OK and if the mode is MODE_DONE.
269:
270: white 1-9 ABCDF etc
271: space | { } [ ] : , " \ / + - . 0 | a b c d e f l n r s t u | E | * */
272: /*start GO*/ {GO,GO,-6,__,-5,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
273: /*ok OK*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
274: /*object OB*/ {OB,OB,__,-9,__,__,__,__,SB,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
275: /*key KE*/ {KE,KE,__,__,__,__,__,__,SB,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
276: /*colon CO*/ {CO,CO,__,__,__,__,-2,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
277: /*value VA*/ {VA,VA,-6,__,-5,__,__,__,SB,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,FA,__,NU,__,__,TR,__,__,__,__,__},
278: /*array AR*/ {AR,AR,-6,__,-5,-7,__,__,SB,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,FA,__,NU,__,__,TR,__,__,__,__,__},
279: /*string ST*/ {ST,__,ST,ST,ST,ST,ST,ST,-4,EX,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST},
280: /*escape ES*/ {__,__,__,__,__,__,__,__,ST,ST,ST,__,__,__,__,__,__,ST,__,__,__,ST,__,ST,ST,__,ST,U1,__,__,__,__},
281: /*u1 U1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__,__},
282: /*u2 U2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__,__},
283: /*u3 U3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__,__},
284: /*u4 U4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,UC,UC,UC,UC,UC,UC,UC,UC,__,__,__,__,__,__,UC,UC,__,__},
285: /*minus MI*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ZE,IT,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
286: /*zero ZE*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,DF,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
287: /*int IT*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,DF,IT,IT,__,__,__,__,DE,__,__,__,__,__,__,__,__,DE,__,__},
288: /*frac FR*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__,__},
289: /*e E1*/ {__,__,__,__,__,__,__,__,__,__,__,E2,E2,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
290: /*ex E2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
291: /*exp E3*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
292: /*tr T1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__,__},
293: /*tru T2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__,__},
294: /*true T3*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__,__},
295: /*fa F1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
296: /*fal F2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__,__},
297: /*fals F3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__,__},
298: /*false F4*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__,__},
299: /*nu N1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__,__},
300: /*nul N2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__,__},
301: /*null N3*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__},
302: /*/ C1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,C2},
303: /*/* C2*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3},
304: /** C3*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,CE,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3},
305: /*_. FX*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__,__},
306: /*\ D1*/ {__,__,__,__,__,__,__,__,__,D2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
307: /*\ D2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,U1,__,__,__,__},
308: };
309:
310:
/*
    The modes that can be pushed on the parser stack. MODE_DONE is pushed
    once by new_JSON_parser and marks the bottom of the stack; the other
    modes record which kind of container is currently open.
*/
enum modes {
    MODE_ARRAY  = 1,    /* inside [ ... ] */
    MODE_DONE   = 2,    /* bottom-of-stack marker */
    MODE_KEY    = 3,    /* inside { ... }, a key comes next */
    MODE_OBJECT = 4     /* inside { ... }, a value comes next */
};
320:
321: static void set_error(JSON_parser jc)
322: {
323: switch (jc->state) {
324: case GO:
325: switch (jc->current_char) {
326: case '{': case '}': case '[': case ']':
327: jc->error = JSON_E_UNBALANCED_COLLECTION;
328: break;
329: default:
330: jc->error = JSON_E_INVALID_CHAR;
331: break;
332: }
333: break;
334: case OB:
335: jc->error = JSON_E_EXPECTED_KEY;
336: break;
337: case AR:
338: jc->error = JSON_E_UNBALANCED_COLLECTION;
339: break;
340: case CO:
341: jc->error = JSON_E_EXPECTED_COLON;
342: break;
343: case KE:
344: jc->error = JSON_E_EXPECTED_KEY;
345: break;
346: /* \uXXXX\uYYYY */
347: case U1: case U2: case U3:case U4: case D1: case D2:
348: jc->error = JSON_E_INVALID_UNICODE_SEQUENCE;
349: break;
350: /* true, false, null */
351: case T1: case T2: case T3: case F1: case F2: case F3: case F4: case N1: case N2: case N3:
352: jc->error = JSON_E_INVALID_KEYWORD;
353: break;
354: /* minus, integer, fraction, exponent */
355: case MI: case ZE: case IT: case FR: case E1: case E2: case E3:
356: jc->error = JSON_E_INVALID_NUMBER;
357: break;
358: default:
359: jc->error = JSON_E_INVALID_CHAR;
360: break;
361: }
362: }
363:
364: static int
365: push(JSON_parser jc, int mode)
366: {
367: /*
368: Push a mode onto the stack. Return false if there is overflow.
369: */
370: assert(jc->top <= jc->stack_capacity);
371:
372: if (jc->depth < 0) {
373: if (jc->top == jc->stack_capacity) {
374: const size_t bytes_to_copy = jc->stack_capacity * sizeof(jc->stack[0]);
375: const size_t new_capacity = jc->stack_capacity * 2;
376: const size_t bytes_to_allocate = new_capacity * sizeof(jc->stack[0]);
377: void* mem = JSON_parser_malloc(bytes_to_allocate, "stack");
378: if (!mem) {
379: jc->error = JSON_E_OUT_OF_MEMORY;
380: return false;
381: }
382: jc->stack_capacity = (int)new_capacity;
383: memcpy(mem, jc->stack, bytes_to_copy);
384: if (jc->stack != &jc->static_stack[0]) {
385: JSON_parser_free(jc->stack);
386: }
387: jc->stack = (signed char*)mem;
388: }
389: } else {
390: if (jc->top == jc->depth) {
391: jc->error = JSON_E_NESTING_DEPTH_REACHED;
392: return false;
393: }
394: }
395: jc->stack[++jc->top] = (signed char)mode;
396: return true;
397: }
398:
399:
400: static int
401: pop(JSON_parser jc, int mode)
402: {
403: /*
404: Pop the stack, assuring that the current mode matches the expectation.
405: Return false if there is underflow or if the modes mismatch.
406: */
407: if (jc->top < 0 || jc->stack[jc->top] != mode) {
408: return false;
409: }
410: jc->top -= 1;
411: return true;
412: }
413:
414:
415: #define parse_buffer_clear(jc) \
416: do {\
417: jc->parse_buffer_count = 0;\
418: jc->parse_buffer[0] = 0;\
419: } while (0)
420:
421: #define parse_buffer_pop_back_char(jc)\
422: do {\
423: assert(jc->parse_buffer_count >= 1);\
424: --jc->parse_buffer_count;\
425: jc->parse_buffer[jc->parse_buffer_count] = 0;\
426: } while (0)
427:
428: void delete_JSON_parser(JSON_parser jc)
429: {
430: if (jc) {
431: if (jc->stack != &jc->static_stack[0]) {
432: JSON_parser_free((void*)jc->stack);
433: }
434: if (jc->parse_buffer != &jc->static_parse_buffer[0]) {
435: JSON_parser_free((void*)jc->parse_buffer);
436: }
437: JSON_parser_free((void*)jc);
438: }
439: }
440:
441:
442: JSON_parser
443: new_JSON_parser(JSON_config* config)
444: {
445: /*
446: new_JSON_parser starts the checking process by constructing a JSON_parser
447: object. It takes a depth parameter that restricts the level of maximum
448: nesting.
449:
450: To continue the process, call JSON_parser_char for each character in the
451: JSON text, and then call JSON_parser_done to obtain the final result.
452: These functions are fully reentrant.
453: */
454:
455: int depth = 0;
456: JSON_config default_config;
457:
1.2 ! moko 458: JSON_parser jc = (JSON_parser)JSON_parser_malloc(sizeof(struct JSON_parser_struct), "parser");
1.1 misha 459:
460: if (jc == NULL) {
461: return NULL;
462: }
463:
464: memset(jc, 0, sizeof(*jc));
465:
466: /* initialize configuration */
467: init_JSON_config(&default_config);
468:
469: /* set to default configuration if none was provided */
470: if (config == NULL) {
471: config = &default_config;
472: }
473:
474: depth = config->depth;
475:
476: /* We need to be able to push at least one object */
477: if (depth == 0) {
478: depth = 1;
479: }
480:
481: jc->state = GO;
482: jc->top = -1;
483:
484: /* Do we want non-bound stack? */
485: if (depth > 0) {
486: jc->stack_capacity = depth;
487: jc->depth = depth;
488: if (depth <= (int)COUNTOF(jc->static_stack)) {
489: jc->stack = &jc->static_stack[0];
490: } else {
491: jc->stack = (signed char*)JSON_parser_malloc(jc->stack_capacity * sizeof(jc->stack[0]), "stack");
492: if (jc->stack == NULL) {
493: JSON_parser_free(jc);
494: return NULL;
495: }
496: }
497: } else {
498: jc->stack_capacity = (int)COUNTOF(jc->static_stack);
499: jc->depth = -1;
500: jc->stack = &jc->static_stack[0];
501: }
502:
503: /* set parser to start */
504: push(jc, MODE_DONE);
505:
506: /* set up the parse buffer */
507: jc->parse_buffer = &jc->static_parse_buffer[0];
508: jc->parse_buffer_capacity = COUNTOF(jc->static_parse_buffer);
509: parse_buffer_clear(jc);
510:
511: /* set up callback, comment & float handling */
512: jc->callback = config->callback;
513: jc->ctx = config->callback_ctx;
514: jc->allow_comments = (signed char)config->allow_comments != 0;
515: jc->handle_floats_manually = (signed char)config->handle_floats_manually != 0;
516:
517: /* set up decimal point */
518: jc->decimal_point = *localeconv()->decimal_point;
519:
520: return jc;
521: }
522:
523: static int parse_buffer_grow(JSON_parser jc)
524: {
525: const size_t bytes_to_copy = jc->parse_buffer_count * sizeof(jc->parse_buffer[0]);
526: const size_t new_capacity = jc->parse_buffer_capacity * 2;
527: const size_t bytes_to_allocate = new_capacity * sizeof(jc->parse_buffer[0]);
528: void* mem = JSON_parser_malloc(bytes_to_allocate, "parse buffer");
529:
530: if (mem == NULL) {
531: jc->error = JSON_E_OUT_OF_MEMORY;
532: return false;
533: }
534:
535: assert(new_capacity > 0);
536: memcpy(mem, jc->parse_buffer, bytes_to_copy);
537:
538: if (jc->parse_buffer != &jc->static_parse_buffer[0]) {
539: JSON_parser_free(jc->parse_buffer);
540: }
541:
542: jc->parse_buffer = (char*)mem;
543: jc->parse_buffer_capacity = new_capacity;
544:
545: return true;
546: }
547:
548: static int parse_buffer_reserve_for(JSON_parser jc, unsigned chars)
549: {
550: while (jc->parse_buffer_count + chars + 1 > jc->parse_buffer_capacity) {
551: if (!parse_buffer_grow(jc)) {
552: assert(jc->error == JSON_E_OUT_OF_MEMORY);
553: return false;
554: }
555: }
556:
557: return true;
558: }
559:
/*
    True if `count` more characters plus the terminating 0 fit into the parse
    buffer of parser `jc` without growing it.
*/
#define parse_buffer_has_space_for(jc, count) \
    ((jc)->parse_buffer_count + (count) + 1 <= (jc)->parse_buffer_capacity)

/*
    Appends character `c` to the parse buffer of parser `jc` and re-terminates
    the buffer. The caller must have reserved the space beforehand.
    Arguments are parenthesized so that arbitrary expressions can be passed.
*/
#define parse_buffer_push_back_char(jc, c)\
    do {\
        assert(parse_buffer_has_space_for(jc, 1)); \
        (jc)->parse_buffer[(jc)->parse_buffer_count++] = (c);\
        (jc)->parse_buffer[(jc)->parse_buffer_count]   = 0;\
    } while (0)
569:
/*
    Debug check: the value currently collected by parser `jc` is a scalar
    (null, boolean, number or string), i.e. something parse_parse_buffer can
    hand over to the callback.
*/
#define assert_is_non_container_type(jc) \
    assert( \
        (jc)->type == JSON_T_NULL || \
        (jc)->type == JSON_T_FALSE || \
        (jc)->type == JSON_T_TRUE || \
        (jc)->type == JSON_T_FLOAT || \
        (jc)->type == JSON_T_INTEGER || \
        (jc)->type == JSON_T_STRING)
578:
579:
580: static int parse_parse_buffer(JSON_parser jc)
581: {
582: if (jc->callback) {
583: JSON_value value, *arg = NULL;
584:
585: if (jc->type != JSON_T_NONE) {
586: assert_is_non_container_type(jc);
587:
588: switch(jc->type) {
589: case JSON_T_FLOAT:
590: arg = &value;
591: if (jc->handle_floats_manually) {
592: value.vu.str.value = jc->parse_buffer;
593: value.vu.str.length = jc->parse_buffer_count;
594: } else {
595: /* not checking with end pointer b/c there may be trailing ws */
596: value.vu.float_value = strtod(jc->parse_buffer, NULL);
597: }
598: break;
599: case JSON_T_INTEGER:
600: arg = &value;
601: sscanf(jc->parse_buffer, JSON_PARSER_INTEGER_SSCANF_TOKEN, &value.vu.integer_value);
602: break;
603: case JSON_T_STRING:
604: arg = &value;
605: value.vu.str.value = jc->parse_buffer;
606: value.vu.str.length = jc->parse_buffer_count;
607: break;
608: }
609:
610: if (!(*jc->callback)(jc->ctx, jc->type, arg)) {
611: return false;
612: }
613: }
614: }
615:
616: parse_buffer_clear(jc);
617:
618: return true;
619: }
620:
/* UTF-16 surrogate helpers: high surrogates are D800-DBFF, low ones DC00-DFFF. */
#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800)
#define IS_LOW_SURROGATE(uc)  (((uc) & 0xFC00) == 0xDC00)
/* Combines a high and a low surrogate into the code point they encode. */
#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)
/*
    Marker bits of the first byte of a UTF-8 sequence, indexed by the number
    of continuation bytes that follow. Read-only, hence const.
*/
static const unsigned char utf8_lead_bits[4] = { 0x00, 0xC0, 0xE0, 0xF0 };
625:
626: static int decode_unicode_char(JSON_parser jc)
627: {
628: int i;
629: unsigned uc = 0;
630: char* p;
631: int trail_bytes;
632:
633: assert(jc->parse_buffer_count >= 6);
634:
635: p = &jc->parse_buffer[jc->parse_buffer_count - 4];
636:
637: for (i = 12; i >= 0; i -= 4, ++p) {
638: unsigned x = *p;
639:
640: if (x >= 'a') {
641: x -= ('a' - 10);
642: } else if (x >= 'A') {
643: x -= ('A' - 10);
644: } else {
645: x &= ~0x30u;
646: }
647:
648: assert(x < 16);
649:
650: uc |= x << i;
651: }
652:
653: /* clear UTF-16 char from buffer */
654: jc->parse_buffer_count -= 6;
655: jc->parse_buffer[jc->parse_buffer_count] = 0;
656:
657: /* attempt decoding ... */
658: if (jc->utf16_high_surrogate) {
659: if (IS_LOW_SURROGATE(uc)) {
660: uc = DECODE_SURROGATE_PAIR(jc->utf16_high_surrogate, uc);
661: trail_bytes = 3;
662: jc->utf16_high_surrogate = 0;
663: } else {
664: /* high surrogate without a following low surrogate */
665: return false;
666: }
667: } else {
668: if (uc < 0x80) {
669: trail_bytes = 0;
670: } else if (uc < 0x800) {
671: trail_bytes = 1;
672: } else if (IS_HIGH_SURROGATE(uc)) {
673: /* save the high surrogate and wait for the low surrogate */
674: jc->utf16_high_surrogate = (UTF16)uc;
675: return true;
676: } else if (IS_LOW_SURROGATE(uc)) {
677: /* low surrogate without a preceding high surrogate */
678: return false;
679: } else {
680: trail_bytes = 2;
681: }
682: }
683:
684: jc->parse_buffer[jc->parse_buffer_count++] = (char) ((uc >> (trail_bytes * 6)) | utf8_lead_bits[trail_bytes]);
685:
686: for (i = trail_bytes * 6 - 6; i >= 0; i -= 6) {
687: jc->parse_buffer[jc->parse_buffer_count++] = (char) (((uc >> i) & 0x3F) | 0x80);
688: }
689:
690: jc->parse_buffer[jc->parse_buffer_count] = 0;
691:
692: return true;
693: }
694:
695: static int add_escaped_char_to_parse_buffer(JSON_parser jc, int next_char)
696: {
697: assert(parse_buffer_has_space_for(jc, 1));
698:
699: jc->escaped = 0;
700: /* remove the backslash */
701: parse_buffer_pop_back_char(jc);
702: switch(next_char) {
703: case 'b':
704: parse_buffer_push_back_char(jc, '\b');
705: break;
706: case 'f':
707: parse_buffer_push_back_char(jc, '\f');
708: break;
709: case 'n':
710: parse_buffer_push_back_char(jc, '\n');
711: break;
712: case 'r':
713: parse_buffer_push_back_char(jc, '\r');
714: break;
715: case 't':
716: parse_buffer_push_back_char(jc, '\t');
717: break;
718: case '"':
719: parse_buffer_push_back_char(jc, '"');
720: break;
721: case '\\':
722: parse_buffer_push_back_char(jc, '\\');
723: break;
724: case '/':
725: parse_buffer_push_back_char(jc, '/');
726: break;
727: case 'u':
728: parse_buffer_push_back_char(jc, '\\');
729: parse_buffer_push_back_char(jc, 'u');
730: break;
731: default:
732: return false;
733: }
734:
735: return true;
736: }
737:
738: static int add_char_to_parse_buffer(JSON_parser jc, int next_char, int next_class)
739: {
740: if (!parse_buffer_reserve_for(jc, 1)) {
741: assert(JSON_E_OUT_OF_MEMORY == jc->error);
742: return false;
743: }
744:
745: if (jc->escaped) {
746: if (!add_escaped_char_to_parse_buffer(jc, next_char)) {
747: jc->error = JSON_E_INVALID_ESCAPE_SEQUENCE;
748: return false;
749: }
750: } else if (!jc->comment) {
751: if ((jc->type != JSON_T_NONE) | !((next_class == C_SPACE) | (next_class == C_WHITE)) /* non-white-space */) {
752: parse_buffer_push_back_char(jc, (char)next_char);
753: }
754: }
755:
756: return true;
757: }
758:
/*
    Debug check: the value currently collected by parser `jc` is not a string,
    null or boolean, i.e. it can still turn into a floating point number.
    Wrapped in do { } while (0) so that the macro behaves like one statement,
    also as the body of an unbraced if/else.
*/
#define assert_type_isnt_string_null_or_bool(jc) \
    do {\
        assert((jc)->type != JSON_T_FALSE); \
        assert((jc)->type != JSON_T_TRUE); \
        assert((jc)->type != JSON_T_NULL); \
        assert((jc)->type != JSON_T_STRING); \
    } while (0)
764:
765:
766: int
767: JSON_parser_char(JSON_parser jc, int next_char)
768: {
769: /*
770: After calling new_JSON_parser, call this function for each character (or
771: partial character) in your JSON text. It can accept UTF-8, UTF-16, or
772: UTF-32. It returns true if things are looking ok so far. If it rejects the
773: text, it returns false.
774: */
775: int next_class, next_state;
776:
777: /*
778: Store the current char for error handling
779: */
780: jc->current_char = next_char;
781:
782: /*
783: Determine the character's class.
784: */
785: if (next_char < 0) {
786: jc->error = JSON_E_INVALID_CHAR;
787: return false;
788: }
789: if (next_char >= 128) {
790: next_class = C_ETC;
791: } else {
792: next_class = ascii_class[next_char];
793: if (next_class <= __) {
794: set_error(jc);
795: return false;
796: }
797: }
798:
799: if (!add_char_to_parse_buffer(jc, next_char, next_class)) {
800: return false;
801: }
802:
803: /*
804: Get the next state from the state transition table.
805: */
806: next_state = state_transition_table[jc->state][next_class];
807: if (next_state >= 0) {
808: /*
809: Change the state.
810: */
811: jc->state = (signed char)next_state;
812: } else {
813: /*
814: Or perform one of the actions.
815: */
816: switch (next_state) {
817: /* Unicode character */
818: case UC:
819: if(!decode_unicode_char(jc)) {
820: jc->error = JSON_E_INVALID_UNICODE_SEQUENCE;
821: return false;
822: }
823: /* check if we need to read a second UTF-16 char */
824: if (jc->utf16_high_surrogate) {
825: jc->state = D1;
826: } else {
827: jc->state = ST;
828: }
829: break;
830: /* escaped char */
831: case EX:
832: jc->escaped = 1;
833: jc->state = ES;
834: break;
835: /* integer detected by minus */
836: case MX:
837: jc->type = JSON_T_INTEGER;
838: jc->state = MI;
839: break;
840: /* integer detected by zero */
841: case ZX:
842: jc->type = JSON_T_INTEGER;
843: jc->state = ZE;
844: break;
845: /* integer detected by 1-9 */
846: case IX:
847: jc->type = JSON_T_INTEGER;
848: jc->state = IT;
849: break;
850:
851: /* floating point number detected by exponent*/
852: case DE:
853: assert_type_isnt_string_null_or_bool(jc);
854: jc->type = JSON_T_FLOAT;
855: jc->state = E1;
856: break;
857:
858: /* floating point number detected by fraction */
859: case DF:
860: assert_type_isnt_string_null_or_bool(jc);
861: if (!jc->handle_floats_manually) {
862: /*
863: Some versions of strtod (which underlies sscanf) don't support converting
864: C-locale formated floating point values.
865: */
866: assert(jc->parse_buffer[jc->parse_buffer_count-1] == '.');
867: jc->parse_buffer[jc->parse_buffer_count-1] = jc->decimal_point;
868: }
869: jc->type = JSON_T_FLOAT;
870: jc->state = FX;
871: break;
872: /* string begin " */
873: case SB:
874: parse_buffer_clear(jc);
875: assert(jc->type == JSON_T_NONE);
876: jc->type = JSON_T_STRING;
877: jc->state = ST;
878: break;
879:
880: /* n */
881: case NU:
882: assert(jc->type == JSON_T_NONE);
883: jc->type = JSON_T_NULL;
884: jc->state = N1;
885: break;
886: /* f */
887: case FA:
888: assert(jc->type == JSON_T_NONE);
889: jc->type = JSON_T_FALSE;
890: jc->state = F1;
891: break;
892: /* t */
893: case TR:
894: assert(jc->type == JSON_T_NONE);
895: jc->type = JSON_T_TRUE;
896: jc->state = T1;
897: break;
898:
899: /* closing comment */
900: case CE:
901: jc->comment = 0;
902: assert(jc->parse_buffer_count == 0);
903: assert(jc->type == JSON_T_NONE);
904: jc->state = jc->before_comment_state;
905: break;
906:
907: /* opening comment */
908: case CB:
909: if (!jc->allow_comments) {
910: return false;
911: }
912: parse_buffer_pop_back_char(jc);
913: if (!parse_parse_buffer(jc)) {
914: return false;
915: }
916: assert(jc->parse_buffer_count == 0);
917: assert(jc->type != JSON_T_STRING);
918: switch (jc->stack[jc->top]) {
919: case MODE_ARRAY:
920: case MODE_OBJECT:
921: switch(jc->state) {
922: case VA:
923: case AR:
924: jc->before_comment_state = jc->state;
925: break;
926: default:
927: jc->before_comment_state = OK;
928: break;
929: }
930: break;
931: default:
932: jc->before_comment_state = jc->state;
933: break;
934: }
935: jc->type = JSON_T_NONE;
936: jc->state = C1;
937: jc->comment = 1;
938: break;
939: /* empty } */
940: case -9:
941: parse_buffer_clear(jc);
942: if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_END, NULL)) {
943: return false;
944: }
945: if (!pop(jc, MODE_KEY)) {
946: return false;
947: }
948: jc->state = OK;
949: break;
950:
951: /* } */ case -8:
952: parse_buffer_pop_back_char(jc);
953: if (!parse_parse_buffer(jc)) {
954: return false;
955: }
956: if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_END, NULL)) {
957: return false;
958: }
959: if (!pop(jc, MODE_OBJECT)) {
960: jc->error = JSON_E_UNBALANCED_COLLECTION;
961: return false;
962: }
963: jc->type = JSON_T_NONE;
964: jc->state = OK;
965: break;
966:
967: /* ] */ case -7:
968: parse_buffer_pop_back_char(jc);
969: if (!parse_parse_buffer(jc)) {
970: return false;
971: }
972: if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_ARRAY_END, NULL)) {
973: return false;
974: }
975: if (!pop(jc, MODE_ARRAY)) {
976: jc->error = JSON_E_UNBALANCED_COLLECTION;
977: return false;
978: }
979:
980: jc->type = JSON_T_NONE;
981: jc->state = OK;
982: break;
983:
984: /* { */ case -6:
985: parse_buffer_pop_back_char(jc);
986: if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_BEGIN, NULL)) {
987: return false;
988: }
989: if (!push(jc, MODE_KEY)) {
990: return false;
991: }
992: assert(jc->type == JSON_T_NONE);
993: jc->state = OB;
994: break;
995:
996: /* [ */ case -5:
997: parse_buffer_pop_back_char(jc);
998: if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_ARRAY_BEGIN, NULL)) {
999: return false;
1000: }
1001: if (!push(jc, MODE_ARRAY)) {
1002: return false;
1003: }
1004: assert(jc->type == JSON_T_NONE);
1005: jc->state = AR;
1006: break;
1007:
1008: /* string end " */ case -4:
1009: parse_buffer_pop_back_char(jc);
1010: switch (jc->stack[jc->top]) {
1011: case MODE_KEY:
1012: assert(jc->type == JSON_T_STRING);
1013: jc->type = JSON_T_NONE;
1014: jc->state = CO;
1015:
1016: if (jc->callback) {
1017: JSON_value value;
1018: value.vu.str.value = jc->parse_buffer;
1019: value.vu.str.length = jc->parse_buffer_count;
1020: if (!(*jc->callback)(jc->ctx, JSON_T_KEY, &value)) {
1021: return false;
1022: }
1023: }
1024: parse_buffer_clear(jc);
1025: break;
1026: case MODE_ARRAY:
1027: case MODE_OBJECT:
1028: assert(jc->type == JSON_T_STRING);
1029: if (!parse_parse_buffer(jc)) {
1030: return false;
1031: }
1032: jc->type = JSON_T_NONE;
1033: jc->state = OK;
1034: break;
1035: default:
1036: return false;
1037: }
1038: break;
1039:
1040: /* , */ case -3:
1041: parse_buffer_pop_back_char(jc);
1042: if (!parse_parse_buffer(jc)) {
1043: return false;
1044: }
1045: switch (jc->stack[jc->top]) {
1046: case MODE_OBJECT:
1047: /*
1048: A comma causes a flip from object mode to key mode.
1049: */
1050: if (!pop(jc, MODE_OBJECT) || !push(jc, MODE_KEY)) {
1051: return false;
1052: }
1053: assert(jc->type != JSON_T_STRING);
1054: jc->type = JSON_T_NONE;
1055: jc->state = KE;
1056: break;
1057: case MODE_ARRAY:
1058: assert(jc->type != JSON_T_STRING);
1059: jc->type = JSON_T_NONE;
1060: jc->state = VA;
1061: break;
1062: default:
1063: return false;
1064: }
1065: break;
1066:
1067: /* : */ case -2:
1068: /*
1069: A colon causes a flip from key mode to object mode.
1070: */
1071: parse_buffer_pop_back_char(jc);
1072: if (!pop(jc, MODE_KEY) || !push(jc, MODE_OBJECT)) {
1073: return false;
1074: }
1075: assert(jc->type == JSON_T_NONE);
1076: jc->state = VA;
1077: break;
1078: /*
1079: Bad action.
1080: */
1081: default:
1082: set_error(jc);
1083: return false;
1084: }
1085: }
1086: return true;
1087: }
1088:
1089: int
1090: JSON_parser_done(JSON_parser jc)
1091: {
1092: if ((jc->state == OK || jc->state == GO) && pop(jc, MODE_DONE))
1093: {
1094: return true;
1095: }
1096:
1097: jc->error = JSON_E_UNBALANCED_COLLECTION;
1098: return false;
1099: }
1100:
1101:
1102: int JSON_parser_is_legal_white_space_string(const char* s)
1103: {
1104: int c, char_class;
1105:
1106: if (s == NULL) {
1107: return false;
1108: }
1109:
1110: for (; *s; ++s) {
1111: c = *s;
1112:
1113: if (c < 0 || c >= 128) {
1114: return false;
1115: }
1116:
1117: char_class = ascii_class[c];
1118:
1119: if (char_class != C_SPACE && char_class != C_WHITE) {
1120: return false;
1121: }
1122: }
1123:
1124: return true;
1125: }
1126:
1127: int JSON_parser_get_last_error(JSON_parser jc)
1128: {
1129: return jc->error;
1130: }
1131:
1132:
1133: void init_JSON_config(JSON_config* config)
1134: {
1135: if (config) {
1136: memset(config, 0, sizeof(*config));
1137:
1138: config->depth = JSON_PARSER_STACK_SIZE - 1;
1139: }
1140: }
E-mail: