Annotation of parser3/src/lib/json/JSON_parser.C, revision 1.4
1.1 misha 1: /*
2: Copyright (c) 2005 JSON.org
3:
4: Permission is hereby granted, free of charge, to any person obtaining a copy
5: of this software and associated documentation files (the "Software"), to deal
6: in the Software without restriction, including without limitation the rights
7: to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8: copies of the Software, and to permit persons to whom the Software is
9: furnished to do so, subject to the following conditions:
10:
11: The above copyright notice and this permission notice shall be included in all
12: copies or substantial portions of the Software.
13:
14: The Software shall be used for Good, not Evil.
15:
16: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17: IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18: FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19: AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20: LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21: OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22: SOFTWARE.
23: */
24:
25: /*
26: Callbacks, comments, Unicode handling by Jean Gressmann (jean@0x42.de), 2007-2010.
27:
28: Changelog:
29: 2010-05-07
30: Added error handling for memory allocation failure (sgbeal@googlemail.com).
31: Added diagnosis errors for invalid JSON.
32:
33: 2010-03-25
34: Fixed buffer overrun in grow_parse_buffer & cleaned up code.
35:
36: 2009-10-19
37: Replaced long double in JSON_value_struct with double after reports
38: of strtold being broken on some platforms (charles@transmissionbt.com).
39:
40: 2009-05-17
41: Incorporated benrudiak@googlemail.com fix for UTF16 decoding.
42:
43: 2009-05-14
44: Fixed float parsing bug related to a locale being set that didn't
45: use '.' as decimal point character (charles@transmissionbt.com).
46:
47: 2008-10-14
47: Renamed states.IN to states.IT to avoid name clash with the IN macro
49: defined in windef.h (alexey.pelykh@gmail.com)
50:
51: 2008-07-19
52: Removed some duplicate code & debugging variable (charles@transmissionbt.com)
53:
54: 2008-05-28
54: Made JSON_value structure ANSI C compliant. This bug was reported by
56: trisk@acm.jhu.edu
57:
58: 2008-05-20
59: Fixed bug reported by charles@transmissionbt.com where the switching
60: from static to dynamic parse buffer did not copy the static parse
61: buffer's content.
62: */
63:
64:
65:
66: #include <assert.h>
67: #include <ctype.h>
68: #include <float.h>
69: #include <stddef.h>
70: #include <stdio.h>
71: #include <stdlib.h>
72: #include <string.h>
73: #include <locale.h>
74:
75: #include "JSON_parser.h"
76:
1.3 moko 77: #include "pa_memory.h"
78:
1.1 misha 79: #ifdef _MSC_VER
80: # if _MSC_VER >= 1400 /* Visual Studio 2005 and up */
81: # pragma warning(disable:4996) // unsecure sscanf
82: # pragma warning(disable:4127) // conditional expression is constant
83: # endif
84: #endif
85:
86:
/* Boolean results returned by the functions in this file. */
87: #define true 1
88: #define false 0
/* Also used as the "no class" / "no transition" entry in ascii_class and state_transition_table. */
89: #define __ -1 /* the universal error code */
90:
91: /* values chosen so that the object size is approx equal to one page (4K) */
/* Number of entries in the parser's built-in mode stack (static_stack). */
92: #ifndef JSON_PARSER_STACK_SIZE
93: # define JSON_PARSER_STACK_SIZE 128
94: #endif
95:
/* Size in bytes of the parser's built-in parse buffer (static_parse_buffer). */
96: #ifndef JSON_PARSER_PARSE_BUFFER_SIZE
97: # define JSON_PARSER_PARSE_BUFFER_SIZE 3500
98: #endif
99:
/* Allocation hooks: forwarded to pa_malloc/pa_free; the "reason" argument is discarded. */
1.3 moko 100: #define JSON_parser_malloc(bytes, reason) pa_malloc(bytes)
101: #define JSON_parser_free(ptr) pa_free(ptr)
1.1 misha 102:
/* Holds one \uXXXX value (see utf16_high_surrogate below). */
103: typedef unsigned short UTF16;
104:
/*
    Complete state of one parser instance.

    callback, ctx            event callback and the context pointer passed to it
    state                    current row of state_transition_table (enum states)
    before_comment_state     state to resume when a comment ends
    type                     JSON_T_* type of the value currently being collected
    escaped                  set after a backslash inside a string
    comment                  non-zero while inside a comment
    allow_comments,
    handle_floats_manually   copied from the JSON_config given to new_JSON_parser
    error                    last JSON_E_* error code
    decimal_point            decimal point character taken from localeconv()
    utf16_high_surrogate     pending high surrogate waiting for its low surrogate
    current_char             last character given to JSON_parser_char (used by set_error)
    depth                    nesting limit, or -1 when the stack may grow
    top                      index of the top stack entry, -1 when the stack is empty
    stack, stack_capacity    mode stack; points at static_stack until it is reallocated
    parse_buffer, ..._capacity, ..._count
                             NUL-terminated text of the current token; points at
                             static_parse_buffer until it is reallocated
*/
105: struct JSON_parser_struct {
106: JSON_parser_callback callback;
107: void* ctx;
108: signed char state, before_comment_state, type, escaped, comment, allow_comments, handle_floats_manually, error;
109: char decimal_point;
110: UTF16 utf16_high_surrogate;
111: int current_char;
112: int depth;
113: int top;
114: int stack_capacity;
115: signed char* stack;
116: char* parse_buffer;
117: size_t parse_buffer_capacity;
118: size_t parse_buffer_count;
119: signed char static_stack[JSON_PARSER_STACK_SIZE];
120: char static_parse_buffer[JSON_PARSER_PARSE_BUFFER_SIZE];
121: };
122:
/* Number of elements in a true array (not a pointer); the argument is parenthesized so any array-valued expression may be passed. */
#define COUNTOF(x) (sizeof(x) / sizeof((x)[0]))
124:
125: /*
126: Characters are mapped into these character classes. This allows for
127: a significant reduction in the size of the state transition table.
128: */
129:
130:
131:
/*
    Character classes. The enumerator values are the column indices of
    state_transition_table, so the order here must match the table columns.
*/
132: enum classes {
133: C_SPACE, /* space */
134: C_WHITE, /* other whitespace */
135: C_LCURB, /* { */
136: C_RCURB, /* } */
137: C_LSQRB, /* [ */
138: C_RSQRB, /* ] */
139: C_COLON, /* : */
140: C_COMMA, /* , */
141: C_QUOTE, /* " */
142: C_BACKS, /* \ */
143: C_SLASH, /* / */
144: C_PLUS, /* + */
145: C_MINUS, /* - */
146: C_POINT, /* . */
147: C_ZERO , /* 0 */
148: C_DIGIT, /* 123456789 */
149: C_LOW_A, /* a */
150: C_LOW_B, /* b */
151: C_LOW_C, /* c */
152: C_LOW_D, /* d */
153: C_LOW_E, /* e */
154: C_LOW_F, /* f */
155: C_LOW_L, /* l */
156: C_LOW_N, /* n */
157: C_LOW_R, /* r */
158: C_LOW_S, /* s */
159: C_LOW_T, /* t */
160: C_LOW_U, /* u */
161: C_ABCDF, /* ABCDF */
162: C_E, /* E */
163: C_ETC, /* everything else */
164: C_STAR, /* * */
165: NR_CLASSES
166: };
167:
/*
    Lookup table indexed by character code 0..127, eight entries per row.
    JSON_parser_char rejects any character whose entry is __ and classifies
    characters >= 128 as C_ETC without consulting this table.
*/
168: static signed char ascii_class[128] = {
169: /*
170: This array maps the 128 ASCII characters into character classes.
171: The remaining Unicode characters should be mapped to C_ETC.
172: Non-whitespace control characters are errors.
173: */
174: __, __, __, __, __, __, __, __,
175: __, C_WHITE, C_WHITE, __, __, C_WHITE, __, __,
176: __, __, __, __, __, __, __, __,
177: __, __, __, __, __, __, __, __,
178:
179: C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
180: C_ETC, C_ETC, C_STAR, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH,
181: C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
182: C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
183:
184: C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC,
185: C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
186: C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
187: C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC,
188:
189: C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC,
190: C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC,
191: C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC,
192: C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC
193: };
194:
195:
196: /*
197: The state codes.
198: */
/*
    Parser states. The enumerator values are the row indices of
    state_transition_table, so the order here must match the table rows.
    All values are non-negative; negative table entries are actions.
*/
199: enum states {
200: GO, /* start */
201: OK, /* ok */
202: OB, /* object */
203: KE, /* key */
204: CO, /* colon */
205: VA, /* value */
206: AR, /* array */
207: ST, /* string */
1.4 ! misha 208: ESC, /* escape */
1.1 misha 209: U1, /* u1 */
210: U2, /* u2 */
211: U3, /* u3 */
212: U4, /* u4 */
213: MI, /* minus */
214: ZE, /* zero */
215: IT, /* integer */
216: FR, /* fraction */
217: E1, /* e */
218: E2, /* ex */
219: E3, /* exp */
220: T1, /* tr */
221: T2, /* tru */
222: T3, /* true */
223: F1, /* fa */
224: F2, /* fal */
225: F3, /* fals */
226: F4, /* false */
227: N1, /* nu */
228: N2, /* nul */
229: N3, /* null */
230: C1, /* / */
231: C2, /* / * */
232: C3, /* * */
233: FX, /* *.* *eE* */
234: D1, /* second UTF-16 character decoding started by \ */
235: D2, /* second UTF-16 character proceeded by u */
236: NR_STATES
237: };
238:
/*
    Named actions stored in state_transition_table. They are negative so that
    JSON_parser_char can tell them apart from states; the table additionally
    uses the literal codes -2 .. -9 for the structural characters.
*/
239: enum actions
240: {
241: CB = -10, /* comment begin */
242: CE = -11, /* comment end */
243: FA = -12, /* false */
244: TR = -13, /* true */
245: NU = -14, /* null */
246: DE = -15, /* double detected by exponent e E */
247: DF = -16, /* double detected by fraction . */
248: SB = -17, /* string begin */
249: MX = -18, /* integer detected by minus */
250: ZX = -19, /* integer detected by zero */
251: IX = -20, /* integer detected by 1-9 */
252: EX = -21, /* next char is escaped */
253: UC = -22 /* Unicode character read */
254: };
255:
256:
/*
    Indexed as state_transition_table[state][character class].
    Entry meanings, as interpreted by JSON_parser_char:
      >= 0        next state (enum states)
      __ (-1)     no transition: the character is rejected
      -2 .. -9    structural actions: -2 ':', -3 ',', -4 string end, -5 '[',
                  -6 '{', -7 ']', -8 '}', -9 '}' closing an empty object
      <= -10      named actions (enum actions)
*/
257: static signed char state_transition_table[NR_STATES][NR_CLASSES] = {
258: /*
259: The state transition table takes the current state and the current symbol,
260: and returns either a new state or an action. An action is represented as a
261: negative number. A JSON text is accepted if at the end of the text the
262: state is OK and if the mode is MODE_DONE.
263:
264: white 1-9 ABCDF etc
265: space | { } [ ] : , " \ / + - . 0 | a b c d e f l n r s t u | E | * */
266: /*start GO*/ {GO,GO,-6,__,-5,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
267: /*ok OK*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
268: /*object OB*/ {OB,OB,__,-9,__,__,__,__,SB,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
269: /*key KE*/ {KE,KE,__,__,__,__,__,__,SB,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
270: /*colon CO*/ {CO,CO,__,__,__,__,-2,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
271: /*value VA*/ {VA,VA,-6,__,-5,__,__,__,SB,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,FA,__,NU,__,__,TR,__,__,__,__,__},
272: /*array AR*/ {AR,AR,-6,__,-5,-7,__,__,SB,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,FA,__,NU,__,__,TR,__,__,__,__,__},
273: /*string ST*/ {ST,__,ST,ST,ST,ST,ST,ST,-4,EX,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST},
1.4 ! misha 274: /*escape ESC*/{__,__,__,__,__,__,__,__,ST,ST,ST,__,__,__,__,__,__,ST,__,__,__,ST,__,ST,ST,__,ST,U1,__,__,__,__},
1.1 misha 275: /*u1 U1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__,__},
276: /*u2 U2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__,__},
277: /*u3 U3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__,__},
278: /*u4 U4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,UC,UC,UC,UC,UC,UC,UC,UC,__,__,__,__,__,__,UC,UC,__,__},
279: /*minus MI*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ZE,IT,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
280: /*zero ZE*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,DF,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
281: /*int IT*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,DF,IT,IT,__,__,__,__,DE,__,__,__,__,__,__,__,__,DE,__,__},
282: /*frac FR*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__,__},
283: /*e E1*/ {__,__,__,__,__,__,__,__,__,__,__,E2,E2,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
284: /*ex E2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
285: /*exp E3*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
286: /*tr T1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__,__},
287: /*tru T2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__,__},
288: /*true T3*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__,__},
289: /*fa F1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
290: /*fal F2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__,__},
291: /*fals F3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__,__},
292: /*false F4*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__,__},
293: /*nu N1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__,__},
294: /*nul N2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__,__},
295: /*null N3*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__},
296: /*/ C1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,C2},
297: /*/* C2*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3},
298: /** C3*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,CE,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3},
299: /*_. FX*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__,__},
300: /*\ D1*/ {__,__,__,__,__,__,__,__,__,D2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
301: /*\ D2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,U1,__,__,__,__},
302: };
303:
304:
305: /*
306: These modes can be pushed on the stack.
307: */
/*
    Entries of the mode stack, as used in this file:
      MODE_DONE    pushed once by new_JSON_parser; popped by JSON_parser_done
      MODE_ARRAY   pushed on '['
      MODE_KEY     pushed on '{'; replaced by MODE_OBJECT at ':'
      MODE_OBJECT  replaced by MODE_KEY again at ','
*/
308: enum modes {
309: MODE_ARRAY = 1,
310: MODE_DONE = 2,
311: MODE_KEY = 3,
312: MODE_OBJECT = 4
313: };
314:
315: static void set_error(JSON_parser jc)
316: {
317: switch (jc->state) {
318: case GO:
319: switch (jc->current_char) {
320: case '{': case '}': case '[': case ']':
321: jc->error = JSON_E_UNBALANCED_COLLECTION;
322: break;
323: default:
324: jc->error = JSON_E_INVALID_CHAR;
325: break;
326: }
327: break;
328: case OB:
329: jc->error = JSON_E_EXPECTED_KEY;
330: break;
331: case AR:
332: jc->error = JSON_E_UNBALANCED_COLLECTION;
333: break;
334: case CO:
335: jc->error = JSON_E_EXPECTED_COLON;
336: break;
337: case KE:
338: jc->error = JSON_E_EXPECTED_KEY;
339: break;
340: /* \uXXXX\uYYYY */
341: case U1: case U2: case U3:case U4: case D1: case D2:
342: jc->error = JSON_E_INVALID_UNICODE_SEQUENCE;
343: break;
344: /* true, false, null */
345: case T1: case T2: case T3: case F1: case F2: case F3: case F4: case N1: case N2: case N3:
346: jc->error = JSON_E_INVALID_KEYWORD;
347: break;
348: /* minus, integer, fraction, exponent */
349: case MI: case ZE: case IT: case FR: case E1: case E2: case E3:
350: jc->error = JSON_E_INVALID_NUMBER;
351: break;
352: default:
353: jc->error = JSON_E_INVALID_CHAR;
354: break;
355: }
356: }
357:
358: static int
359: push(JSON_parser jc, int mode)
360: {
361: /*
362: Push a mode onto the stack. Return false if there is overflow.
363: */
364: assert(jc->top <= jc->stack_capacity);
365:
366: if (jc->depth < 0) {
367: if (jc->top == jc->stack_capacity) {
368: const size_t bytes_to_copy = jc->stack_capacity * sizeof(jc->stack[0]);
369: const size_t new_capacity = jc->stack_capacity * 2;
370: const size_t bytes_to_allocate = new_capacity * sizeof(jc->stack[0]);
371: void* mem = JSON_parser_malloc(bytes_to_allocate, "stack");
372: if (!mem) {
373: jc->error = JSON_E_OUT_OF_MEMORY;
374: return false;
375: }
376: jc->stack_capacity = (int)new_capacity;
377: memcpy(mem, jc->stack, bytes_to_copy);
378: if (jc->stack != &jc->static_stack[0]) {
379: JSON_parser_free(jc->stack);
380: }
381: jc->stack = (signed char*)mem;
382: }
383: } else {
384: if (jc->top == jc->depth) {
385: jc->error = JSON_E_NESTING_DEPTH_REACHED;
386: return false;
387: }
388: }
389: jc->stack[++jc->top] = (signed char)mode;
390: return true;
391: }
392:
393:
394: static int
395: pop(JSON_parser jc, int mode)
396: {
397: /*
398: Pop the stack, assuring that the current mode matches the expectation.
399: Return false if there is underflow or if the modes mismatch.
400: */
401: if (jc->top < 0 || jc->stack[jc->top] != mode) {
402: return false;
403: }
404: jc->top -= 1;
405: return true;
406: }
407:
408:
/* Empty the parse buffer and keep it NUL-terminated. */
#define parse_buffer_clear(jc) \
    do {\
        (jc)->parse_buffer_count = 0;\
        (jc)->parse_buffer[0] = 0;\
    } while (0)

/* Drop the last character of the parse buffer; the buffer must not be empty. */
#define parse_buffer_pop_back_char(jc)\
    do {\
        assert((jc)->parse_buffer_count >= 1);\
        --(jc)->parse_buffer_count;\
        (jc)->parse_buffer[(jc)->parse_buffer_count] = 0;\
    } while (0)
421:
422: void delete_JSON_parser(JSON_parser jc)
423: {
424: if (jc) {
425: if (jc->stack != &jc->static_stack[0]) {
426: JSON_parser_free((void*)jc->stack);
427: }
428: if (jc->parse_buffer != &jc->static_parse_buffer[0]) {
429: JSON_parser_free((void*)jc->parse_buffer);
430: }
431: JSON_parser_free((void*)jc);
432: }
433: }
434:
435:
436: JSON_parser
437: new_JSON_parser(JSON_config* config)
438: {
439: /*
440: new_JSON_parser starts the checking process by constructing a JSON_parser
441: object. It takes a depth parameter that restricts the level of maximum
442: nesting.
443:
444: To continue the process, call JSON_parser_char for each character in the
445: JSON text, and then call JSON_parser_done to obtain the final result.
446: These functions are fully reentrant.
447: */
448:
449: int depth = 0;
450: JSON_config default_config;
451:
1.2 moko 452: JSON_parser jc = (JSON_parser)JSON_parser_malloc(sizeof(struct JSON_parser_struct), "parser");
1.1 misha 453:
454: if (jc == NULL) {
455: return NULL;
456: }
457:
458: memset(jc, 0, sizeof(*jc));
459:
460: /* initialize configuration */
461: init_JSON_config(&default_config);
462:
463: /* set to default configuration if none was provided */
464: if (config == NULL) {
465: config = &default_config;
466: }
467:
468: depth = config->depth;
469:
470: /* We need to be able to push at least one object */
471: if (depth == 0) {
472: depth = 1;
473: }
474:
475: jc->state = GO;
476: jc->top = -1;
477:
478: /* Do we want non-bound stack? */
479: if (depth > 0) {
480: jc->stack_capacity = depth;
481: jc->depth = depth;
482: if (depth <= (int)COUNTOF(jc->static_stack)) {
483: jc->stack = &jc->static_stack[0];
484: } else {
485: jc->stack = (signed char*)JSON_parser_malloc(jc->stack_capacity * sizeof(jc->stack[0]), "stack");
486: if (jc->stack == NULL) {
487: JSON_parser_free(jc);
488: return NULL;
489: }
490: }
491: } else {
492: jc->stack_capacity = (int)COUNTOF(jc->static_stack);
493: jc->depth = -1;
494: jc->stack = &jc->static_stack[0];
495: }
496:
497: /* set parser to start */
498: push(jc, MODE_DONE);
499:
500: /* set up the parse buffer */
501: jc->parse_buffer = &jc->static_parse_buffer[0];
502: jc->parse_buffer_capacity = COUNTOF(jc->static_parse_buffer);
503: parse_buffer_clear(jc);
504:
505: /* set up callback, comment & float handling */
506: jc->callback = config->callback;
507: jc->ctx = config->callback_ctx;
508: jc->allow_comments = (signed char)config->allow_comments != 0;
509: jc->handle_floats_manually = (signed char)config->handle_floats_manually != 0;
510:
511: /* set up decimal point */
512: jc->decimal_point = *localeconv()->decimal_point;
513:
514: return jc;
515: }
516:
517: static int parse_buffer_grow(JSON_parser jc)
518: {
519: const size_t bytes_to_copy = jc->parse_buffer_count * sizeof(jc->parse_buffer[0]);
520: const size_t new_capacity = jc->parse_buffer_capacity * 2;
521: const size_t bytes_to_allocate = new_capacity * sizeof(jc->parse_buffer[0]);
522: void* mem = JSON_parser_malloc(bytes_to_allocate, "parse buffer");
523:
524: if (mem == NULL) {
525: jc->error = JSON_E_OUT_OF_MEMORY;
526: return false;
527: }
528:
529: assert(new_capacity > 0);
530: memcpy(mem, jc->parse_buffer, bytes_to_copy);
531:
532: if (jc->parse_buffer != &jc->static_parse_buffer[0]) {
533: JSON_parser_free(jc->parse_buffer);
534: }
535:
536: jc->parse_buffer = (char*)mem;
537: jc->parse_buffer_capacity = new_capacity;
538:
539: return true;
540: }
541:
542: static int parse_buffer_reserve_for(JSON_parser jc, unsigned chars)
543: {
544: while (jc->parse_buffer_count + chars + 1 > jc->parse_buffer_capacity) {
545: if (!parse_buffer_grow(jc)) {
546: assert(jc->error == JSON_E_OUT_OF_MEMORY);
547: return false;
548: }
549: }
550:
551: return true;
552: }
553:
/* True when `count` more characters plus the terminating NUL fit into the parse buffer. */
#define parse_buffer_has_space_for(jc, count) \
    ((jc)->parse_buffer_count + (count) + 1 <= (jc)->parse_buffer_capacity)

/* Append one character and re-terminate the buffer; the caller must have reserved the space. */
#define parse_buffer_push_back_char(jc, c)\
    do {\
        assert(parse_buffer_has_space_for(jc, 1)); \
        (jc)->parse_buffer[(jc)->parse_buffer_count++] = (c);\
        (jc)->parse_buffer[(jc)->parse_buffer_count] = 0;\
    } while (0)

/* Debug check: the pending value is a scalar (null, boolean, number or string). */
#define assert_is_non_container_type(jc) \
    assert( \
        (jc)->type == JSON_T_NULL || \
        (jc)->type == JSON_T_FALSE || \
        (jc)->type == JSON_T_TRUE || \
        (jc)->type == JSON_T_FLOAT || \
        (jc)->type == JSON_T_INTEGER || \
        (jc)->type == JSON_T_STRING)
572:
573:
574: static int parse_parse_buffer(JSON_parser jc)
575: {
576: if (jc->callback) {
577: JSON_value value, *arg = NULL;
578:
579: if (jc->type != JSON_T_NONE) {
580: assert_is_non_container_type(jc);
581:
582: switch(jc->type) {
583: case JSON_T_FLOAT:
584: arg = &value;
585: if (jc->handle_floats_manually) {
586: value.vu.str.value = jc->parse_buffer;
587: value.vu.str.length = jc->parse_buffer_count;
588: } else {
589: /* not checking with end pointer b/c there may be trailing ws */
590: value.vu.float_value = strtod(jc->parse_buffer, NULL);
591: }
592: break;
593: case JSON_T_INTEGER:
594: arg = &value;
595: sscanf(jc->parse_buffer, JSON_PARSER_INTEGER_SSCANF_TOKEN, &value.vu.integer_value);
596: break;
597: case JSON_T_STRING:
598: arg = &value;
599: value.vu.str.value = jc->parse_buffer;
600: value.vu.str.length = jc->parse_buffer_count;
601: break;
602: }
603:
604: if (!(*jc->callback)(jc->ctx, jc->type, arg)) {
605: return false;
606: }
607: }
608: }
609:
610: parse_buffer_clear(jc);
611:
612: return true;
613: }
614:
/* True when uc lies in 0xD800..0xDBFF. */
615: #define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800)
/* True when uc lies in 0xDC00..0xDFFF. */
616: #define IS_LOW_SURROGATE(uc) (((uc) & 0xFC00) == 0xDC00)
/* Combine the low 10 bits of each surrogate and add 0x10000. */
617: #define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)
/* Bits OR-ed into the first UTF-8 byte, indexed by the number of trailing bytes (0..3). */
618: static unsigned char utf8_lead_bits[4] = { 0x00, 0xC0, 0xE0, 0xF0 };
619:
620: static int decode_unicode_char(JSON_parser jc)
621: {
622: int i;
623: unsigned uc = 0;
624: char* p;
625: int trail_bytes;
626:
627: assert(jc->parse_buffer_count >= 6);
628:
629: p = &jc->parse_buffer[jc->parse_buffer_count - 4];
630:
631: for (i = 12; i >= 0; i -= 4, ++p) {
632: unsigned x = *p;
633:
634: if (x >= 'a') {
635: x -= ('a' - 10);
636: } else if (x >= 'A') {
637: x -= ('A' - 10);
638: } else {
639: x &= ~0x30u;
640: }
641:
642: assert(x < 16);
643:
644: uc |= x << i;
645: }
646:
647: /* clear UTF-16 char from buffer */
648: jc->parse_buffer_count -= 6;
649: jc->parse_buffer[jc->parse_buffer_count] = 0;
650:
651: /* attempt decoding ... */
652: if (jc->utf16_high_surrogate) {
653: if (IS_LOW_SURROGATE(uc)) {
654: uc = DECODE_SURROGATE_PAIR(jc->utf16_high_surrogate, uc);
655: trail_bytes = 3;
656: jc->utf16_high_surrogate = 0;
657: } else {
658: /* high surrogate without a following low surrogate */
659: return false;
660: }
661: } else {
662: if (uc < 0x80) {
663: trail_bytes = 0;
664: } else if (uc < 0x800) {
665: trail_bytes = 1;
666: } else if (IS_HIGH_SURROGATE(uc)) {
667: /* save the high surrogate and wait for the low surrogate */
668: jc->utf16_high_surrogate = (UTF16)uc;
669: return true;
670: } else if (IS_LOW_SURROGATE(uc)) {
671: /* low surrogate without a preceding high surrogate */
672: return false;
673: } else {
674: trail_bytes = 2;
675: }
676: }
677:
678: jc->parse_buffer[jc->parse_buffer_count++] = (char) ((uc >> (trail_bytes * 6)) | utf8_lead_bits[trail_bytes]);
679:
680: for (i = trail_bytes * 6 - 6; i >= 0; i -= 6) {
681: jc->parse_buffer[jc->parse_buffer_count++] = (char) (((uc >> i) & 0x3F) | 0x80);
682: }
683:
684: jc->parse_buffer[jc->parse_buffer_count] = 0;
685:
686: return true;
687: }
688:
689: static int add_escaped_char_to_parse_buffer(JSON_parser jc, int next_char)
690: {
691: assert(parse_buffer_has_space_for(jc, 1));
692:
693: jc->escaped = 0;
694: /* remove the backslash */
695: parse_buffer_pop_back_char(jc);
696: switch(next_char) {
697: case 'b':
698: parse_buffer_push_back_char(jc, '\b');
699: break;
700: case 'f':
701: parse_buffer_push_back_char(jc, '\f');
702: break;
703: case 'n':
704: parse_buffer_push_back_char(jc, '\n');
705: break;
706: case 'r':
707: parse_buffer_push_back_char(jc, '\r');
708: break;
709: case 't':
710: parse_buffer_push_back_char(jc, '\t');
711: break;
712: case '"':
713: parse_buffer_push_back_char(jc, '"');
714: break;
715: case '\\':
716: parse_buffer_push_back_char(jc, '\\');
717: break;
718: case '/':
719: parse_buffer_push_back_char(jc, '/');
720: break;
721: case 'u':
722: parse_buffer_push_back_char(jc, '\\');
723: parse_buffer_push_back_char(jc, 'u');
724: break;
725: default:
726: return false;
727: }
728:
729: return true;
730: }
731:
732: static int add_char_to_parse_buffer(JSON_parser jc, int next_char, int next_class)
733: {
734: if (!parse_buffer_reserve_for(jc, 1)) {
735: assert(JSON_E_OUT_OF_MEMORY == jc->error);
736: return false;
737: }
738:
739: if (jc->escaped) {
740: if (!add_escaped_char_to_parse_buffer(jc, next_char)) {
741: jc->error = JSON_E_INVALID_ESCAPE_SEQUENCE;
742: return false;
743: }
744: } else if (!jc->comment) {
745: if ((jc->type != JSON_T_NONE) | !((next_class == C_SPACE) | (next_class == C_WHITE)) /* non-white-space */) {
746: parse_buffer_push_back_char(jc, (char)next_char);
747: }
748: }
749:
750: return true;
751: }
752:
/*
    Debug check used when a number turns out to be a float: the pending
    value must not be a string, null or boolean. Wrapped in do/while so the
    macro expands to a single statement wherever it is used.
*/
#define assert_type_isnt_string_null_or_bool(jc) \
    do {\
        assert((jc)->type != JSON_T_FALSE); \
        assert((jc)->type != JSON_T_TRUE); \
        assert((jc)->type != JSON_T_NULL); \
        assert((jc)->type != JSON_T_STRING); \
    } while (0)
758:
759:
760: int
761: JSON_parser_char(JSON_parser jc, int next_char)
762: {
763: /*
764: After calling new_JSON_parser, call this function for each character (or
765: partial character) in your JSON text. It can accept UTF-8, UTF-16, or
766: UTF-32. It returns true if things are looking ok so far. If it rejects the
767: text, it returns false.
768: */
769: int next_class, next_state;
770:
771: /*
772: Store the current char for error handling
773: */
774: jc->current_char = next_char;
775:
776: /*
777: Determine the character's class.
778: */
779: if (next_char < 0) {
780: jc->error = JSON_E_INVALID_CHAR;
781: return false;
782: }
783: if (next_char >= 128) {
784: next_class = C_ETC;
785: } else {
786: next_class = ascii_class[next_char];
787: if (next_class <= __) {
788: set_error(jc);
789: return false;
790: }
791: }
792:
793: if (!add_char_to_parse_buffer(jc, next_char, next_class)) {
794: return false;
795: }
796:
797: /*
798: Get the next state from the state transition table.
799: */
800: next_state = state_transition_table[jc->state][next_class];
801: if (next_state >= 0) {
802: /*
803: Change the state.
804: */
805: jc->state = (signed char)next_state;
806: } else {
807: /*
808: Or perform one of the actions.
809: */
810: switch (next_state) {
811: /* Unicode character */
812: case UC:
813: if(!decode_unicode_char(jc)) {
814: jc->error = JSON_E_INVALID_UNICODE_SEQUENCE;
815: return false;
816: }
817: /* check if we need to read a second UTF-16 char */
818: if (jc->utf16_high_surrogate) {
819: jc->state = D1;
820: } else {
821: jc->state = ST;
822: }
823: break;
824: /* escaped char */
825: case EX:
826: jc->escaped = 1;
1.4 ! misha 827: jc->state = ESC;
1.1 misha 828: break;
829: /* integer detected by minus */
830: case MX:
831: jc->type = JSON_T_INTEGER;
832: jc->state = MI;
833: break;
834: /* integer detected by zero */
835: case ZX:
836: jc->type = JSON_T_INTEGER;
837: jc->state = ZE;
838: break;
839: /* integer detected by 1-9 */
840: case IX:
841: jc->type = JSON_T_INTEGER;
842: jc->state = IT;
843: break;
844:
845: /* floating point number detected by exponent*/
846: case DE:
847: assert_type_isnt_string_null_or_bool(jc);
848: jc->type = JSON_T_FLOAT;
849: jc->state = E1;
850: break;
851:
852: /* floating point number detected by fraction */
853: case DF:
854: assert_type_isnt_string_null_or_bool(jc);
855: if (!jc->handle_floats_manually) {
856: /*
857: Some versions of strtod (which underlies sscanf) don't support converting
858: C-locale formated floating point values.
859: */
860: assert(jc->parse_buffer[jc->parse_buffer_count-1] == '.');
861: jc->parse_buffer[jc->parse_buffer_count-1] = jc->decimal_point;
862: }
863: jc->type = JSON_T_FLOAT;
864: jc->state = FX;
865: break;
866: /* string begin " */
867: case SB:
868: parse_buffer_clear(jc);
869: assert(jc->type == JSON_T_NONE);
870: jc->type = JSON_T_STRING;
871: jc->state = ST;
872: break;
873:
874: /* n */
875: case NU:
876: assert(jc->type == JSON_T_NONE);
877: jc->type = JSON_T_NULL;
878: jc->state = N1;
879: break;
880: /* f */
881: case FA:
882: assert(jc->type == JSON_T_NONE);
883: jc->type = JSON_T_FALSE;
884: jc->state = F1;
885: break;
886: /* t */
887: case TR:
888: assert(jc->type == JSON_T_NONE);
889: jc->type = JSON_T_TRUE;
890: jc->state = T1;
891: break;
892:
893: /* closing comment */
894: case CE:
895: jc->comment = 0;
896: assert(jc->parse_buffer_count == 0);
897: assert(jc->type == JSON_T_NONE);
898: jc->state = jc->before_comment_state;
899: break;
900:
901: /* opening comment */
902: case CB:
903: if (!jc->allow_comments) {
904: return false;
905: }
906: parse_buffer_pop_back_char(jc);
907: if (!parse_parse_buffer(jc)) {
908: return false;
909: }
910: assert(jc->parse_buffer_count == 0);
911: assert(jc->type != JSON_T_STRING);
912: switch (jc->stack[jc->top]) {
913: case MODE_ARRAY:
914: case MODE_OBJECT:
915: switch(jc->state) {
916: case VA:
917: case AR:
918: jc->before_comment_state = jc->state;
919: break;
920: default:
921: jc->before_comment_state = OK;
922: break;
923: }
924: break;
925: default:
926: jc->before_comment_state = jc->state;
927: break;
928: }
929: jc->type = JSON_T_NONE;
930: jc->state = C1;
931: jc->comment = 1;
932: break;
933: /* empty } */
934: case -9:
935: parse_buffer_clear(jc);
936: if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_END, NULL)) {
937: return false;
938: }
939: if (!pop(jc, MODE_KEY)) {
940: return false;
941: }
942: jc->state = OK;
943: break;
944:
945: /* } */ case -8:
946: parse_buffer_pop_back_char(jc);
947: if (!parse_parse_buffer(jc)) {
948: return false;
949: }
950: if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_END, NULL)) {
951: return false;
952: }
953: if (!pop(jc, MODE_OBJECT)) {
954: jc->error = JSON_E_UNBALANCED_COLLECTION;
955: return false;
956: }
957: jc->type = JSON_T_NONE;
958: jc->state = OK;
959: break;
960:
961: /* ] */ case -7:
962: parse_buffer_pop_back_char(jc);
963: if (!parse_parse_buffer(jc)) {
964: return false;
965: }
966: if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_ARRAY_END, NULL)) {
967: return false;
968: }
969: if (!pop(jc, MODE_ARRAY)) {
970: jc->error = JSON_E_UNBALANCED_COLLECTION;
971: return false;
972: }
973:
974: jc->type = JSON_T_NONE;
975: jc->state = OK;
976: break;
977:
978: /* { */ case -6:
979: parse_buffer_pop_back_char(jc);
980: if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_BEGIN, NULL)) {
981: return false;
982: }
983: if (!push(jc, MODE_KEY)) {
984: return false;
985: }
986: assert(jc->type == JSON_T_NONE);
987: jc->state = OB;
988: break;
989:
990: /* [ */ case -5:
991: parse_buffer_pop_back_char(jc);
992: if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_ARRAY_BEGIN, NULL)) {
993: return false;
994: }
995: if (!push(jc, MODE_ARRAY)) {
996: return false;
997: }
998: assert(jc->type == JSON_T_NONE);
999: jc->state = AR;
1000: break;
1001:
1002: /* string end " */ case -4:
1003: parse_buffer_pop_back_char(jc);
1004: switch (jc->stack[jc->top]) {
1005: case MODE_KEY:
1006: assert(jc->type == JSON_T_STRING);
1007: jc->type = JSON_T_NONE;
1008: jc->state = CO;
1009:
1010: if (jc->callback) {
1011: JSON_value value;
1012: value.vu.str.value = jc->parse_buffer;
1013: value.vu.str.length = jc->parse_buffer_count;
1014: if (!(*jc->callback)(jc->ctx, JSON_T_KEY, &value)) {
1015: return false;
1016: }
1017: }
1018: parse_buffer_clear(jc);
1019: break;
1020: case MODE_ARRAY:
1021: case MODE_OBJECT:
1022: assert(jc->type == JSON_T_STRING);
1023: if (!parse_parse_buffer(jc)) {
1024: return false;
1025: }
1026: jc->type = JSON_T_NONE;
1027: jc->state = OK;
1028: break;
1029: default:
1030: return false;
1031: }
1032: break;
1033:
1034: /* , */ case -3:
1035: parse_buffer_pop_back_char(jc);
1036: if (!parse_parse_buffer(jc)) {
1037: return false;
1038: }
1039: switch (jc->stack[jc->top]) {
1040: case MODE_OBJECT:
1041: /*
1042: A comma causes a flip from object mode to key mode.
1043: */
1044: if (!pop(jc, MODE_OBJECT) || !push(jc, MODE_KEY)) {
1045: return false;
1046: }
1047: assert(jc->type != JSON_T_STRING);
1048: jc->type = JSON_T_NONE;
1049: jc->state = KE;
1050: break;
1051: case MODE_ARRAY:
1052: assert(jc->type != JSON_T_STRING);
1053: jc->type = JSON_T_NONE;
1054: jc->state = VA;
1055: break;
1056: default:
1057: return false;
1058: }
1059: break;
1060:
1061: /* : */ case -2:
1062: /*
1063: A colon causes a flip from key mode to object mode.
1064: */
1065: parse_buffer_pop_back_char(jc);
1066: if (!pop(jc, MODE_KEY) || !push(jc, MODE_OBJECT)) {
1067: return false;
1068: }
1069: assert(jc->type == JSON_T_NONE);
1070: jc->state = VA;
1071: break;
1072: /*
1073: Bad action.
1074: */
1075: default:
1076: set_error(jc);
1077: return false;
1078: }
1079: }
1080: return true;
1081: }
1082:
1083: int
1084: JSON_parser_done(JSON_parser jc)
1085: {
1086: if ((jc->state == OK || jc->state == GO) && pop(jc, MODE_DONE))
1087: {
1088: return true;
1089: }
1090:
1091: jc->error = JSON_E_UNBALANCED_COLLECTION;
1092: return false;
1093: }
1094:
1095:
1096: int JSON_parser_is_legal_white_space_string(const char* s)
1097: {
1098: int c, char_class;
1099:
1100: if (s == NULL) {
1101: return false;
1102: }
1103:
1104: for (; *s; ++s) {
1105: c = *s;
1106:
1107: if (c < 0 || c >= 128) {
1108: return false;
1109: }
1110:
1111: char_class = ascii_class[c];
1112:
1113: if (char_class != C_SPACE && char_class != C_WHITE) {
1114: return false;
1115: }
1116: }
1117:
1118: return true;
1119: }
1120:
1121: int JSON_parser_get_last_error(JSON_parser jc)
1122: {
1123: return jc->error;
1124: }
1125:
1126:
1127: void init_JSON_config(JSON_config* config)
1128: {
1129: if (config) {
1130: memset(config, 0, sizeof(*config));
1131:
1132: config->depth = JSON_PARSER_STACK_SIZE - 1;
1133: }
1134: }
E-mail: