Annotation of parser3/src/lib/json/JSON_parser.C, revision 1.5
1.1 misha 1: /*
2: Copyright (c) 2005 JSON.org
3:
4: Permission is hereby granted, free of charge, to any person obtaining a copy
5: of this software and associated documentation files (the "Software"), to deal
6: in the Software without restriction, including without limitation the rights
7: to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8: copies of the Software, and to permit persons to whom the Software is
9: furnished to do so, subject to the following conditions:
10:
11: The above copyright notice and this permission notice shall be included in all
12: copies or substantial portions of the Software.
13:
14: The Software shall be used for Good, not Evil.
15:
16: THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17: IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18: FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19: AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20: LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21: OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22: SOFTWARE.
23: */
24:
25: /*
26: Callbacks, comments, Unicode handling by Jean Gressmann (jean@0x42.de), 2007-2010.
27:
28: Changelog:
29: 2010-05-07
30: Added error handling for memory allocation failure (sgbeal@googlemail.com).
31: Added diagnosis errors for invalid JSON.
32:
33: 2010-03-25
34: Fixed buffer overrun in grow_parse_buffer & cleaned up code.
35:
36: 2009-10-19
37: Replaced long double in JSON_value_struct with double after reports
38: of strtold being broken on some platforms (charles@transmissionbt.com).
39:
40: 2009-05-17
41: Incorporated benrudiak@googlemail.com fix for UTF16 decoding.
42:
43: 2009-05-14
44: Fixed float parsing bug related to a locale being set that didn't
45: use '.' as decimal point character (charles@transmissionbt.com).
46:
47: 2008-10-14
48: Renamed states.IN to states.IT to avoid a name clash with the IN macro
49: defined in windef.h (alexey.pelykh@gmail.com)
50:
51: 2008-07-19
52: Removed some duplicate code & debugging variable (charles@transmissionbt.com)
53:
54: 2008-05-28
55: Made the JSON_value structure ANSI C compliant. This bug was reported by
56: trisk@acm.jhu.edu
57:
58: 2008-05-20
59: Fixed bug reported by charles@transmissionbt.com where the switching
60: from static to dynamic parse buffer did not copy the static parse
61: buffer's content.
62: */
63:
64:
65:
66: #include "JSON_parser.h"
1.3 moko 67: #include "pa_memory.h"
68:
/* Silence two MSVC warnings (Visual Studio 2005 and newer) raised by this file. */
#if defined(_MSC_VER) && (_MSC_VER >= 1400)
#   pragma warning(disable:4996) /* unsecure sscanf */
#   pragma warning(disable:4127) /* conditional expression is constant */
#endif

/* Boolean results used by the parser functions. */
#define true  1
#define false 0

/* The universal error code: marks invalid entries in the lookup tables. */
#define __   -1

/* Sizes of the buffers embedded in the parser object; values chosen so that
   the object size is approximately one page (4K). Both may be overridden at
   build time. */
#if !defined(JSON_PARSER_STACK_SIZE)
#   define JSON_PARSER_STACK_SIZE 128
#endif

#if !defined(JSON_PARSER_PARSE_BUFFER_SIZE)
#   define JSON_PARSER_PARSE_BUFFER_SIZE 3500
#endif

/* Allocation hooks: memory comes from pa_malloc/pa_free; the `reason`
   argument is accepted but not forwarded. */
#define JSON_parser_malloc(bytes, reason) pa_malloc(bytes)
#define JSON_parser_free(ptr) pa_free(ptr)

/* One UTF-16 code unit. */
typedef unsigned short UTF16;
94:
95: struct JSON_parser_struct {
96: JSON_parser_callback callback;
97: void* ctx;
98: signed char state, before_comment_state, type, escaped, comment, allow_comments, handle_floats_manually, error;
99: char decimal_point;
100: UTF16 utf16_high_surrogate;
101: int current_char;
102: int depth;
103: int top;
104: int stack_capacity;
105: signed char* stack;
106: char* parse_buffer;
107: size_t parse_buffer_capacity;
108: size_t parse_buffer_count;
109: signed char static_stack[JSON_PARSER_STACK_SIZE];
110: char static_parse_buffer[JSON_PARSER_PARSE_BUFFER_SIZE];
111: };
112:
/* Number of elements of the array x. Only valid for real arrays, never for
   pointers. The argument is parenthesized so that any array-valued
   expression can be passed safely. */
#define COUNTOF(x) (sizeof(x)/sizeof((x)[0]))
114:
/*
    Character classes. Every input character is reduced to one of these
    classes before the state transition table is consulted, which keeps the
    table small. The numeric values are the column indices of that table.
*/
enum classes {
    C_SPACE    =  0,  /* space */
    C_WHITE    =  1,  /* other whitespace */
    C_LCURB    =  2,  /* {  */
    C_RCURB    =  3,  /* }  */
    C_LSQRB    =  4,  /* [  */
    C_RSQRB    =  5,  /* ]  */
    C_COLON    =  6,  /* :  */
    C_COMMA    =  7,  /* ,  */
    C_QUOTE    =  8,  /* "  */
    C_BACKS    =  9,  /* \  */
    C_SLASH    = 10,  /* /  */
    C_PLUS     = 11,  /* +  */
    C_MINUS    = 12,  /* -  */
    C_POINT    = 13,  /* .  */
    C_ZERO     = 14,  /* 0  */
    C_DIGIT    = 15,  /* 123456789 */
    C_LOW_A    = 16,  /* a  */
    C_LOW_B    = 17,  /* b  */
    C_LOW_C    = 18,  /* c  */
    C_LOW_D    = 19,  /* d  */
    C_LOW_E    = 20,  /* e  */
    C_LOW_F    = 21,  /* f  */
    C_LOW_L    = 22,  /* l  */
    C_LOW_N    = 23,  /* n  */
    C_LOW_R    = 24,  /* r  */
    C_LOW_S    = 25,  /* s  */
    C_LOW_T    = 26,  /* t  */
    C_LOW_U    = 27,  /* u  */
    C_ABCDF    = 28,  /* ABCDF */
    C_E        = 29,  /* E  */
    C_ETC      = 30,  /* everything else */
    C_STAR     = 31,  /* *  */
    NR_CLASSES = 32   /* number of classes (table width) */
};
157:
158: static signed char ascii_class[128] = {
159: /*
160: This array maps the 128 ASCII characters into character classes.
161: The remaining Unicode characters should be mapped to C_ETC.
162: Non-whitespace control characters are errors.
163: */
164: __, __, __, __, __, __, __, __,
165: __, C_WHITE, C_WHITE, __, __, C_WHITE, __, __,
166: __, __, __, __, __, __, __, __,
167: __, __, __, __, __, __, __, __,
168:
169: C_SPACE, C_ETC, C_QUOTE, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
170: C_ETC, C_ETC, C_STAR, C_PLUS, C_COMMA, C_MINUS, C_POINT, C_SLASH,
171: C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
172: C_DIGIT, C_DIGIT, C_COLON, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
173:
174: C_ETC, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_ETC,
175: C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
176: C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC, C_ETC,
177: C_ETC, C_ETC, C_ETC, C_LSQRB, C_BACKS, C_RSQRB, C_ETC, C_ETC,
178:
179: C_ETC, C_LOW_A, C_LOW_B, C_LOW_C, C_LOW_D, C_LOW_E, C_LOW_F, C_ETC,
180: C_ETC, C_ETC, C_ETC, C_ETC, C_LOW_L, C_ETC, C_LOW_N, C_ETC,
181: C_ETC, C_ETC, C_LOW_R, C_LOW_S, C_LOW_T, C_LOW_U, C_ETC, C_ETC,
182: C_ETC, C_ETC, C_ETC, C_LCURB, C_ETC, C_RCURB, C_ETC, C_ETC
183: };
184:
185:
/*
    The state codes. The numeric values are the row indices of the state
    transition table.
*/
enum states {
    GO  =  0,  /* start: nothing read yet */
    OK  =  1,  /* a value has been completed */
    OB  =  2,  /* just after '{': key or '}' expected */
    KE  =  3,  /* after ',' in an object: key expected */
    CO  =  4,  /* after a key: colon expected */
    VA  =  5,  /* value expected */
    AR  =  6,  /* just after '[': value or ']' expected */
    ST  =  7,  /* inside a string */
    ESC =  8,  /* after a backslash inside a string */
    U1  =  9,  /* \u: 1st hex digit expected */
    U2  = 10,  /* \u: 2nd hex digit expected */
    U3  = 11,  /* \u: 3rd hex digit expected */
    U4  = 12,  /* \u: 4th hex digit expected */
    MI  = 13,  /* after a minus sign */
    ZE  = 14,  /* after a leading zero */
    IT  = 15,  /* inside the integer part */
    FR  = 16,  /* inside the fraction digits */
    E1  = 17,  /* after 'e' / 'E' */
    E2  = 18,  /* after the exponent sign */
    E3  = 19,  /* inside the exponent digits */
    T1  = 20,  /* true: 'r' expected */
    T2  = 21,  /* true: 'u' expected */
    T3  = 22,  /* true: 'e' expected */
    F1  = 23,  /* false: 'a' expected */
    F2  = 24,  /* false: 'l' expected */
    F3  = 25,  /* false: 's' expected */
    F4  = 26,  /* false: 'e' expected */
    N1  = 27,  /* null: 'u' expected */
    N2  = 28,  /* null: first 'l' expected */
    N3  = 29,  /* null: second 'l' expected */
    C1  = 30,  /* after '/': '*' expected */
    C2  = 31,  /* inside a comment */
    C3  = 32,  /* inside a comment, after '*' */
    FX  = 33,  /* just after the decimal point */
    D1  = 34,  /* after a high surrogate: backslash expected */
    D2  = 35,  /* after a high surrogate and backslash: 'u' expected */
    NR_STATES = 36  /* number of states (table height) */
};
228:
/*
    Actions. Negative entries of the state transition table request one of
    these actions instead of a plain state change (the values -2 .. -9 are
    used directly in the table for structural characters).
*/
enum actions {
    CB = -10,  /* comment begin */
    CE = -11,  /* comment end */
    FA = -12,  /* start of the keyword false */
    TR = -13,  /* start of the keyword true */
    NU = -14,  /* start of the keyword null */
    DE = -15,  /* number turns out to be a double: exponent e / E seen */
    DF = -16,  /* number turns out to be a double: decimal point seen */
    SB = -17,  /* string begin */
    MX = -18,  /* integer started by a minus sign */
    ZX = -19,  /* integer started by zero */
    IX = -20,  /* integer started by 1-9 */
    EX = -21,  /* next character is escaped */
    UC = -22   /* Unicode character (\uXXXX) completely read */
};
245:
246:
247: static signed char state_transition_table[NR_STATES][NR_CLASSES] = {
248: /*
249: The state transition table takes the current state and the current symbol,
250: and returns either a new state or an action. An action is represented as a
251: negative number. A JSON text is accepted if at the end of the text the
252: state is OK and if the mode is MODE_DONE.
253:
254: white 1-9 ABCDF etc
255: space | { } [ ] : , " \ / + - . 0 | a b c d e f l n r s t u | E | * */
256: /*start GO*/ {GO,GO,-6,__,-5,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
257: /*ok OK*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
258: /*object OB*/ {OB,OB,__,-9,__,__,__,__,SB,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
259: /*key KE*/ {KE,KE,__,__,__,__,__,__,SB,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
260: /*colon CO*/ {CO,CO,__,__,__,__,-2,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
261: /*value VA*/ {VA,VA,-6,__,-5,__,__,__,SB,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,FA,__,NU,__,__,TR,__,__,__,__,__},
262: /*array AR*/ {AR,AR,-6,__,-5,-7,__,__,SB,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,FA,__,NU,__,__,TR,__,__,__,__,__},
263: /*string ST*/ {ST,__,ST,ST,ST,ST,ST,ST,-4,EX,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST,ST},
1.4 misha 264: /*escape ESC*/{__,__,__,__,__,__,__,__,ST,ST,ST,__,__,__,__,__,__,ST,__,__,__,ST,__,ST,ST,__,ST,U1,__,__,__,__},
1.1 misha 265: /*u1 U1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__,__},
266: /*u2 U2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__,__},
267: /*u3 U3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__,__},
268: /*u4 U4*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,UC,UC,UC,UC,UC,UC,UC,UC,__,__,__,__,__,__,UC,UC,__,__},
269: /*minus MI*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,ZE,IT,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
270: /*zero ZE*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,DF,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
271: /*int IT*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,DF,IT,IT,__,__,__,__,DE,__,__,__,__,__,__,__,__,DE,__,__},
272: /*frac FR*/ {OK,OK,__,-8,__,-7,__,-3,__,__,CB,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__,__},
273: /*e E1*/ {__,__,__,__,__,__,__,__,__,__,__,E2,E2,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
274: /*ex E2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
275: /*exp E3*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,E3,E3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
276: /*tr T1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__,__},
277: /*tru T2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__,__},
278: /*true T3*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__,__},
279: /*fa F1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
280: /*fal F2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__,__},
281: /*fals F3*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__,__},
282: /*false F4*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__,__,__},
283: /*nu N1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__,__},
284: /*nul N2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__,__},
285: /*null N3*/ {__,__,__,__,__,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,OK,__,__,__,__,__,__,__,__,__},
286: /*/ C1*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,C2},
287: /*/* C2*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3},
288: /** C3*/ {C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,CE,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3},
289: /*_. FX*/ {OK,OK,__,-8,__,-7,__,-3,__,__,__,__,__,__,FR,FR,__,__,__,__,E1,__,__,__,__,__,__,__,__,E1,__,__},
290: /*\ D1*/ {__,__,__,__,__,__,__,__,__,D2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__},
291: /*\ D2*/ {__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,U1,__,__,__,__},
292: };
293:
294:
/*
    Modes kept on the parser stack; the top entry tells which kind of
    construct is currently open.
*/
enum modes {
    MODE_ARRAY  = 1,  /* inside an array */
    MODE_DONE   = 2,  /* bottom entry, pushed when the parser is created */
    MODE_KEY    = 3,  /* inside an object, a key comes next */
    MODE_OBJECT = 4   /* inside an object, after the colon of a member */
};
304:
305: static void set_error(JSON_parser jc)
306: {
307: switch (jc->state) {
308: case GO:
309: switch (jc->current_char) {
310: case '{': case '}': case '[': case ']':
311: jc->error = JSON_E_UNBALANCED_COLLECTION;
312: break;
313: default:
314: jc->error = JSON_E_INVALID_CHAR;
315: break;
316: }
317: break;
318: case OB:
319: jc->error = JSON_E_EXPECTED_KEY;
320: break;
321: case AR:
322: jc->error = JSON_E_UNBALANCED_COLLECTION;
323: break;
324: case CO:
325: jc->error = JSON_E_EXPECTED_COLON;
326: break;
327: case KE:
328: jc->error = JSON_E_EXPECTED_KEY;
329: break;
330: /* \uXXXX\uYYYY */
331: case U1: case U2: case U3:case U4: case D1: case D2:
332: jc->error = JSON_E_INVALID_UNICODE_SEQUENCE;
333: break;
334: /* true, false, null */
335: case T1: case T2: case T3: case F1: case F2: case F3: case F4: case N1: case N2: case N3:
336: jc->error = JSON_E_INVALID_KEYWORD;
337: break;
338: /* minus, integer, fraction, exponent */
339: case MI: case ZE: case IT: case FR: case E1: case E2: case E3:
340: jc->error = JSON_E_INVALID_NUMBER;
341: break;
342: default:
343: jc->error = JSON_E_INVALID_CHAR;
344: break;
345: }
346: }
347:
348: static int
349: push(JSON_parser jc, int mode)
350: {
351: /*
352: Push a mode onto the stack. Return false if there is overflow.
353: */
354: assert(jc->top <= jc->stack_capacity);
355:
356: if (jc->depth < 0) {
357: if (jc->top == jc->stack_capacity) {
358: const size_t bytes_to_copy = jc->stack_capacity * sizeof(jc->stack[0]);
359: const size_t new_capacity = jc->stack_capacity * 2;
360: const size_t bytes_to_allocate = new_capacity * sizeof(jc->stack[0]);
361: void* mem = JSON_parser_malloc(bytes_to_allocate, "stack");
362: if (!mem) {
363: jc->error = JSON_E_OUT_OF_MEMORY;
364: return false;
365: }
366: jc->stack_capacity = (int)new_capacity;
367: memcpy(mem, jc->stack, bytes_to_copy);
368: if (jc->stack != &jc->static_stack[0]) {
369: JSON_parser_free(jc->stack);
370: }
371: jc->stack = (signed char*)mem;
372: }
373: } else {
374: if (jc->top == jc->depth) {
375: jc->error = JSON_E_NESTING_DEPTH_REACHED;
376: return false;
377: }
378: }
379: jc->stack[++jc->top] = (signed char)mode;
380: return true;
381: }
382:
383:
384: static int
385: pop(JSON_parser jc, int mode)
386: {
387: /*
388: Pop the stack, assuring that the current mode matches the expectation.
389: Return false if there is underflow or if the modes mismatch.
390: */
391: if (jc->top < 0 || jc->stack[jc->top] != mode) {
392: return false;
393: }
394: jc->top -= 1;
395: return true;
396: }
397:
398:
/* Empty the parse buffer: zero length and an empty, 0-terminated string.
   The argument is parenthesized so that any pointer expression can be
   passed; note that it is evaluated twice. */
#define parse_buffer_clear(jc) \
    do {\
        (jc)->parse_buffer_count = 0;\
        (jc)->parse_buffer[0] = 0;\
    } while (0)
404:
/* Drop the last character of the parse buffer and keep it 0-terminated.
   The buffer must not be empty. The argument is parenthesized so that any
   pointer expression can be passed; note that it is evaluated several
   times. */
#define parse_buffer_pop_back_char(jc)\
    do {\
        assert((jc)->parse_buffer_count >= 1);\
        --(jc)->parse_buffer_count;\
        (jc)->parse_buffer[(jc)->parse_buffer_count] = 0;\
    } while (0)
411:
412: void delete_JSON_parser(JSON_parser jc)
413: {
414: if (jc) {
415: if (jc->stack != &jc->static_stack[0]) {
416: JSON_parser_free((void*)jc->stack);
417: }
418: if (jc->parse_buffer != &jc->static_parse_buffer[0]) {
419: JSON_parser_free((void*)jc->parse_buffer);
420: }
421: JSON_parser_free((void*)jc);
422: }
423: }
424:
425:
426: JSON_parser
427: new_JSON_parser(JSON_config* config)
428: {
429: /*
430: new_JSON_parser starts the checking process by constructing a JSON_parser
431: object. It takes a depth parameter that restricts the level of maximum
432: nesting.
433:
434: To continue the process, call JSON_parser_char for each character in the
435: JSON text, and then call JSON_parser_done to obtain the final result.
436: These functions are fully reentrant.
437: */
438:
439: int depth = 0;
440: JSON_config default_config;
441:
1.2 moko 442: JSON_parser jc = (JSON_parser)JSON_parser_malloc(sizeof(struct JSON_parser_struct), "parser");
1.1 misha 443:
444: if (jc == NULL) {
445: return NULL;
446: }
447:
448: memset(jc, 0, sizeof(*jc));
449:
450: /* initialize configuration */
451: init_JSON_config(&default_config);
452:
453: /* set to default configuration if none was provided */
454: if (config == NULL) {
455: config = &default_config;
456: }
457:
458: depth = config->depth;
459:
460: /* We need to be able to push at least one object */
461: if (depth == 0) {
462: depth = 1;
463: }
464:
465: jc->state = GO;
466: jc->top = -1;
467:
468: /* Do we want non-bound stack? */
469: if (depth > 0) {
470: jc->stack_capacity = depth;
471: jc->depth = depth;
472: if (depth <= (int)COUNTOF(jc->static_stack)) {
473: jc->stack = &jc->static_stack[0];
474: } else {
475: jc->stack = (signed char*)JSON_parser_malloc(jc->stack_capacity * sizeof(jc->stack[0]), "stack");
476: if (jc->stack == NULL) {
477: JSON_parser_free(jc);
478: return NULL;
479: }
480: }
481: } else {
482: jc->stack_capacity = (int)COUNTOF(jc->static_stack);
483: jc->depth = -1;
484: jc->stack = &jc->static_stack[0];
485: }
486:
487: /* set parser to start */
488: push(jc, MODE_DONE);
489:
490: /* set up the parse buffer */
491: jc->parse_buffer = &jc->static_parse_buffer[0];
492: jc->parse_buffer_capacity = COUNTOF(jc->static_parse_buffer);
493: parse_buffer_clear(jc);
494:
495: /* set up callback, comment & float handling */
496: jc->callback = config->callback;
497: jc->ctx = config->callback_ctx;
498: jc->allow_comments = (signed char)config->allow_comments != 0;
499: jc->handle_floats_manually = (signed char)config->handle_floats_manually != 0;
500:
501: /* set up decimal point */
1.5 ! moko 502: jc->decimal_point = '.';
1.1 misha 503:
504: return jc;
505: }
506:
507: static int parse_buffer_grow(JSON_parser jc)
508: {
509: const size_t bytes_to_copy = jc->parse_buffer_count * sizeof(jc->parse_buffer[0]);
510: const size_t new_capacity = jc->parse_buffer_capacity * 2;
511: const size_t bytes_to_allocate = new_capacity * sizeof(jc->parse_buffer[0]);
512: void* mem = JSON_parser_malloc(bytes_to_allocate, "parse buffer");
513:
514: if (mem == NULL) {
515: jc->error = JSON_E_OUT_OF_MEMORY;
516: return false;
517: }
518:
519: assert(new_capacity > 0);
520: memcpy(mem, jc->parse_buffer, bytes_to_copy);
521:
522: if (jc->parse_buffer != &jc->static_parse_buffer[0]) {
523: JSON_parser_free(jc->parse_buffer);
524: }
525:
526: jc->parse_buffer = (char*)mem;
527: jc->parse_buffer_capacity = new_capacity;
528:
529: return true;
530: }
531:
532: static int parse_buffer_reserve_for(JSON_parser jc, unsigned chars)
533: {
534: while (jc->parse_buffer_count + chars + 1 > jc->parse_buffer_capacity) {
535: if (!parse_buffer_grow(jc)) {
536: assert(jc->error == JSON_E_OUT_OF_MEMORY);
537: return false;
538: }
539: }
540:
541: return true;
542: }
543:
/* True if `count` more characters plus the terminating 0 fit into the
   parse buffer. Arguments are parenthesized; jc is evaluated twice. */
#define parse_buffer_has_space_for(jc, count) \
    ((jc)->parse_buffer_count + (count) + 1 <= (jc)->parse_buffer_capacity)

/* Append character c to the parse buffer and keep it 0-terminated. The
   caller must have reserved the space beforehand. Arguments are
   parenthesized; jc is evaluated several times. */
#define parse_buffer_push_back_char(jc, c)\
    do {\
        assert(parse_buffer_has_space_for(jc, 1)); \
        (jc)->parse_buffer[(jc)->parse_buffer_count++] = (c);\
        (jc)->parse_buffer[(jc)->parse_buffer_count] = 0;\
    } while (0)
553:
/* Debug check: the value currently being collected is a scalar (null,
   false, true, float, integer or string), not an array or object. The
   argument is parenthesized so that any pointer expression can be passed. */
#define assert_is_non_container_type(jc) \
    assert( \
        (jc)->type == JSON_T_NULL || \
        (jc)->type == JSON_T_FALSE || \
        (jc)->type == JSON_T_TRUE || \
        (jc)->type == JSON_T_FLOAT || \
        (jc)->type == JSON_T_INTEGER || \
        (jc)->type == JSON_T_STRING)
562:
563:
564: static int parse_parse_buffer(JSON_parser jc)
565: {
566: if (jc->callback) {
567: JSON_value value, *arg = NULL;
568:
569: if (jc->type != JSON_T_NONE) {
570: assert_is_non_container_type(jc);
571:
572: switch(jc->type) {
573: case JSON_T_FLOAT:
574: arg = &value;
575: if (jc->handle_floats_manually) {
576: value.vu.str.value = jc->parse_buffer;
577: value.vu.str.length = jc->parse_buffer_count;
578: } else {
579: /* not checking with end pointer b/c there may be trailing ws */
580: value.vu.float_value = strtod(jc->parse_buffer, NULL);
581: }
582: break;
583: case JSON_T_INTEGER:
584: arg = &value;
585: sscanf(jc->parse_buffer, JSON_PARSER_INTEGER_SSCANF_TOKEN, &value.vu.integer_value);
586: break;
587: case JSON_T_STRING:
588: arg = &value;
589: value.vu.str.value = jc->parse_buffer;
590: value.vu.str.length = jc->parse_buffer_count;
591: break;
592: }
593:
594: if (!(*jc->callback)(jc->ctx, jc->type, arg)) {
595: return false;
596: }
597: }
598: }
599:
600: parse_buffer_clear(jc);
601:
602: return true;
603: }
604:
/* UTF-16 surrogate helpers: high surrogates are 0xD800-0xDBFF, low
   surrogates 0xDC00-0xDFFF; a pair encodes a code point >= 0x10000. */
#define IS_HIGH_SURROGATE(uc) (((uc) & 0xFC00) == 0xD800)
#define IS_LOW_SURROGATE(uc) (((uc) & 0xFC00) == 0xDC00)
#define DECODE_SURROGATE_PAIR(hi,lo) ((((hi) & 0x3FF) << 10) + ((lo) & 0x3FF) + 0x10000)

/* Marker bits of the first UTF-8 byte, indexed by the number of trail
   bytes that follow it. Never written to, hence const. */
static const unsigned char utf8_lead_bits[4] = { 0x00, 0xC0, 0xE0, 0xF0 };
609:
610: static int decode_unicode_char(JSON_parser jc)
611: {
612: int i;
613: unsigned uc = 0;
614: char* p;
615: int trail_bytes;
616:
617: assert(jc->parse_buffer_count >= 6);
618:
619: p = &jc->parse_buffer[jc->parse_buffer_count - 4];
620:
621: for (i = 12; i >= 0; i -= 4, ++p) {
622: unsigned x = *p;
623:
624: if (x >= 'a') {
625: x -= ('a' - 10);
626: } else if (x >= 'A') {
627: x -= ('A' - 10);
628: } else {
629: x &= ~0x30u;
630: }
631:
632: assert(x < 16);
633:
634: uc |= x << i;
635: }
636:
637: /* clear UTF-16 char from buffer */
638: jc->parse_buffer_count -= 6;
639: jc->parse_buffer[jc->parse_buffer_count] = 0;
640:
641: /* attempt decoding ... */
642: if (jc->utf16_high_surrogate) {
643: if (IS_LOW_SURROGATE(uc)) {
644: uc = DECODE_SURROGATE_PAIR(jc->utf16_high_surrogate, uc);
645: trail_bytes = 3;
646: jc->utf16_high_surrogate = 0;
647: } else {
648: /* high surrogate without a following low surrogate */
649: return false;
650: }
651: } else {
652: if (uc < 0x80) {
653: trail_bytes = 0;
654: } else if (uc < 0x800) {
655: trail_bytes = 1;
656: } else if (IS_HIGH_SURROGATE(uc)) {
657: /* save the high surrogate and wait for the low surrogate */
658: jc->utf16_high_surrogate = (UTF16)uc;
659: return true;
660: } else if (IS_LOW_SURROGATE(uc)) {
661: /* low surrogate without a preceding high surrogate */
662: return false;
663: } else {
664: trail_bytes = 2;
665: }
666: }
667:
668: jc->parse_buffer[jc->parse_buffer_count++] = (char) ((uc >> (trail_bytes * 6)) | utf8_lead_bits[trail_bytes]);
669:
670: for (i = trail_bytes * 6 - 6; i >= 0; i -= 6) {
671: jc->parse_buffer[jc->parse_buffer_count++] = (char) (((uc >> i) & 0x3F) | 0x80);
672: }
673:
674: jc->parse_buffer[jc->parse_buffer_count] = 0;
675:
676: return true;
677: }
678:
679: static int add_escaped_char_to_parse_buffer(JSON_parser jc, int next_char)
680: {
681: assert(parse_buffer_has_space_for(jc, 1));
682:
683: jc->escaped = 0;
684: /* remove the backslash */
685: parse_buffer_pop_back_char(jc);
686: switch(next_char) {
687: case 'b':
688: parse_buffer_push_back_char(jc, '\b');
689: break;
690: case 'f':
691: parse_buffer_push_back_char(jc, '\f');
692: break;
693: case 'n':
694: parse_buffer_push_back_char(jc, '\n');
695: break;
696: case 'r':
697: parse_buffer_push_back_char(jc, '\r');
698: break;
699: case 't':
700: parse_buffer_push_back_char(jc, '\t');
701: break;
702: case '"':
703: parse_buffer_push_back_char(jc, '"');
704: break;
705: case '\\':
706: parse_buffer_push_back_char(jc, '\\');
707: break;
708: case '/':
709: parse_buffer_push_back_char(jc, '/');
710: break;
711: case 'u':
712: parse_buffer_push_back_char(jc, '\\');
713: parse_buffer_push_back_char(jc, 'u');
714: break;
715: default:
716: return false;
717: }
718:
719: return true;
720: }
721:
722: static int add_char_to_parse_buffer(JSON_parser jc, int next_char, int next_class)
723: {
724: if (!parse_buffer_reserve_for(jc, 1)) {
725: assert(JSON_E_OUT_OF_MEMORY == jc->error);
726: return false;
727: }
728:
729: if (jc->escaped) {
730: if (!add_escaped_char_to_parse_buffer(jc, next_char)) {
731: jc->error = JSON_E_INVALID_ESCAPE_SEQUENCE;
732: return false;
733: }
734: } else if (!jc->comment) {
735: if ((jc->type != JSON_T_NONE) | !((next_class == C_SPACE) | (next_class == C_WHITE)) /* non-white-space */) {
736: parse_buffer_push_back_char(jc, (char)next_char);
737: }
738: }
739:
740: return true;
741: }
742:
/* Debug check used when a number turns out to be a float: the value being
   collected must not be a string, null or boolean. Wrapped in
   do { } while (0) so that the macro is a single statement wherever it is
   used (e.g. as the body of an unbraced if); the argument is
   parenthesized. */
#define assert_type_isnt_string_null_or_bool(jc) \
    do { \
        assert((jc)->type != JSON_T_FALSE); \
        assert((jc)->type != JSON_T_TRUE); \
        assert((jc)->type != JSON_T_NULL); \
        assert((jc)->type != JSON_T_STRING); \
    } while (0)
748:
749:
750: int
751: JSON_parser_char(JSON_parser jc, int next_char)
752: {
753: /*
754: After calling new_JSON_parser, call this function for each character (or
755: partial character) in your JSON text. It can accept UTF-8, UTF-16, or
756: UTF-32. It returns true if things are looking ok so far. If it rejects the
757: text, it returns false.
758: */
759: int next_class, next_state;
760:
761: /*
762: Store the current char for error handling
763: */
764: jc->current_char = next_char;
765:
766: /*
767: Determine the character's class.
768: */
769: if (next_char < 0) {
770: jc->error = JSON_E_INVALID_CHAR;
771: return false;
772: }
773: if (next_char >= 128) {
774: next_class = C_ETC;
775: } else {
776: next_class = ascii_class[next_char];
777: if (next_class <= __) {
778: set_error(jc);
779: return false;
780: }
781: }
782:
783: if (!add_char_to_parse_buffer(jc, next_char, next_class)) {
784: return false;
785: }
786:
787: /*
788: Get the next state from the state transition table.
789: */
790: next_state = state_transition_table[jc->state][next_class];
791: if (next_state >= 0) {
792: /*
793: Change the state.
794: */
795: jc->state = (signed char)next_state;
796: } else {
797: /*
798: Or perform one of the actions.
799: */
800: switch (next_state) {
801: /* Unicode character */
802: case UC:
803: if(!decode_unicode_char(jc)) {
804: jc->error = JSON_E_INVALID_UNICODE_SEQUENCE;
805: return false;
806: }
807: /* check if we need to read a second UTF-16 char */
808: if (jc->utf16_high_surrogate) {
809: jc->state = D1;
810: } else {
811: jc->state = ST;
812: }
813: break;
814: /* escaped char */
815: case EX:
816: jc->escaped = 1;
1.4 misha 817: jc->state = ESC;
1.1 misha 818: break;
819: /* integer detected by minus */
820: case MX:
821: jc->type = JSON_T_INTEGER;
822: jc->state = MI;
823: break;
824: /* integer detected by zero */
825: case ZX:
826: jc->type = JSON_T_INTEGER;
827: jc->state = ZE;
828: break;
829: /* integer detected by 1-9 */
830: case IX:
831: jc->type = JSON_T_INTEGER;
832: jc->state = IT;
833: break;
834:
835: /* floating point number detected by exponent*/
836: case DE:
837: assert_type_isnt_string_null_or_bool(jc);
838: jc->type = JSON_T_FLOAT;
839: jc->state = E1;
840: break;
841:
842: /* floating point number detected by fraction */
843: case DF:
844: assert_type_isnt_string_null_or_bool(jc);
845: if (!jc->handle_floats_manually) {
846: /*
847: Some versions of strtod (which underlies sscanf) don't support converting
848: C-locale formated floating point values.
849: */
850: assert(jc->parse_buffer[jc->parse_buffer_count-1] == '.');
851: jc->parse_buffer[jc->parse_buffer_count-1] = jc->decimal_point;
852: }
853: jc->type = JSON_T_FLOAT;
854: jc->state = FX;
855: break;
856: /* string begin " */
857: case SB:
858: parse_buffer_clear(jc);
859: assert(jc->type == JSON_T_NONE);
860: jc->type = JSON_T_STRING;
861: jc->state = ST;
862: break;
863:
864: /* n */
865: case NU:
866: assert(jc->type == JSON_T_NONE);
867: jc->type = JSON_T_NULL;
868: jc->state = N1;
869: break;
870: /* f */
871: case FA:
872: assert(jc->type == JSON_T_NONE);
873: jc->type = JSON_T_FALSE;
874: jc->state = F1;
875: break;
876: /* t */
877: case TR:
878: assert(jc->type == JSON_T_NONE);
879: jc->type = JSON_T_TRUE;
880: jc->state = T1;
881: break;
882:
883: /* closing comment */
884: case CE:
885: jc->comment = 0;
886: assert(jc->parse_buffer_count == 0);
887: assert(jc->type == JSON_T_NONE);
888: jc->state = jc->before_comment_state;
889: break;
890:
891: /* opening comment */
892: case CB:
893: if (!jc->allow_comments) {
894: return false;
895: }
896: parse_buffer_pop_back_char(jc);
897: if (!parse_parse_buffer(jc)) {
898: return false;
899: }
900: assert(jc->parse_buffer_count == 0);
901: assert(jc->type != JSON_T_STRING);
902: switch (jc->stack[jc->top]) {
903: case MODE_ARRAY:
904: case MODE_OBJECT:
905: switch(jc->state) {
906: case VA:
907: case AR:
908: jc->before_comment_state = jc->state;
909: break;
910: default:
911: jc->before_comment_state = OK;
912: break;
913: }
914: break;
915: default:
916: jc->before_comment_state = jc->state;
917: break;
918: }
919: jc->type = JSON_T_NONE;
920: jc->state = C1;
921: jc->comment = 1;
922: break;
923: /* empty } */
924: case -9:
925: parse_buffer_clear(jc);
926: if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_END, NULL)) {
927: return false;
928: }
929: if (!pop(jc, MODE_KEY)) {
930: return false;
931: }
932: jc->state = OK;
933: break;
934:
935: /* } */ case -8:
936: parse_buffer_pop_back_char(jc);
937: if (!parse_parse_buffer(jc)) {
938: return false;
939: }
940: if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_END, NULL)) {
941: return false;
942: }
943: if (!pop(jc, MODE_OBJECT)) {
944: jc->error = JSON_E_UNBALANCED_COLLECTION;
945: return false;
946: }
947: jc->type = JSON_T_NONE;
948: jc->state = OK;
949: break;
950:
951: /* ] */ case -7:
952: parse_buffer_pop_back_char(jc);
953: if (!parse_parse_buffer(jc)) {
954: return false;
955: }
956: if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_ARRAY_END, NULL)) {
957: return false;
958: }
959: if (!pop(jc, MODE_ARRAY)) {
960: jc->error = JSON_E_UNBALANCED_COLLECTION;
961: return false;
962: }
963:
964: jc->type = JSON_T_NONE;
965: jc->state = OK;
966: break;
967:
968: /* { */ case -6:
969: parse_buffer_pop_back_char(jc);
970: if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_OBJECT_BEGIN, NULL)) {
971: return false;
972: }
973: if (!push(jc, MODE_KEY)) {
974: return false;
975: }
976: assert(jc->type == JSON_T_NONE);
977: jc->state = OB;
978: break;
979:
980: /* [ */ case -5:
981: parse_buffer_pop_back_char(jc);
982: if (jc->callback && !(*jc->callback)(jc->ctx, JSON_T_ARRAY_BEGIN, NULL)) {
983: return false;
984: }
985: if (!push(jc, MODE_ARRAY)) {
986: return false;
987: }
988: assert(jc->type == JSON_T_NONE);
989: jc->state = AR;
990: break;
991:
992: /* string end " */ case -4:
993: parse_buffer_pop_back_char(jc);
994: switch (jc->stack[jc->top]) {
995: case MODE_KEY:
996: assert(jc->type == JSON_T_STRING);
997: jc->type = JSON_T_NONE;
998: jc->state = CO;
999:
1000: if (jc->callback) {
1001: JSON_value value;
1002: value.vu.str.value = jc->parse_buffer;
1003: value.vu.str.length = jc->parse_buffer_count;
1004: if (!(*jc->callback)(jc->ctx, JSON_T_KEY, &value)) {
1005: return false;
1006: }
1007: }
1008: parse_buffer_clear(jc);
1009: break;
1010: case MODE_ARRAY:
1011: case MODE_OBJECT:
1012: assert(jc->type == JSON_T_STRING);
1013: if (!parse_parse_buffer(jc)) {
1014: return false;
1015: }
1016: jc->type = JSON_T_NONE;
1017: jc->state = OK;
1018: break;
1019: default:
1020: return false;
1021: }
1022: break;
1023:
1024: /* , */ case -3:
1025: parse_buffer_pop_back_char(jc);
1026: if (!parse_parse_buffer(jc)) {
1027: return false;
1028: }
1029: switch (jc->stack[jc->top]) {
1030: case MODE_OBJECT:
1031: /*
1032: A comma causes a flip from object mode to key mode.
1033: */
1034: if (!pop(jc, MODE_OBJECT) || !push(jc, MODE_KEY)) {
1035: return false;
1036: }
1037: assert(jc->type != JSON_T_STRING);
1038: jc->type = JSON_T_NONE;
1039: jc->state = KE;
1040: break;
1041: case MODE_ARRAY:
1042: assert(jc->type != JSON_T_STRING);
1043: jc->type = JSON_T_NONE;
1044: jc->state = VA;
1045: break;
1046: default:
1047: return false;
1048: }
1049: break;
1050:
1051: /* : */ case -2:
1052: /*
1053: A colon causes a flip from key mode to object mode.
1054: */
1055: parse_buffer_pop_back_char(jc);
1056: if (!pop(jc, MODE_KEY) || !push(jc, MODE_OBJECT)) {
1057: return false;
1058: }
1059: assert(jc->type == JSON_T_NONE);
1060: jc->state = VA;
1061: break;
1062: /*
1063: Bad action.
1064: */
1065: default:
1066: set_error(jc);
1067: return false;
1068: }
1069: }
1070: return true;
1071: }
1072:
1073: int
1074: JSON_parser_done(JSON_parser jc)
1075: {
1076: if ((jc->state == OK || jc->state == GO) && pop(jc, MODE_DONE))
1077: {
1078: return true;
1079: }
1080:
1081: jc->error = JSON_E_UNBALANCED_COLLECTION;
1082: return false;
1083: }
1084:
1085:
1086: int JSON_parser_is_legal_white_space_string(const char* s)
1087: {
1088: int c, char_class;
1089:
1090: if (s == NULL) {
1091: return false;
1092: }
1093:
1094: for (; *s; ++s) {
1095: c = *s;
1096:
1097: if (c < 0 || c >= 128) {
1098: return false;
1099: }
1100:
1101: char_class = ascii_class[c];
1102:
1103: if (char_class != C_SPACE && char_class != C_WHITE) {
1104: return false;
1105: }
1106: }
1107:
1108: return true;
1109: }
1110:
1111: int JSON_parser_get_last_error(JSON_parser jc)
1112: {
1113: return jc->error;
1114: }
1115:
1116:
1117: void init_JSON_config(JSON_config* config)
1118: {
1119: if (config) {
1120: memset(config, 0, sizeof(*config));
1121:
1122: config->depth = JSON_PARSER_STACK_SIZE - 1;
1123: }
1124: }
E-mail: