Annotation of parser3/src/lib/json/json.c, revision 1.2
1.1 moko 1: /*
2: * Copyright (C) 2009 Vincent Hanquez <vincent@snarc.org>
3: *
4: * This program is free software; you can redistribute it and/or modify
5: * it under the terms of the GNU Lesser General Public License as published
6: * by the Free Software Foundation; version 2.1 or version 3.0 only.
7: *
8: * This program is distributed in the hope that it will be useful,
9: * but WITHOUT ANY WARRANTY; without even the implied warranty of
10: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11: * GNU General Public License for more details.
12: */
13:
/*
 * The character classes, states and state transition tables were inspired by
 * the JSON_parser.c available at http://json.org, but this parser differs in
 * the way it handles its parse buffer and contains significant differences
 * that affect JSON compliance.
 */
20:
21: #include "json.h"
22:
23: #ifdef TRACING_ENABLE
24: #include <stdio.h>
25: #define TRACING(fmt, ...) fprintf(stderr, "tracing: " fmt, ##__VA_ARGS__)
26: #else
27: #define TRACING(fmt, ...) ((void) 0)
28: #endif
29:
/*
 * Character classes. Every input byte is mapped to one of these, and the
 * class is then used as the column index of the transition and buffer
 * policy tables.
 */
enum classes {
	C_SPACE,	/* space */
	C_NL,		/* newline */
	C_WHITE,	/* tab, CR */

	/* object opening/closing */
	C_LCURB,
	C_RCURB,

	/* array opening/closing */
	C_LSQRB,
	C_RSQRB,

	/* syntax symbols */
	C_COLON,
	C_COMMA,
	C_QUOTE,	/* " */
	C_BACKS,	/* \ */
	C_SLASH,	/* / */
	C_PLUS,
	C_MINUS,
	C_DOT,

	/* digits */
	C_ZERO,
	C_DIGIT,

	/* nocaps letters */
	C_a,
	C_b,
	C_c,
	C_d,
	C_e,
	C_f,
	C_l,
	C_n,
	C_r,
	C_s,
	C_t,
	C_u,

	/* caps letters */
	C_ABCDF,
	C_E,

	C_OTHER,	/* all other */
	C_STAR,		/* star in C style comment */
	C_HASH,		/* # for YAML comment */

	/* not a table column: marks bytes that are rejected outright */
	C_ERROR = 0xfe
};
53:
/*
 * Map from a character < 128 to its class; characters from 128 to 255 are
 * all treated as C_OTHER by the caller and never index this table.
 * Each row below covers eight consecutive character codes.
 */
static uint8_t character_class[128] = {
	C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR,	/* 0x00-0x07 */
	C_ERROR, C_WHITE, C_NL, C_ERROR, C_ERROR, C_WHITE, C_ERROR, C_ERROR,	/* 0x08-0x0f: tab, LF, CR */
	C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR,	/* 0x10-0x17 */
	C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR,	/* 0x18-0x1f */

	C_SPACE, C_OTHER, C_QUOTE, C_HASH, C_OTHER, C_OTHER, C_OTHER, C_OTHER,	/* space ! " # $ % & ' */
	C_OTHER, C_OTHER, C_STAR, C_PLUS, C_COMMA, C_MINUS, C_DOT, C_SLASH,	/* ( ) * + , - . / */
	C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,	/* 0-7 */
	C_DIGIT, C_DIGIT, C_COLON, C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER,	/* 8 9 : ; < = > ? */

	C_OTHER, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_OTHER,	/* @ A-G */
	C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER,	/* H-O */
	C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER,	/* P-W */
	C_OTHER, C_OTHER, C_OTHER, C_LSQRB, C_BACKS, C_RSQRB, C_OTHER, C_OTHER,	/* X Y Z [ \ ] ^ _ */

	C_OTHER, C_a, C_b, C_c, C_d, C_e, C_f, C_OTHER,	/* ` a-g */
	C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_l, C_OTHER, C_n, C_OTHER,	/* h-o */
	C_OTHER, C_OTHER, C_r, C_s, C_t, C_u, C_OTHER, C_OTHER,	/* p-w */
	C_OTHER, C_OTHER, C_OTHER, C_LCURB, C_OTHER, C_RCURB, C_OTHER, C_OTHER	/* x y z { | } ~ DEL */
};
76:
/*
 * Escaped spelling of the first 36 ASCII characters (0x00 through '#'),
 * indexed by character code; only these need an escape.
 * Control characters use their short form (\b, \t, \n, \f, \r) where one
 * exists and \u00XX otherwise; of the four printable entries only '"' is
 * actually changed.
 * NOTE(review): nothing in the visible part of this file references this
 * table -- presumably left over from an output/printer routine; confirm.
 */
static char *character_escape[36] = {
	"\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", "\\u0007", /* 0-7 */
	"\\b" , "\\t", "\\n", "\\u000b", "\\f", "\\r", "\\u000e", "\\u000f", /* 8-f */
	"\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", "\\u0015", "\\u0016", "\\u0017", /* 10-17 */
	"\\u0018", "\\u0019", "\\u001a", "\\u001b", "\\u001c", "\\u001d", "\\u001e", "\\u001f", /* 18-1f */
	" " , "!" , "\\\"" , "#", /* 20-23 */
};
85:
/* Define all states, and the actions that are taken on each transition.
 *
 * States are defined first because they are used as row indices into the
 * transition table. Their names usually contain either a number or a `_`
 * prefix; the `_` prefix marks simple states such as string, object, value ...
 *
 * Actions are numbered starting from 0x80. The error state is defined as 0xff.
 */
94:
/*
 * Parser states. The numeric value of each state is its row index in the
 * transition and buffer policy tables, so the order must not change.
 */
enum states {
	STATE_GO,	/* start */
	STATE_OK,	/* ok */
	STATE__O,	/* object */
	STATE__K,	/* key */
	STATE_CO,	/* colon */
	STATE__V,	/* value */
	STATE__A,	/* array */
	STATE__S,	/* string */
	STATE_E0,	/* escape */

	/* unicode states */
	STATE_U1,
	STATE_U2,
	STATE_U3,
	STATE_U4,

	/* number states */
	STATE_M0,
	STATE_Z0,
	STATE_I0,

	/* real states (after-dot digits) */
	STATE_R1,
	STATE_R2,

	/* exponent states */
	STATE_X1,
	STATE_X2,
	STATE_X3,

	/* true constant states */
	STATE_T1,
	STATE_T2,
	STATE_T3,

	/* false constant states */
	STATE_F1,
	STATE_F2,
	STATE_F3,
	STATE_F4,

	/* null constant states */
	STATE_N1,
	STATE_N2,
	STATE_N3,

	/* C-comment states */
	STATE_C1,
	STATE_C2,
	STATE_C3,

	/* YAML-comment state */
	STATE_Y1,

	/* multi unicode states */
	STATE_D1,
	STATE_D2
};
116:
/*
 * Actions that need to be taken on a transition. They share the value
 * space of the states but all have bit 0x80 set, which is how a table
 * entry is recognised as an action rather than a plain next state.
 */
enum actions {
	STATE_KS = 0x80,	/* key separator */
	STATE_SP,		/* comma separator */

	STATE_AB,		/* array begin */
	STATE_AE,		/* array ending */
	STATE_OB,		/* object begin */
	STATE_OE,		/* object end */

	STATE_CB,		/* C-comment begin */
	STATE_YB,		/* YAML-comment begin */
	STATE_CE,		/* YAML/C comment end */

	STATE_FA,		/* false */
	STATE_TR,		/* true */
	STATE_NU,		/* null */

	STATE_DE,		/* double detected by exponent */
	STATE_DF,		/* double detected by . */
	STATE_SE,		/* string end */
	STATE_MX,		/* integer detected by minus */
	STATE_ZX,		/* integer detected by zero */
	STATE_IX,		/* integer detected by 1-9 */

	STATE_UC		/* Unicode character read */
};
139:
/* error state: a table entry of STATE___ means "this character is not allowed here" */
#define STATE___ 0xff

/* table dimensions: one row per state, one column per character class */
#define NR_STATES (STATE_D2 + 1)
#define NR_CLASSES (C_HASH + 1)

/* actions are numbered from 0x80, plain states are below it */
#define IS_STATE_ACTION(s) ((s) & 0x80)

/*
 * S(x) pastes the STATE_ prefix onto a two-character name and PT_ builds one
 * table row from 34 such names (one per character class), which keeps the
 * rows below short enough to read as a grid. Both are #undef'ed right after
 * the table.
 */
#define S(x) STATE_##x
#define PT_(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a1,b1,c1,d1,e1,f1,g1,h1) \
	{ S(a),S(b),S(c),S(d),S(e),S(f),S(g),S(h),S(i),S(j),S(k),S(l),S(m),S(n), \
	S(o),S(p),S(q),S(r),S(s),S(t),S(u),S(v),S(w),S(x),S(y),S(z),S(a1),S(b1), \
	S(c1),S(d1),S(e1),S(f1),S(g1),S(h1) }

/*
 * Map from (previous state, new character class) to the next parser
 * transition: either a plain state, an action (0x80 bit set) or STATE___.
 *
 * Columns follow enum classes. In the legend the three `|` columns stand,
 * from left to right, for C_WHITE, C_ABCDF and C_OTHER, and `19` for C_DIGIT.
 */
static const uint8_t state_transition_table[NR_STATES][NR_CLASSES] = {
	/*         sp nl |  {  }  [  ]  :  ,  "  \  /  +  -  .  0  19 a  b  c  d  e  f  l  n  r  s  t  u  |  E  |  *  #  */
	/*GO*/ PT_(GO,GO,GO,OB,__,AB,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,YB),
	/*OK*/ PT_(OK,OK,OK,__,OE,__,AE,__,SP,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,YB),
	/*_O*/ PT_(_O,_O,_O,__,OE,__,__,__,__,_S,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,YB),
	/*_K*/ PT_(_K,_K,_K,__,__,__,__,__,__,_S,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,YB),
	/*CO*/ PT_(CO,CO,CO,__,__,__,__,KS,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,YB),
	/*_V*/ PT_(_V,_V,_V,OB,__,AB,__,__,__,_S,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__,__,YB),
	/*_A*/ PT_(_A,_A,_A,OB,__,AB,AE,__,__,_S,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__,__,YB),
	/****************************************************************************************************************/
	/* strings: raw newline/tab/CR are rejected inside a string, everything else is kept */
	/*_S*/ PT_(_S,__,__,_S,_S,_S,_S,_S,_S,SE,E0,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S),
	/*E0*/ PT_(__,__,__,__,__,__,__,__,__,_S,_S,_S,__,__,__,__,__,__,_S,__,__,__,_S,__,_S,_S,__,_S,U1,__,__,__,__,__),
	/*U1*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__,__,__),
	/*U2*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__,__,__),
	/*U3*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__,__,__),
	/*U4*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,UC,UC,UC,UC,UC,UC,UC,UC,__,__,__,__,__,__,UC,UC,__,__,__),
	/****************************************************************************************************************/
	/* numbers */
	/*M0*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,Z0,I0,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__),
	/*Z0*/ PT_(OK,OK,OK,__,OE,__,AE,__,SP,__,__,CB,__,__,DF,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,YB),
	/*I0*/ PT_(OK,OK,OK,__,OE,__,AE,__,SP,__,__,CB,__,__,DF,I0,I0,__,__,__,__,DE,__,__,__,__,__,__,__,__,DE,__,__,YB),
	/*R1*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,R2,R2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__),
	/*R2*/ PT_(OK,OK,OK,__,OE,__,AE,__,SP,__,__,CB,__,__,__,R2,R2,__,__,__,__,X1,__,__,__,__,__,__,__,__,X1,__,__,YB),
	/*X1*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,X2,X2,__,X3,X3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__),
	/*X2*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,X3,X3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__),
	/* NOTE(review): unlike Z0/I0/R2, the X3 row has no CB/YB entries, so a comment cannot directly follow an exponent -- confirm this is intended */
	/*X3*/ PT_(OK,OK,OK,__,OE,__,AE,__,SP,__,__,__,__,__,__,X3,X3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__),
	/****************************************************************************************************************/
	/* the literals true, false and null, matched one letter at a time */
	/*T1*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__,__,__),
	/*T2*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__,__,__),
	/*T3*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,TR,__,__,__,__,__,__,__,__,__,__,__,__),
	/*F1*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__),
	/*F2*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__,__,__),
	/*F3*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__,__,__),
	/*F4*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,FA,__,__,__,__,__,__,__,__,__,__,__,__),
	/*N1*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__,__,__),
	/*N2*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__,__,__),
	/*N3*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,NU,__,__,__,__,__,__,__,__,__,__),
	/****************************************************************************************************************/
	/* comments (C1-C3: C style, Y1: # to end of line) and the \u that must follow a high surrogate (D1, D2) */
	/*C1*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,C2,__),
	/*C2*/ PT_(C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3,C2),
	/*C3*/ PT_(C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,CE,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3,C2),
	/*Y1*/ PT_(Y1,CE,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1),
	/*D1*/ PT_(__,__,__,__,__,__,__,__,__,__,D2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__),
	/*D2*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,U1,__,__,__,__,__),
};
#undef S
#undef PT_
201:
/*
 * Map from (previous state, new character class) to the buffer policy:
 *   0 = ignore the character,
 *   1 = append it to the parse buffer as is,
 *   2 = append the character that the escape sequence stands for.
 *
 * Rows and columns are laid out exactly like the state transition table.
 * In the legend the three `|` columns stand, from left to right, for
 * C_WHITE, C_ABCDF and C_OTHER, and `19` for C_DIGIT.
 */
static const uint8_t buffer_policy_table[NR_STATES][NR_CLASSES] = {
	/*       sp nl |  {  }  [  ]  :  ,  "  \  /  +  -  .  0  19 a  b  c  d  e  f  l  n  r  s  t  u  |  E  |  *  #  */
	/*GO*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*OK*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*_O*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*_K*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*CO*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/* a value may start a number: keep its leading '-' or first digit */
	/*_V*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*_A*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/**************************************************************************************************************/
	/* strings: keep everything except the closing quote and the backslash that opens an escape */
	/*_S*/ { 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
	/*E0*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 0, 0, 0 },
	/* the four hex digits of \uXXXX are buffered raw and decoded in place afterwards */
	/*U1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
	/*U2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
	/*U3*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
	/*U4*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
	/**************************************************************************************************************/
	/* numbers: keep every character that is part of the literal */
	/*M0*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*Z0*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*I0*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 },
	/*R1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*R2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 },
	/*X1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*X2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*X3*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/**************************************************************************************************************/
	/* true/false/null: nothing is buffered */
	/*T1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*T2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*T3*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*F1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*F2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*F3*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*F4*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*N1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*N2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*N3*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/**************************************************************************************************************/
	/* comments and the surrogate link states: nothing is buffered */
	/*C1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*C2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*C3*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*Y1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*D1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
	/*D2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
};
248:
/* kind of container recorded on the nesting stack for each open [ or { */
#define MODE_ARRAY 0
#define MODE_OBJECT 1

/*
 * All memory management goes through the allocation hooks stored in the
 * parser's config; the parser argument is only used to reach those hooks.
 */
#define parser_malloc(parser, s) parser->config.user_malloc(s)
#define parser_realloc(parser, p, s) parser->config.user_realloc(p, s)
#define parser_free(parser, p) parser->config.user_free(p)
255:
256: static int state_grow(json_parser *parser)
257: {
258: uint32_t newsize = parser->stack_size * 2;
259: void *ptr;
260:
261: if (parser->config.max_nesting != 0)
262: return JSON_ERROR_NESTING_LIMIT;
263:
264: ptr = parser_realloc(parser, parser->stack, newsize * sizeof(uint8_t));
265: if (!ptr)
266: return JSON_ERROR_NO_MEMORY;
1.2 ! moko 267: parser->stack = (uint8_t*)ptr;
1.1 moko 268: parser->stack_size = newsize;
269: return 0;
270: }
271:
1.2 ! moko 272: static int state_push(json_parser *parser, uint8_t mode)
1.1 moko 273: {
274: if (parser->stack_offset >= parser->stack_size) {
275: int ret = state_grow(parser);
276: if (ret)
277: return ret;
278: }
279: parser->stack[parser->stack_offset++] = mode;
280: return 0;
281: }
282:
1.2 ! moko 283: static int state_pop(json_parser *parser, uint8_t mode)
1.1 moko 284: {
285: if (parser->stack_offset == 0)
286: return JSON_ERROR_POP_EMPTY;
287: parser->stack_offset--;
288: if (parser->stack[parser->stack_offset] != mode)
289: return JSON_ERROR_POP_UNEXPECTED_MODE;
290: return 0;
291: }
292:
293: static int buffer_grow(json_parser *parser)
294: {
295: uint32_t newsize;
296: void *ptr;
1.2 ! moko 297: uint32_t max = parser->config.max_data;
1.1 moko 298:
299: if (max > 0 && parser->buffer_size == max)
300: return JSON_ERROR_DATA_LIMIT;
301: newsize = parser->buffer_size * 2;
302: if (max > 0 && newsize > max)
303: newsize = max;
304:
305: ptr = parser_realloc(parser, parser->buffer, newsize * sizeof(char));
306: if (!ptr)
307: return JSON_ERROR_NO_MEMORY;
1.2 ! moko 308: parser->buffer = (char *)ptr;
1.1 moko 309: parser->buffer_size = newsize;
310: return 0;
311: }
312:
313: static int buffer_push(json_parser *parser, unsigned char c)
314: {
315: int ret;
316:
317: if (parser->buffer_offset + 1 >= parser->buffer_size) {
318: ret = buffer_grow(parser);
319: if (ret)
320: return ret;
321: }
322: parser->buffer[parser->buffer_offset++] = c;
323: return 0;
324: }
325:
326: static int do_callback_withbuf(json_parser *parser, int type)
327: {
328: if (!parser->callback)
329: return 0;
330: parser->buffer[parser->buffer_offset] = '\0';
331: return (*parser->callback)(parser->userdata, type, parser->buffer, parser->buffer_offset);
332: }
333:
334: static int do_callback(json_parser *parser, int type)
335: {
336: if (!parser->callback)
337: return 0;
338: return (*parser->callback)(parser->userdata, type, NULL, 0);
339: }
340:
341: static int do_buffer(json_parser *parser)
342: {
343: int ret = 0;
344:
345: switch (parser->type) {
346: case JSON_KEY: case JSON_STRING:
347: case JSON_FLOAT: case JSON_INT:
348: case JSON_NULL: case JSON_TRUE: case JSON_FALSE:
349: ret = do_callback_withbuf(parser, parser->type);
350: if (ret)
351: return ret;
352: break;
353: default:
354: break;
355: }
356: parser->buffer_offset = 0;
357: return ret;
358: }
359:
/*
 * Value of each ASCII character when read as a hexadecimal digit;
 * 255 marks characters that are not hex digits. Rows hold 16 entries.
 */
static const uint8_t hextable[] = {
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,	/* 0x00-0x0f */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,	/* 0x10-0x1f */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,	/* 0x20-0x2f */
	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,255,255,255,255,255,255,	/* '0'-'9' */
	255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255,	/* 'A'-'F' */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,	/* 0x50-0x5f */
	255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255,	/* 'a'-'f' */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,	/* 0x70-0x7f */
};

/*
 * Hex digit value of character c. The table has 128 entries, so c must be
 * an ASCII character. The argument is parenthesized so that the cast
 * applies to the whole expression passed in.
 */
#define hex(c) (hextable[(uint8_t) (c)])

/* high surrogate range from d800 to dbff */
/* low surrogate range dc00 to dfff */
#define IS_HIGH_SURROGATE(uc) (((uc) & 0xfc00) == 0xd800)
#define IS_LOW_SURROGATE(uc) (((uc) & 0xfc00) == 0xdc00)
377:
378: /* transform an unicode [0-9A-Fa-f]{4} sequence into a proper value */
379: static int decode_unicode_char(json_parser *parser)
380: {
381: uint32_t uval;
382: char *b = parser->buffer;
383: int offset = parser->buffer_offset;
384:
385: uval = (hex(b[offset - 4]) << 12) | (hex(b[offset - 3]) << 8)
386: | (hex(b[offset - 2]) << 4) | hex(b[offset - 1]);
387:
388: parser->buffer_offset -= 4;
389:
390: /* fast case */
391: if (!parser->unicode_multi && uval < 0x80) {
392: b[parser->buffer_offset++] = (char) uval;
393: return 0;
394: }
395:
396: if (parser->unicode_multi) {
397: if (!IS_LOW_SURROGATE(uval))
398: return JSON_ERROR_UNICODE_MISSING_LOW_SURROGATE;
399:
400: uval = 0x10000 + ((parser->unicode_multi & 0x3ff) << 10) + (uval & 0x3ff);
401: b[parser->buffer_offset++] = (char) ((uval >> 18) | 0xf0);
402: b[parser->buffer_offset++] = (char) (((uval >> 12) & 0x3f) | 0x80);
403: b[parser->buffer_offset++] = (char) (((uval >> 6) & 0x3f) | 0x80);
404: b[parser->buffer_offset++] = (char) ((uval & 0x3f) | 0x80);
405: parser->unicode_multi = 0;
406: return 0;
407: }
408:
409: if (IS_LOW_SURROGATE(uval))
410: return JSON_ERROR_UNICODE_UNEXPECTED_LOW_SURROGATE;
411: if (IS_HIGH_SURROGATE(uval)) {
412: parser->unicode_multi = uval;
413: return 0;
414: }
415:
416: if (uval < 0x800) {
417: b[parser->buffer_offset++] = (char) ((uval >> 6) | 0xc0);
418: b[parser->buffer_offset++] = (char) ((uval & 0x3f) | 0x80);
419: } else {
420: b[parser->buffer_offset++] = (char) ((uval >> 12) | 0xe0);
421: b[parser->buffer_offset++] = (char) (((uval >> 6) & 0x3f) | 0x80);
422: b[parser->buffer_offset++] = (char) (((uval >> 0) & 0x3f) | 0x80);
423: }
424: return 0;
425: }
426:
427: static int buffer_push_escape(json_parser *parser, unsigned char next)
428: {
429: char c = '\0';
430:
431: switch (next) {
432: case 'b': c = '\b'; break;
433: case 'f': c = '\f'; break;
434: case 'n': c = '\n'; break;
435: case 'r': c = '\r'; break;
436: case 't': c = '\t'; break;
437: case '"': c = '"'; break;
438: case '/': c = '/'; break;
439: case '\\': c = '\\'; break;
440: }
441: /* push the escaped character */
442: return buffer_push(parser, c);
443: }
444:
/*
 * Evaluate f, store its result in the caller's local `ret` and return it
 * from the enclosing function when it is non-zero.
 * Written as do { } while (0) rather than a GNU statement expression so
 * that it is standard C and still behaves as a single statement.
 */
#define CHK(f) do { ret = (f); if (ret) return ret; } while (0)
446:
447: int act_uc(json_parser *parser)
448: {
449: int ret;
450: CHK(decode_unicode_char(parser));
451: parser->state = (parser->unicode_multi) ? STATE_D1 : STATE__S;
452: return 0;
453: }
454:
455: int act_yb(json_parser *parser)
456: {
457: if (!parser->config.allow_yaml_comments)
458: return JSON_ERROR_COMMENT_NOT_ALLOWED;
459: parser->save_state = parser->state;
460: return 0;
461: }
462:
463: int act_cb(json_parser *parser)
464: {
465: if (!parser->config.allow_c_comments)
466: return JSON_ERROR_COMMENT_NOT_ALLOWED;
467: parser->save_state = parser->state;
468: return 0;
469: }
470:
471: int act_ce(json_parser *parser)
472: {
473: parser->state = (parser->save_state > STATE__A) ? STATE_OK : parser->save_state;
474: return 0;
475: }
476:
477: int act_ob(json_parser *parser)
478: {
479: int ret;
480: CHK(do_callback(parser, JSON_OBJECT_BEGIN));
481: CHK(state_push(parser, MODE_OBJECT));
482: parser->expecting_key = 1;
483: return 0;
484: }
485:
486: int act_oe(json_parser *parser)
487: {
488: int ret;
489: CHK(do_callback(parser, JSON_OBJECT_END));
490: CHK(state_pop(parser, MODE_OBJECT));
491: parser->expecting_key = 0;
492: return 0;
493: }
494:
495: int act_ab(json_parser *parser)
496: {
497: int ret;
498: CHK(do_callback(parser, JSON_ARRAY_BEGIN));
499: CHK(state_push(parser, MODE_ARRAY));
500: return 0;
501: }
502: int act_ae(json_parser *parser)
503: {
504: int ret;
505: CHK(do_callback(parser, JSON_ARRAY_END));
506: CHK(state_pop(parser, MODE_ARRAY));
507: return 0;
508: }
509:
510: int act_se(json_parser *parser)
511: {
512: int ret;
513: CHK(do_callback_withbuf(parser, (parser->expecting_key) ? JSON_KEY : JSON_STRING));
514: parser->buffer_offset = 0;
515: parser->state = (parser->expecting_key) ? STATE_CO : STATE_OK;
516: parser->expecting_key = 0;
517: return 0;
518: }
519:
520: int act_sp(json_parser *parser)
521: {
522: if (parser->stack_offset == 0)
523: return JSON_ERROR_COMMA_OUT_OF_STRUCTURE;
524: if (parser->stack[parser->stack_offset - 1] == MODE_OBJECT) {
525: parser->expecting_key = 1;
526: parser->state = STATE__K;
527: } else
528: parser->state = STATE__V;
529: return 0;
530: }
531:
/*
 * Description of what an action does, as consumed by do_action():
 *   call     - handler to run, or NULL when the action needs no code;
 *   type     - value stored into parser->type after the action;
 *   state    - state to enter afterwards;
 *   dobuffer - when non-zero (and call is set) the pending buffer is flushed
 *              through do_buffer() before the handler runs.
 */
struct action_descr
{
	int (*call)(json_parser *parser);
	uint8_t type;
	uint8_t state; /* 0 if we let the callback set the value it want */
	uint8_t dobuffer;
};

/*
 * One entry per action, indexed by the action value with its 0x80 bit
 * cleared.
 */
static struct action_descr actions_map[] = {
	/* start of a number / end of a literal: only the type and state change */
	[STATE_MX & ~0x80] = { NULL, JSON_INT, STATE_M0, 0 },
	[STATE_ZX & ~0x80] = { NULL, JSON_INT, STATE_Z0, 0 },
	[STATE_IX & ~0x80] = { NULL, JSON_INT, STATE_I0, 0 },
	[STATE_DE & ~0x80] = { NULL, JSON_FLOAT, STATE_X1, 0 },
	[STATE_DF & ~0x80] = { NULL, JSON_FLOAT, STATE_R1, 0 },
	[STATE_NU & ~0x80] = { NULL, JSON_NULL, STATE_OK, 0 },
	[STATE_FA & ~0x80] = { NULL, JSON_FALSE, STATE_OK, 0 },
	[STATE_TR & ~0x80] = { NULL, JSON_TRUE, STATE_OK, 0 },
	[STATE_KS & ~0x80] = { NULL, JSON_NONE, STATE__V, 0 },
	/* actions with a handler; those with state 0 pick the next state themselves */
	[STATE_UC & ~0x80] = { act_uc, JSON_NONE, 0, 0 },
	[STATE_YB & ~0x80] = { act_yb, JSON_NONE, STATE_Y1, 1 },
	[STATE_CB & ~0x80] = { act_cb, JSON_NONE, STATE_C1, 1 },
	[STATE_CE & ~0x80] = { act_ce, JSON_NONE, 0, 0 },
	[STATE_OB & ~0x80] = { act_ob, JSON_NONE, STATE__O, 0 },
	[STATE_OE & ~0x80] = { act_oe, JSON_NONE, STATE_OK, 1 },
	[STATE_AB & ~0x80] = { act_ab, JSON_NONE, STATE__A, 0 },
	[STATE_AE & ~0x80] = { act_ae, JSON_NONE, STATE_OK, 1 },
	[STATE_SE & ~0x80] = { act_se, JSON_NONE, 0, 0 },
	[STATE_SP & ~0x80] = { act_sp, JSON_NONE, 0, 1 },
};
561:
562: static int do_action(json_parser *parser, int next_state)
563: {
564: struct action_descr *descr = &actions_map[next_state & ~0x80];
565:
566: if (descr->call) {
567: int ret;
568: if (descr->dobuffer)
569: CHK(do_buffer(parser));
570: CHK((descr->call)(parser));
571: }
572: if (descr->state)
573: parser->state = descr->state;
574: parser->type = descr->type;
575: return 0;
576: }
577:
578: /** json_parser_init initialize a parser structure taking a config,
579: * a config and its userdata.
580: * return JSON_ERROR_NO_MEMORY if memory allocation failed or SUCCESS.
581: */
582: int json_parser_init(json_parser *parser, json_config *config,
583: json_parser_callback callback, void *userdata)
584: {
585: memset(parser, 0, sizeof(*parser));
586:
587: if (config)
588: memcpy(&parser->config, config, sizeof(json_config));
589: parser->callback = callback;
590: parser->userdata = userdata;
591:
592: /* initialise parsing stack and state */
593: parser->stack_offset = 0;
594: parser->state = STATE_GO;
595:
596: /* initialize the parse stack */
597: parser->stack_size = (parser->config.max_nesting > 0)
598: ? parser->config.max_nesting
599: : LIBJSON_DEFAULT_STACK_SIZE;
600:
601: parser->stack = parser_malloc(parser, parser->stack_size * sizeof(parser->stack[0]));
602: if (!parser->stack)
603: return JSON_ERROR_NO_MEMORY;
604:
605: /* initialize the parse buffer */
606: parser->buffer_size = (parser->config.buffer_initial_size > 0)
607: ? parser->config.buffer_initial_size
608: : LIBJSON_DEFAULT_BUFFER_SIZE;
609:
610: if (parser->config.max_data > 0 && parser->buffer_size > parser->config.max_data)
611: parser->buffer_size = parser->config.max_data;
612:
613: parser->buffer = parser_malloc(parser, parser->buffer_size * sizeof(char));
614: if (!parser->buffer) {
615: parser_free(parser, parser->stack);
616: return JSON_ERROR_NO_MEMORY;
617: }
618: return 0;
619: }
620:
621: /** json_parser_free freed memory structure allocated by the parser */
622: int json_parser_free(json_parser *parser)
623: {
624: if (!parser)
625: return 0;
626: parser_free(parser, parser->stack);
627: parser_free(parser, parser->buffer);
628: parser->stack = NULL;
629: parser->buffer = NULL;
630: return 0;
631: }
632:
633: /** json_parser_is_done return 0 is the parser isn't in a finish state. !0 if it is */
634: int json_parser_is_done(json_parser *parser)
635: {
636: /* need to compare the state to !GO to not accept empty document */
637: return parser->stack_offset == 0 && parser->state != STATE_GO;
638: }
639:
640: /** json_parser_string append a string s with a specific length to the parser
641: * return 0 if everything went ok, a JSON_ERROR_* otherwise.
642: * the user can supplied a valid processed pointer that will
643: * be fill with the number of processed characters before returning */
644: int json_parser_string(json_parser *parser, const char *s,
645: uint32_t length, uint32_t *processed)
646: {
647: int ret;
648: int next_class, next_state;
649: int buffer_policy;
650: uint32_t i;
651:
652: ret = 0;
653: for (i = 0; i < length; i++) {
654: unsigned char ch = s[i];
655:
656: ret = 0;
657: next_class = (ch >= 128) ? C_OTHER : character_class[ch];
658: if (next_class == C_ERROR) {
659: ret = JSON_ERROR_BAD_CHAR;
660: break;
661: }
662:
663: next_state = state_transition_table[parser->state][next_class];
664: buffer_policy = buffer_policy_table[parser->state][next_class];
665: TRACING("addchar %d (current-state=%d, next-state=%d, buf-policy=%d)\n",
666: ch, parser->state, next_state, buffer_policy);
667: if (next_state == STATE___) {
668: ret = JSON_ERROR_UNEXPECTED_CHAR;
669: break;
670: }
671:
672: /* add char to buffer */
673: if (buffer_policy) {
674: ret = (buffer_policy == 2)
675: ? buffer_push_escape(parser, ch)
676: : buffer_push(parser, ch);
677: if (ret)
678: break;
679: }
680:
681: /* move to the next level */
682: if (IS_STATE_ACTION(next_state))
683: ret = do_action(parser, next_state);
684: else
685: parser->state = next_state;
686: if (ret)
687: break;
688: }
689: if (processed)
690: *processed = i;
691: return ret;
692: }
693:
694: /** json_parser_char append one single char to the parser
695: * return 0 if everything went ok, a JSON_ERROR_* otherwise */
696: int json_parser_char(json_parser *parser, unsigned char ch)
697: {
698: return json_parser_string(parser, (char *) &ch, 1, NULL);
699: }
E-mail: