Annotation of parser3/src/lib/json/json.c, revision 1.10
1.1 moko 1: /*
1.8 moko 2: * Copyright (C) 2009-2011 Vincent Hanquez <vincent@snarc.org>
1.1 moko 3: *
4: * This program is free software; you can redistribute it and/or modify
5: * it under the terms of the GNU Lesser General Public License as published
6: * by the Free Software Foundation; version 2.1 or version 3.0 only.
7: *
8: * This program is distributed in the hope that it will be useful,
9: * but WITHOUT ANY WARRANTY; without even the implied warranty of
10: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11: * GNU General Public License for more details.
12: */
13:
/*
 * The character classes, states and state transition tables were inspired by
 * JSON_parser.c, available at http://json.org, but differ considerably in the
 * way the parser handles its parse buffer, and contain significant differences
 * that affect JSON compliance.
 */
20:
21: #include "json.h"
22:
/*
 * Character classes. Each input byte is reduced to one of these values, which
 * is then used as the column index of the state and buffer-policy tables.
 * The order of the enumerators therefore has to match the table columns.
 */
enum classes {
	C_SPACE,	/* ' ' */
	C_NL,		/* newline */
	C_WHITE,	/* tab, CR */
	C_LCURB,	/* '{' : object opening */
	C_RCURB,	/* '}' : object closing */
	C_LSQRB,	/* '[' : array opening */
	C_RSQRB,	/* ']' : array closing */

	/* syntax symbols */
	C_COLON,	/* ':' */
	C_COMMA,	/* ',' */
	C_QUOTE,	/* '"' */
	C_BACKS,	/* '\' */
	C_SLASH,	/* '/' */
	C_PLUS,		/* '+' */
	C_MINUS,	/* '-' */
	C_DOT,		/* '.' */

	/* digits */
	C_ZERO,		/* '0' */
	C_DIGIT,	/* '1' to '9' */

	/* lower-case letters that have a meaning in the grammar */
	C_a,
	C_b,
	C_c,
	C_d,
	C_e,
	C_f,
	C_l,
	C_n,
	C_r,
	C_s,
	C_t,
	C_u,

	/* upper-case letters that can appear in hexadecimal digits/exponents */
	C_ABCDF,	/* 'A', 'B', 'C', 'D', 'F' */
	C_E,		/* 'E' */

	C_OTHER,	/* every other character */
	C_STAR,		/* '*' : star in a C style comment */
	C_HASH,		/* '#' : start of a YAML comment */

	C_ERROR = 0xfe	/* not a column: marks a character that is never accepted */
};
46:
47: /* map from character < 128 to classes. from 128 to 256 all C_OTHER */
48: static uint8_t character_class[128] = {
49: C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR,
50: C_ERROR, C_WHITE, C_NL, C_ERROR, C_ERROR, C_WHITE, C_ERROR, C_ERROR,
51: C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR,
52: C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR, C_ERROR,
53:
54: C_SPACE, C_OTHER, C_QUOTE, C_HASH, C_OTHER, C_OTHER, C_OTHER, C_OTHER,
55: C_OTHER, C_OTHER, C_STAR, C_PLUS, C_COMMA, C_MINUS, C_DOT, C_SLASH,
56: C_ZERO, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT, C_DIGIT,
57: C_DIGIT, C_DIGIT, C_COLON, C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER,
58:
59: C_OTHER, C_ABCDF, C_ABCDF, C_ABCDF, C_ABCDF, C_E, C_ABCDF, C_OTHER,
60: C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER,
61: C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_OTHER,
62: C_OTHER, C_OTHER, C_OTHER, C_LSQRB, C_BACKS, C_RSQRB, C_OTHER, C_OTHER,
63:
64: C_OTHER, C_a, C_b, C_c, C_d, C_e, C_f, C_OTHER,
65: C_OTHER, C_OTHER, C_OTHER, C_OTHER, C_l, C_OTHER, C_n, C_OTHER,
66: C_OTHER, C_OTHER, C_r, C_s, C_t, C_u, C_OTHER, C_OTHER,
67: C_OTHER, C_OTHER, C_OTHER, C_LCURB, C_OTHER, C_RCURB, C_OTHER, C_OTHER
68: };
69:
/*
 * Escaped spelling of the first 36 ASCII characters (0x00 - 0x23), indexed by
 * character code. Control characters and '"' get an escape sequence; ' ', '!'
 * and '#' are listed only to keep the table contiguous and map to themselves.
 *
 * The entries are string literals, so both the strings and the table itself
 * are const.
 */
static const char *const character_escape[36] = {
	"\\u0000", "\\u0001", "\\u0002", "\\u0003", "\\u0004", "\\u0005", "\\u0006", "\\u0007", /* 0-7 */
	"\\b",     "\\t",     "\\n",     "\\u000b", "\\f",     "\\r",     "\\u000e", "\\u000f", /* 8-f */
	"\\u0010", "\\u0011", "\\u0012", "\\u0013", "\\u0014", "\\u0015", "\\u0016", "\\u0017", /* 10-17 */
	"\\u0018", "\\u0019", "\\u001a", "\\u001b", "\\u001c", "\\u001d", "\\u001e", "\\u001f", /* 18-1f */
	" ",       "!",       "\\\"",    "#",                                                     /* 20-23 */
};
78:
/*
 * Parser states and transition actions.
 *
 * The states come first because they are used as the row index of the
 * transition tables. Their names are STATE_ followed by two characters:
 * a digit suffix for the steps of a multi-character token, or a leading '_'
 * for the simple states (string, object, value, ...).
 *
 * Actions are numbered from 0x80 upwards and the error state is 0xff, so none
 * of them collides with a state.
 */
enum states {
	STATE_GO,	/* start */
	STATE_OK,	/* ok */
	STATE__O,	/* object */
	STATE__K,	/* key */
	STATE_CO,	/* colon */
	STATE__V,	/* value */
	STATE__A,	/* array */
	STATE__S,	/* string */
	STATE_E0,	/* escape */

	/* unicode states */
	STATE_U1,
	STATE_U2,
	STATE_U3,
	STATE_U4,

	/* number states */
	STATE_M0,
	STATE_Z0,
	STATE_I0,

	/* real states (after-dot digits) */
	STATE_R1,
	STATE_R2,

	/* exponent states */
	STATE_X1,
	STATE_X2,
	STATE_X3,

	/* true constant states */
	STATE_T1,
	STATE_T2,
	STATE_T3,

	/* false constant states */
	STATE_F1,
	STATE_F2,
	STATE_F3,
	STATE_F4,

	/* null constant states */
	STATE_N1,
	STATE_N2,
	STATE_N3,

	/* C-comment states */
	STATE_C1,
	STATE_C2,
	STATE_C3,

	/* YAML-comment state */
	STATE_Y1,

	/* multi unicode states */
	STATE_D1,
	STATE_D2
};
109:
/*
 * Actions. A transition table entry with bit 0x80 set is not a state but one
 * of these codes; the code minus 0x80 is the index of its descriptor.
 */
enum actions {
	STATE_KS = 0x80,	/* key separator */
	STATE_SP,		/* comma separator */
	STATE_AB,		/* array begin */
	STATE_AE,		/* array ending */
	STATE_OB,		/* object begin */
	STATE_OE,		/* object end */
	STATE_CB,		/* C-comment begin */
	STATE_YB,		/* YAML-comment begin */
	STATE_CE,		/* YAML/C comment end */
	STATE_FA,		/* false */
	STATE_TR,		/* true */
	STATE_NU,		/* null */
	STATE_DE,		/* double detected by exponent */
	STATE_DF,		/* double detected by . */
	STATE_SE,		/* string end */
	STATE_MX,		/* integer detected by minus */
	STATE_ZX,		/* integer detected by zero */
	STATE_IX,		/* integer detected by 1-9 */
	STATE_UC		/* Unicode character read */
};
132:
/* Transition table entry meaning "this character is not allowed here". */
#define STATE___ 0xff

/* Dimensions of the transition tables: one row per state, one column per class. */
#define NR_STATES (STATE_D2 + 1)
#define NR_CLASSES (C_HASH + 1)

/* Non-zero when a transition table entry is an action code rather than a state. */
#define IS_STATE_ACTION(s) ((s) & 0x80)

/*
 * Helpers used only to write the transition table compactly: S(x) expands a
 * two-character suffix to its STATE_ name, and PT_ builds one table row from
 * 34 such suffixes (one per character class).
 */
#define S(x) STATE_##x
#define PT_(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a1,b1,c1,d1,e1,f1,g1,h1) \
	{ \
		S(a),S(b),S(c),S(d),S(e),S(f),S(g),S(h),S(i),S(j),S(k),S(l),S(m),S(n), \
		S(o),S(p),S(q),S(r),S(s),S(t),S(u),S(v),S(w),S(x),S(y),S(z),S(a1),S(b1), \
		S(c1),S(d1),S(e1),S(f1),S(g1),S(h1) \
	}
145:
146: /* Map from (current parser state, class of the incoming character) to the next
 * parser transition. Rows are indexed by enum states, columns by enum classes
 * (the two header comments below name the column of each character class).
 * An entry is either a plain state to move to, an action code (bit 0x80 set,
 * see IS_STATE_ACTION) or __ (STATE___) for a character that is not accepted
 * in that state.
 * NOTE(review): the X3 row has no CB/YB entries, unlike the Z0, I0 and R2
 * rows, so a comment that directly follows an exponent is rejected - confirm
 * whether this is intended. */
147: static const uint8_t state_transition_table[NR_STATES][NR_CLASSES] = {
148: /* white ABCDF other */
149: /* sp nl | { } [ ] : , " \ / + - . 0 19 a b c d e f l n r s t u | E | * # */
150: /*GO*/ PT_(GO,GO,GO,OB,__,AB,__,__,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,YB),
151: /*OK*/ PT_(OK,OK,OK,__,OE,__,AE,__,SP,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,YB),
152: /*_O*/ PT_(_O,_O,_O,__,OE,__,__,__,__,_S,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,YB),
153: /*_K*/ PT_(_K,_K,_K,__,__,__,__,__,__,_S,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,YB),
154: /*CO*/ PT_(CO,CO,CO,__,__,__,__,KS,__,__,__,CB,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,YB),
155: /*_V*/ PT_(_V,_V,_V,OB,__,AB,__,__,__,_S,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__,__,YB),
156: /*_A*/ PT_(_A,_A,_A,OB,__,AB,AE,__,__,_S,__,CB,__,MX,__,ZX,IX,__,__,__,__,__,F1,__,N1,__,__,T1,__,__,__,__,__,YB),
157: /****************************************************************************************************************/
158: /*_S*/ PT_(_S,__,__,_S,_S,_S,_S,_S,_S,SE,E0,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S,_S),
159: /*E0*/ PT_(__,__,__,__,__,__,__,__,__,_S,_S,_S,__,__,__,__,__,__,_S,__,__,__,_S,__,_S,_S,__,_S,U1,__,__,__,__,__),
160: /*U1*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,U2,U2,U2,U2,U2,U2,U2,U2,__,__,__,__,__,__,U2,U2,__,__,__),
161: /*U2*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,U3,U3,U3,U3,U3,U3,U3,U3,__,__,__,__,__,__,U3,U3,__,__,__),
162: /*U3*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,U4,U4,U4,U4,U4,U4,U4,U4,__,__,__,__,__,__,U4,U4,__,__,__),
163: /*U4*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,UC,UC,UC,UC,UC,UC,UC,UC,__,__,__,__,__,__,UC,UC,__,__,__),
164: /****************************************************************************************************************/
165: /*M0*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,Z0,I0,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__),
166: /*Z0*/ PT_(OK,OK,OK,__,OE,__,AE,__,SP,__,__,CB,__,__,DF,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,YB),
167: /*I0*/ PT_(OK,OK,OK,__,OE,__,AE,__,SP,__,__,CB,__,__,DF,I0,I0,__,__,__,__,DE,__,__,__,__,__,__,__,__,DE,__,__,YB),
168: /*R1*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,R2,R2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__),
169: /*R2*/ PT_(OK,OK,OK,__,OE,__,AE,__,SP,__,__,CB,__,__,__,R2,R2,__,__,__,__,X1,__,__,__,__,__,__,__,__,X1,__,__,YB),
170: /*X1*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,X2,X2,__,X3,X3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__),
171: /*X2*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,X3,X3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__),
172: /*X3*/ PT_(OK,OK,OK,__,OE,__,AE,__,SP,__,__,__,__,__,__,X3,X3,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__),
173: /****************************************************************************************************************/
174: /*T1*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T2,__,__,__,__,__,__,__,__),
175: /*T2*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,T3,__,__,__,__,__),
176: /*T3*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,TR,__,__,__,__,__,__,__,__,__,__,__,__),
177: /*F1*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__),
178: /*F2*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F3,__,__,__,__,__,__,__,__,__,__),
179: /*F3*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,F4,__,__,__,__,__,__,__),
180: /*F4*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,FA,__,__,__,__,__,__,__,__,__,__,__,__),
181: /*N1*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N2,__,__,__,__,__),
182: /*N2*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,N3,__,__,__,__,__,__,__,__,__,__),
183: /*N3*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,NU,__,__,__,__,__,__,__,__,__,__),
184: /****************************************************************************************************************/
185: /*C1*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,C2,__),
186: /*C2*/ PT_(C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3,C2),
187: /*C3*/ PT_(C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,CE,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C2,C3,C2),
188: /*Y1*/ PT_(Y1,CE,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1,Y1),
189: /*D1*/ PT_(__,__,__,__,__,__,__,__,__,__,D2,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__),
190: /*D2*/ PT_(__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,__,U1,__,__,__,__,__),
191: };
192: #undef S
193: #undef PT_
194:
195: /* Map from (current parser state, class of the incoming character) to the
 * buffer policy for that character. Rows and columns are laid out exactly
 * like state_transition_table.
 *   0 = ignore: the character is not stored
 *   1 = append: the character is stored in the parse buffer as is
 *   2 = escape: the character is the letter of a backslash escape and the
 *       character it stands for is stored instead */
196: static const uint8_t buffer_policy_table[NR_STATES][NR_CLASSES] = {
197: /* white ABCDF other */
198: /* sp nl | { } [ ] : , " \ / + - . 0 19 a b c d e f l n r s t u | E | * # */
199: /*GO*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
200: /*OK*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
201: /*_O*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
202: /*_K*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
203: /*CO*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
204: /*_V*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
205: /*_A*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
206: /**************************************************************************************************************/
207: /*_S*/ { 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
208: /*E0*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 2, 0, 2, 2, 0, 2, 0, 0, 0, 0, 0, 0 },
209: /*U1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
210: /*U2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
211: /*U3*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
212: /*U4*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 },
213: /**************************************************************************************************************/
214: /*M0*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
215: /*Z0*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
216: /*I0*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 },
217: /*R1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
218: /*R2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0 },
219: /*X1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
220: /*X2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
221: /*X3*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
222: /**************************************************************************************************************/
223: /*T1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
224: /*T2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
225: /*T3*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
226: /*F1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
227: /*F2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
228: /*F3*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
229: /*F4*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
230: /*N1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
231: /*N2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
232: /*N3*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
233: /**************************************************************************************************************/
234: /*C1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
235: /*C2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
236: /*C3*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
237: /*Y1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
238: /*D1*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
239: /*D2*/ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
240: };
241:
/* Kind of container recorded on the nesting stack. */
#define MODE_ARRAY 0
#define MODE_OBJECT 1

/*
 * Memory management wrappers: every allocation goes through the callbacks
 * stored in the parser's config. There is no fallback, so the callbacks must
 * be set before any of these is used.
 *
 * The macro arguments are parenthesized so that any expression (for instance
 * `p + 0` or `&local`) can be passed as the parser.
 */
#define parser_malloc(parser, s) ((parser)->config.user_malloc((s)))
#define parser_realloc(parser, p, s) ((parser)->config.user_realloc((p), (s)))
#define parser_free(parser, p) ((parser)->config.user_free((p)))
248:
249: static int state_grow(json_parser *parser)
250: {
251: uint32_t newsize = parser->stack_size * 2;
252: void *ptr;
253:
254: if (parser->config.max_nesting != 0)
255: return JSON_ERROR_NESTING_LIMIT;
256:
257: ptr = parser_realloc(parser, parser->stack, newsize * sizeof(uint8_t));
258: if (!ptr)
259: return JSON_ERROR_NO_MEMORY;
1.7 moko 260: parser->stack = ptr;
1.1 moko 261: parser->stack_size = newsize;
262: return 0;
263: }
264:
1.2 moko 265: static int state_push(json_parser *parser, uint8_t mode)
1.1 moko 266: {
267: if (parser->stack_offset >= parser->stack_size) {
268: int ret = state_grow(parser);
269: if (ret)
270: return ret;
271: }
272: parser->stack[parser->stack_offset++] = mode;
273: return 0;
274: }
275:
1.2 moko 276: static int state_pop(json_parser *parser, uint8_t mode)
1.1 moko 277: {
278: if (parser->stack_offset == 0)
279: return JSON_ERROR_POP_EMPTY;
280: parser->stack_offset--;
281: if (parser->stack[parser->stack_offset] != mode)
282: return JSON_ERROR_POP_UNEXPECTED_MODE;
283: return 0;
284: }
285:
286: static int buffer_grow(json_parser *parser)
287: {
288: uint32_t newsize;
289: void *ptr;
1.2 moko 290: uint32_t max = parser->config.max_data;
1.1 moko 291:
292: if (max > 0 && parser->buffer_size == max)
293: return JSON_ERROR_DATA_LIMIT;
294: newsize = parser->buffer_size * 2;
295: if (max > 0 && newsize > max)
296: newsize = max;
297:
298: ptr = parser_realloc(parser, parser->buffer, newsize * sizeof(char));
299: if (!ptr)
300: return JSON_ERROR_NO_MEMORY;
1.7 moko 301: parser->buffer = ptr;
1.1 moko 302: parser->buffer_size = newsize;
303: return 0;
304: }
305:
306: static int buffer_push(json_parser *parser, unsigned char c)
307: {
308: int ret;
309:
310: if (parser->buffer_offset + 1 >= parser->buffer_size) {
311: ret = buffer_grow(parser);
312: if (ret)
313: return ret;
314: }
315: parser->buffer[parser->buffer_offset++] = c;
316: return 0;
317: }
318:
319: static int do_callback_withbuf(json_parser *parser, int type)
320: {
321: if (!parser->callback)
322: return 0;
323: parser->buffer[parser->buffer_offset] = '\0';
324: return (*parser->callback)(parser->userdata, type, parser->buffer, parser->buffer_offset);
325: }
326:
327: static int do_callback(json_parser *parser, int type)
328: {
329: if (!parser->callback)
330: return 0;
331: return (*parser->callback)(parser->userdata, type, NULL, 0);
332: }
333:
334: static int do_buffer(json_parser *parser)
335: {
336: int ret = 0;
337:
338: switch (parser->type) {
339: case JSON_KEY: case JSON_STRING:
340: case JSON_FLOAT: case JSON_INT:
341: case JSON_NULL: case JSON_TRUE: case JSON_FALSE:
342: ret = do_callback_withbuf(parser, parser->type);
343: if (ret)
344: return ret;
345: break;
346: default:
347: break;
348: }
349: parser->buffer_offset = 0;
350: return ret;
351: }
352:
/*
 * Value of each byte taken as a hexadecimal digit, 255 for a byte that is not
 * one. The table covers all 256 byte values because hex() indexes it with a
 * uint8_t: with only the ASCII half present, a byte >= 0x80 would read past
 * the end of the array.
 */
static const uint8_t hextable[256] = {
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, /* 0x00 */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, /* 0x10 */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, /* 0x20 */
	  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,255,255,255,255,255,255, /* 0x30 : '0'-'9' */
	255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255, /* 0x40 : 'A'-'F' */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, /* 0x50 */
	255, 10, 11, 12, 13, 14, 15,255,255,255,255,255,255,255,255,255, /* 0x60 : 'a'-'f' */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, /* 0x70 */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, /* 0x80 */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, /* 0x90 */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, /* 0xa0 */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, /* 0xb0 */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, /* 0xc0 */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, /* 0xd0 */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, /* 0xe0 */
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, /* 0xf0 */
};

/* Hexadecimal value of the character c (255 if c is not a hex digit). The
 * argument is parenthesized so the cast applies to the whole expression. */
#define hex(c) (hextable[(uint8_t) (c)])
365:
/*
 * UTF-16 surrogate tests on a 16-bit code unit. The top six bits select the
 * range: 0xd800-0xdbff is a high (leading) surrogate and 0xdc00-0xdfff a low
 * (trailing) one.
 */
#define IS_HIGH_SURROGATE(uc) ((((uc) & 0xfc00) ^ 0xd800) == 0)
#define IS_LOW_SURROGATE(uc) ((((uc) & 0xfc00) ^ 0xdc00) == 0)
370:
371: /* transform an unicode [0-9A-Fa-f]{4} sequence into a proper value */
372: static int decode_unicode_char(json_parser *parser)
373: {
374: uint32_t uval;
375: char *b = parser->buffer;
1.3 moko 376: uint32_t offset = parser->buffer_offset;
1.1 moko 377:
378: uval = (hex(b[offset - 4]) << 12) | (hex(b[offset - 3]) << 8)
379: | (hex(b[offset - 2]) << 4) | hex(b[offset - 1]);
380:
381: parser->buffer_offset -= 4;
382:
383: /* fast case */
384: if (!parser->unicode_multi && uval < 0x80) {
385: b[parser->buffer_offset++] = (char) uval;
386: return 0;
387: }
388:
389: if (parser->unicode_multi) {
390: if (!IS_LOW_SURROGATE(uval))
391: return JSON_ERROR_UNICODE_MISSING_LOW_SURROGATE;
392:
393: uval = 0x10000 + ((parser->unicode_multi & 0x3ff) << 10) + (uval & 0x3ff);
394: b[parser->buffer_offset++] = (char) ((uval >> 18) | 0xf0);
395: b[parser->buffer_offset++] = (char) (((uval >> 12) & 0x3f) | 0x80);
396: b[parser->buffer_offset++] = (char) (((uval >> 6) & 0x3f) | 0x80);
397: b[parser->buffer_offset++] = (char) ((uval & 0x3f) | 0x80);
398: parser->unicode_multi = 0;
399: return 0;
400: }
401:
402: if (IS_LOW_SURROGATE(uval))
403: return JSON_ERROR_UNICODE_UNEXPECTED_LOW_SURROGATE;
404: if (IS_HIGH_SURROGATE(uval)) {
1.3 moko 405: parser->unicode_multi = (uint16_t)uval;
1.1 moko 406: return 0;
407: }
408:
409: if (uval < 0x800) {
410: b[parser->buffer_offset++] = (char) ((uval >> 6) | 0xc0);
411: b[parser->buffer_offset++] = (char) ((uval & 0x3f) | 0x80);
412: } else {
413: b[parser->buffer_offset++] = (char) ((uval >> 12) | 0xe0);
414: b[parser->buffer_offset++] = (char) (((uval >> 6) & 0x3f) | 0x80);
415: b[parser->buffer_offset++] = (char) (((uval >> 0) & 0x3f) | 0x80);
416: }
417: return 0;
418: }
419:
420: static int buffer_push_escape(json_parser *parser, unsigned char next)
421: {
422: char c = '\0';
423:
424: switch (next) {
425: case 'b': c = '\b'; break;
426: case 'f': c = '\f'; break;
427: case 'n': c = '\n'; break;
428: case 'r': c = '\r'; break;
429: case 't': c = '\t'; break;
430: case '"': c = '"'; break;
431: case '/': c = '/'; break;
432: case '\\': c = '\\'; break;
433: }
434: /* push the escaped character */
435: return buffer_push(parser, c);
436: }
437:
/*
 * Evaluate f, store the result in the local variable `ret` (which the calling
 * function must declare) and return it from the calling function when it is
 * not 0.
 * Wrapped in do { } while (0) so that `CHK(x);` is a single statement and can
 * be used as the body of an if that has an else branch.
 */
#define CHK(f) do { ret = (f); if (ret) return ret; } while (0)
1.1 moko 439:
1.8 moko 440: static int act_uc(json_parser *parser)
1.1 moko 441: {
442: int ret;
443: CHK(decode_unicode_char(parser));
1.10 ! moko 444: parser->state = (uint8_t)((parser->unicode_multi) ? STATE_D1 : STATE__S);
1.1 moko 445: return 0;
446: }
447:
1.8 moko 448: static int act_yb(json_parser *parser)
1.1 moko 449: {
450: if (!parser->config.allow_yaml_comments)
451: return JSON_ERROR_COMMENT_NOT_ALLOWED;
452: parser->save_state = parser->state;
453: return 0;
454: }
455:
1.8 moko 456: static int act_cb(json_parser *parser)
1.1 moko 457: {
458: if (!parser->config.allow_c_comments)
459: return JSON_ERROR_COMMENT_NOT_ALLOWED;
460: parser->save_state = parser->state;
461: return 0;
462: }
463:
1.8 moko 464: static int act_ce(json_parser *parser)
1.1 moko 465: {
1.10 ! moko 466: parser->state = (uint8_t)((parser->save_state > STATE__A) ? STATE_OK : parser->save_state);
1.1 moko 467: return 0;
468: }
469:
1.8 moko 470: static int act_ob(json_parser *parser)
1.1 moko 471: {
472: int ret;
473: CHK(do_callback(parser, JSON_OBJECT_BEGIN));
474: CHK(state_push(parser, MODE_OBJECT));
475: parser->expecting_key = 1;
476: return 0;
477: }
478:
1.8 moko 479: static int act_oe(json_parser *parser)
1.1 moko 480: {
481: int ret;
1.8 moko 482: CHK(state_pop(parser, MODE_OBJECT));
1.1 moko 483: CHK(do_callback(parser, JSON_OBJECT_END));
484: parser->expecting_key = 0;
485: return 0;
486: }
487:
1.8 moko 488: static int act_ab(json_parser *parser)
1.1 moko 489: {
490: int ret;
491: CHK(do_callback(parser, JSON_ARRAY_BEGIN));
492: CHK(state_push(parser, MODE_ARRAY));
493: return 0;
494: }
1.8 moko 495: static int act_ae(json_parser *parser)
1.1 moko 496: {
497: int ret;
1.8 moko 498: CHK(state_pop(parser, MODE_ARRAY));
1.1 moko 499: CHK(do_callback(parser, JSON_ARRAY_END));
500: return 0;
501: }
502:
1.8 moko 503: static int act_se(json_parser *parser)
1.1 moko 504: {
505: int ret;
506: CHK(do_callback_withbuf(parser, (parser->expecting_key) ? JSON_KEY : JSON_STRING));
507: parser->buffer_offset = 0;
1.10 ! moko 508: parser->state = (uint8_t)((parser->expecting_key) ? STATE_CO : STATE_OK);
1.1 moko 509: parser->expecting_key = 0;
510: return 0;
511: }
512:
1.8 moko 513: static int act_sp(json_parser *parser)
1.1 moko 514: {
515: if (parser->stack_offset == 0)
516: return JSON_ERROR_COMMA_OUT_OF_STRUCTURE;
517: if (parser->stack[parser->stack_offset - 1] == MODE_OBJECT) {
518: parser->expecting_key = 1;
519: parser->state = STATE__K;
520: } else
521: parser->state = STATE__V;
522: return 0;
523: }
524:
525: struct action_descr
526: {
527: int (*call)(json_parser *parser);
528: uint8_t type;
529: uint8_t state; /* 0 if we let the callback set the value it want */
530: uint8_t dobuffer;
531: };
532:
533: static struct action_descr actions_map[] = {
1.8 moko 534: { NULL, JSON_NONE, STATE__V, 0 }, /* KS */
535: { act_sp, JSON_NONE, 0, 1 }, /* SP */
536: { act_ab, JSON_NONE, STATE__A, 0 }, /* AB */
537: { act_ae, JSON_NONE, STATE_OK, 1 }, /* AE */
538: { act_ob, JSON_NONE, STATE__O, 0 }, /* OB */
539: { act_oe, JSON_NONE, STATE_OK, 1 }, /* OE */
540: { act_cb, JSON_NONE, STATE_C1, 1 }, /* CB */
541: { act_yb, JSON_NONE, STATE_Y1, 1 }, /* YB */
542: { act_ce, JSON_NONE, 0, 0 }, /* CE */
543: { NULL, JSON_FALSE, STATE_OK, 0 }, /* FA */
544: { NULL, JSON_TRUE, STATE_OK, 0 }, /* TR */
545: { NULL, JSON_NULL, STATE_OK, 0 }, /* NU */
546: { NULL, JSON_FLOAT, STATE_X1, 0 }, /* DE */
547: { NULL, JSON_FLOAT, STATE_R1, 0 }, /* DF */
548: { act_se, JSON_NONE, 0, 0 }, /* SE */
549: { NULL, JSON_INT, STATE_M0, 0 }, /* MX */
550: { NULL, JSON_INT, STATE_Z0, 0 }, /* ZX */
551: { NULL, JSON_INT, STATE_I0, 0 }, /* IX */
552: { act_uc, JSON_NONE, 0, 0 }, /* UC */
1.1 moko 553: };
554:
1.4 moko 555: static int do_action(json_parser *parser, uint8_t next_state)
1.1 moko 556: {
557: struct action_descr *descr = &actions_map[next_state & ~0x80];
558:
559: if (descr->call) {
560: int ret;
561: if (descr->dobuffer)
562: CHK(do_buffer(parser));
563: CHK((descr->call)(parser));
564: }
565: if (descr->state)
566: parser->state = descr->state;
567: parser->type = descr->type;
568: return 0;
569: }
570:
571: /** json_parser_init initialize a parser structure taking a config,
572: * a config and its userdata.
573: * return JSON_ERROR_NO_MEMORY if memory allocation failed or SUCCESS.
574: */
575: int json_parser_init(json_parser *parser, json_config *config,
576: json_parser_callback callback, void *userdata)
577: {
578: memset(parser, 0, sizeof(*parser));
579:
580: if (config)
581: memcpy(&parser->config, config, sizeof(json_config));
582: parser->callback = callback;
583: parser->userdata = userdata;
584:
585: /* initialise parsing stack and state */
586: parser->stack_offset = 0;
587: parser->state = STATE_GO;
588:
589: /* initialize the parse stack */
590: parser->stack_size = (parser->config.max_nesting > 0)
591: ? parser->config.max_nesting
592: : LIBJSON_DEFAULT_STACK_SIZE;
593:
594: parser->stack = parser_malloc(parser, parser->stack_size * sizeof(parser->stack[0]));
595: if (!parser->stack)
596: return JSON_ERROR_NO_MEMORY;
597:
598: /* initialize the parse buffer */
599: parser->buffer_size = (parser->config.buffer_initial_size > 0)
600: ? parser->config.buffer_initial_size
601: : LIBJSON_DEFAULT_BUFFER_SIZE;
602:
603: if (parser->config.max_data > 0 && parser->buffer_size > parser->config.max_data)
604: parser->buffer_size = parser->config.max_data;
605:
606: parser->buffer = parser_malloc(parser, parser->buffer_size * sizeof(char));
607: if (!parser->buffer) {
608: parser_free(parser, parser->stack);
609: return JSON_ERROR_NO_MEMORY;
610: }
611: return 0;
612: }
613:
614: /** json_parser_free freed memory structure allocated by the parser */
615: int json_parser_free(json_parser *parser)
616: {
617: if (!parser)
618: return 0;
619: parser_free(parser, parser->stack);
620: parser_free(parser, parser->buffer);
621: parser->stack = NULL;
622: parser->buffer = NULL;
623: return 0;
624: }
625:
626: /** json_parser_is_done return 0 is the parser isn't in a finish state. !0 if it is */
627: int json_parser_is_done(json_parser *parser)
628: {
629: /* need to compare the state to !GO to not accept empty document */
630: return parser->stack_offset == 0 && parser->state != STATE_GO;
631: }
632:
633: /** json_parser_string append a string s with a specific length to the parser
634: * return 0 if everything went ok, a JSON_ERROR_* otherwise.
635: * the user can supplied a valid processed pointer that will
636: * be fill with the number of processed characters before returning */
637: int json_parser_string(json_parser *parser, const char *s,
638: uint32_t length, uint32_t *processed)
639: {
640: int ret;
1.4 moko 641: uint8_t next_class, next_state;
1.3 moko 642: uint32_t buffer_policy;
1.1 moko 643: uint32_t i;
644:
645: ret = 0;
646: for (i = 0; i < length; i++) {
647: unsigned char ch = s[i];
648:
649: ret = 0;
1.10 ! moko 650: next_class = (uint8_t)((ch >= 128) ? C_OTHER : character_class[ch]);
1.1 moko 651: if (next_class == C_ERROR) {
652: ret = JSON_ERROR_BAD_CHAR;
653: break;
654: }
655:
656: next_state = state_transition_table[parser->state][next_class];
657: buffer_policy = buffer_policy_table[parser->state][next_class];
658: if (next_state == STATE___) {
659: ret = JSON_ERROR_UNEXPECTED_CHAR;
660: break;
661: }
662:
663: /* add char to buffer */
664: if (buffer_policy) {
665: ret = (buffer_policy == 2)
666: ? buffer_push_escape(parser, ch)
667: : buffer_push(parser, ch);
668: if (ret)
669: break;
670: }
671:
672: /* move to the next level */
673: if (IS_STATE_ACTION(next_state))
674: ret = do_action(parser, next_state);
675: else
676: parser->state = next_state;
677: if (ret)
678: break;
679: }
680: if (processed)
681: *processed = i;
682: return ret;
683: }
684:
685: /** json_parser_char append one single char to the parser
686: * return 0 if everything went ok, a JSON_ERROR_* otherwise */
687: int json_parser_char(json_parser *parser, unsigned char ch)
688: {
689: return json_parser_string(parser, (char *) &ch, 1, NULL);
690: }
E-mail: