--- parser3/src/classes/json.C 2015/03/17 07:28:43 1.34 +++ parser3/src/classes/json.C 2024/09/13 04:01:22 1.61 @@ -1,7 +1,8 @@ /** @file Parser: @b json parser class. - Copyright (c) 2000-2012 Art. Lebedev Studio (http://www.artlebedev.com) + Copyright (c) 2000-2023 Art. Lebedev Studio (http://www.artlebedev.com) + Authors: Konstantin Morshnev */ #include "classes.h" @@ -9,6 +10,7 @@ #include "pa_request.h" #include "pa_vbool.h" +#include "pa_varray.h" #include "pa_charset.h" #include "pa_charsets.h" @@ -18,7 +20,7 @@ #include "pa_vxdoc.h" #endif -volatile const char * IDENT_JSON_C="$Id: json.C,v 1.34 2015/03/17 07:28:43 misha Exp $"; +volatile const char * IDENT_JSON_C="$Id: json.C,v 1.61 2024/09/13 04:01:22 moko Exp $"; // class @@ -29,11 +31,11 @@ public: // global variable -DECLARE_CLASS_VAR(json, new MJson, 0); +DECLARE_CLASS_VAR(json, new MJson); // methods -struct Json { - Stack stack; +struct Json : public PA_Allocated { + Stack stack; Stack key_stack; String* key; @@ -48,11 +50,12 @@ struct Json { bool handle_double; bool handle_int; + bool handle_array; enum Distinct { D_EXCEPTION, D_FIRST, D_LAST, D_ALL } distinct; Json(Charset* acharset): stack(), key_stack(), key(NULL), result(NULL), hook_object(NULL), hook_array(NULL), request(NULL), charset(acharset), taint(String::L_TAINTED), handle_double(true), handle_int(true), - distinct(D_EXCEPTION){} + handle_array(true), distinct(D_EXCEPTION){} bool set_distinct(const String &value){ if (value == "first") distinct = D_FIRST; @@ -61,12 +64,19 @@ struct Json { else return false; return true; } + + bool set_handle_array(const String &value){ + if (value == "array") handle_array = true; + else if (value == "hash") handle_array = false; + else return false; + return true; + } }; static void set_json_value(Json *json, Value *value){ - VHash *top = json->stack.top_value(); + VHashBase *top = json->stack.top_value(); if(json->key == NULL){ - top->hash().put(String(format(top->get_hash()->count(), 0)), value); + top->add(value); } else { switch (json->distinct){ case Json::D_EXCEPTION: @@ -95,19 +105,18 @@ static void set_json_value(Json *json, V String* json_string(Json *json, const char *value, uint32_t length){ String::C result = json->charset !=NULL ? - Charset::transcode(String::C(value, length), UTF8_charset, *json->charset) : + Charset::transcode(String::C(value, length), pa_UTF8_charset, *json->charset) : String::C(pa_strdup(value, length), length); - return new String(result.str, json->taint, result.length); + return new String(result, json->taint); } static Value *json_hook(Request &r, Junction *hook, String* key, Value* value){ - VMethodFrame frame(*hook->method, r.method_frame, hook->self); Value *params[]={new VString(key ? *key : String::Empty), value}; - - frame.store_params(params, 2); - r.execute_method(frame); - - return &frame.result().as_value(); + METHOD_FRAME_ACTION(*hook->method, r.method_frame, hook->self, { + frame.store_params(params, 2); + r.call(frame); + return &frame.result(); + }); } static int json_callback(Json *json, int type, const char *value, uint32_t length) @@ -139,14 +148,15 @@ static int json_callback(Json *json, int break; } case JSON_ARRAY_BEGIN:{ - VHash *v = new VHash(); if (json->hook_array){ json->key_stack.push(json->key); json->key=NULL; + json->stack.push(new VHash); } else { + VHashBase *v = json->handle_array ? (VHashBase *)new VArray : (VHashBase *)new VHash; if (json->stack.count()) set_json_value(json, v); + json->stack.push(v); } - json->stack.push(v); break; } case JSON_ARRAY_END: @@ -205,7 +215,7 @@ static const char* json_error_message(in "nesting limit", "data limit", "comment not allowed by config", - "unexpected char", + "unexpected character", "missing unicode low surrogate", "unexpected unicode low surrogate", "error comma out of structure", @@ -216,6 +226,67 @@ static const char* json_error_message(in extern String::Language get_untaint_lang(const String& lang_name); +#define SOURCE_MAX_LEN 60 + +void json_exception_with_source(Request& r, const char* msg, const char* json, int offset){ + int i; + + int line=0; + int start=0; + int end=strlen(json); + + if(offset>end) + offset=end; + + for(i = 0; i < offset; i++){ + if(json[i]=='\n'){ + line++; + } + } + + if(offset > SOURCE_MAX_LEN/2) + start = offset - SOURCE_MAX_LEN/2; + + for(i = offset-1; i>=start; i--){ + if(json[i]=='\n'){ + start=i+1; + break; + } + } + + if(start+SOURCE_MAX_LEN < end) + end=start+SOURCE_MAX_LEN; + + for(i = offset+1; i0){ + String s_source(pa_strdup(source,source_offset)); + source_offset=s_source.length(r.charsets.source()); + } + } + + throw Exception("json.parse", 0, "%s at line %d\n%s\n%*s", msg, line+1, source, source_offset+1, "^"); +} + static void _parse(Request& r, MethodParams& params) { const String& json_string=params.as_string(0, "json must be string"); @@ -236,15 +307,15 @@ static void _parse(Request& r, MethodPar if(HashStringValue* options=params.as_hash(1)) { int valid_options=0; if(Value* value=options->get("depth")) { - config.max_nesting=r.process_to_value(*value).as_int(); + config.max_nesting=r.process(*value).as_int(); valid_options++; } if(Value* value=options->get("double")) { - json.handle_double=r.process_to_value(*value).as_bool(); + json.handle_double=r.process(*value).as_bool(); valid_options++; } if(Value* value=options->get("int")) { - json.handle_int=r.process_to_value(*value).as_bool(); + json.handle_int=r.process(*value).as_bool(); valid_options++; } if(Value* value=options->get("distinct")) { @@ -260,15 +331,21 @@ static void _parse(Request& r, MethodPar if(Value* value=options->get("object")) { json.hook_object=value->get_junction(); json.request=&r; - if (!json.hook_object || !json.hook_object->method || !json.hook_object->method->params_names || !(json.hook_object->method->params_names->count() == 2)) + if (!json.hook_object || !json.hook_object->method || !json.hook_object->method->params_names || !(json.hook_object->method->params_count == 2)) throw Exception(PARSER_RUNTIME, 0, "$.object must be parser method with 2 parameters"); valid_options++; } if(Value* value=options->get("array")) { - json.hook_array=value->get_junction(); - json.request=&r; - if (!json.hook_array || !json.hook_array->method || !json.hook_array->method->params_names || !(json.hook_array->method->params_names->count() == 2)) - throw Exception(PARSER_RUNTIME, 0, "$.array must be parser method with 2 parameters"); + if(value->get_string()){ + const String& sarray=value->as_string(); + if (!json.set_handle_array(sarray)) + throw Exception(PARSER_RUNTIME, &sarray, "$.array must be parser method with 2 parameters or 'array' or 'hash'"); + } else { + json.hook_array=value->get_junction(); + json.request=&r; + if (!json.hook_array || !json.hook_array->method || !json.hook_array->method->params_names || !(json.hook_array->method->params_count == 2)) + throw Exception(PARSER_RUNTIME, 0, "$.array must be parser method with 2 parameters or 'array' or 'hash'"); + } valid_options++; } if(valid_options!=options->count()) @@ -276,22 +353,29 @@ static void _parse(Request& r, MethodPar } const String::Body json_body = json_string.cstr_to_string_body_untaint(String::L_JSON, r.connection(false), &r.charsets); - const char *json_cstr = json.charset != NULL ? Charset::transcode(json_body, *json.charset, UTF8_charset).cstr() : json_body.cstr(); + const char *json_cstr = json.charset != NULL ? Charset::transcode(json_body, *json.charset, pa_UTF8_charset).cstr() : json_body.cstr(); json_parser parser; if(int result = json_parser_init(&parser, &config, (json_parser_callback)&json_callback, &json)) throw Exception("json.parse", 0, "%s", json_error_message(result)); + if(!*json_cstr) + throw Exception("json.parse", 0, "empty string is not valid json"); + + const char *first_quote=strchr(json_cstr,'"'); + if(first_quote && first_quote>json_cstr && *(--first_quote) == '\\') + json_exception_with_source(r, "illegal quote escape, json may be tainted", json_cstr, first_quote-json_cstr); + uint32_t processed; if(int result = json_parser_string(&parser, json_cstr, strlen(json_cstr), &processed)) - throw Exception("json.parse", 0, "%s at byte %d", json_error_message(result), processed); + json_exception_with_source(r, json_error_message(result), json_cstr, processed); if (!json_parser_is_done(&parser)) - throw Exception("json.parse", 0, "unexpected end of json data"); - + json_exception_with_source(r, "unexpected end of json data", json_cstr, processed); + json_parser_free(&parser); - if (json.result) r.write_no_lang(*json.result); + if (json.result) r.write(*json.result); } const uint ANTI_ENDLESS_JSON_STRING_RECOURSION=128; @@ -299,7 +383,7 @@ const uint ANTI_ENDLESS_JSON_STRING_RECO char *get_indent(uint level){ static char* cache[ANTI_ENDLESS_JSON_STRING_RECOURSION]={}; if (!cache[level]){ - char *result = static_cast(pa_gc_malloc_atomic(level+1)); + char *result = static_cast(pa_malloc_atomic(level+1)); memset(result, '\t', level); result[level]='\0'; return cache[level]=result; @@ -307,6 +391,35 @@ char *get_indent(uint level){ return cache[level]; } +String *get_delim(uint level){ + static String* cache[ANTI_ENDLESS_JSON_STRING_RECOURSION]={}; + + if (!cache[level]){ + char *result = static_cast(pa_malloc_atomic(level+2+1+1)); + result[0]=','; + result[1]='\n'; + memset(result+2, '\t', level); + result[level+2]='"'; + result[level+3]='\0'; + return cache[level] = new String(result, String::L_AS_IS); + } + return cache[level]; +} + +String *get_array_delim(uint level){ + static String* cache[ANTI_ENDLESS_JSON_STRING_RECOURSION]={}; + + if (!cache[level]){ + char *result = static_cast(pa_malloc_atomic(level+2+1)); + result[0]=','; + result[1]='\n'; + memset(result+2, '\t', level); + result[level+2]='\0'; + return cache[level] = new String(result, String::L_AS_IS); + } + return cache[level]; +} + class Json_string_recoursion { Json_options& foptions; public: @@ -322,8 +435,8 @@ public: const String& value_json_string(String::Body key, Value& v, Json_options& options); -const String* Json_options::hash_json_string(HashStringValue &hash) { - if(!hash.count()) +const String* Json_options::hash_json_string(HashStringValue *hash) { + if(!hash || !hash->count()) return new String("{}", String::L_AS_IS); Json_string_recoursion go_down(*this); @@ -334,12 +447,12 @@ const String* Json_options::hash_json_st String *delim=NULL; indent=get_indent(json_string_recoursion); - for(HashStringValue::Iterator i(hash); i; i.next() ){ + for(HashStringValue::Iterator i(*hash); i; i.next() ){ if (delim){ result << *delim; } else { result << indent << "\""; - delim = new String(",\n", String::L_AS_IS); *delim << indent << "\""; + delim = get_delim(json_string_recoursion); } result << String(i.key(), String::L_JSON) << "\":" << value_json_string(i.key(), *i.value(), *this); } @@ -348,7 +461,7 @@ const String* Json_options::hash_json_st } else { bool need_delim=false; - for(HashStringValue::Iterator i(hash); i; i.next() ){ + for(HashStringValue::Iterator i(*hash); i; i.next() ){ result << (need_delim ? ",\n\"" : "\""); result << String(i.key(), String::L_JSON) << "\":" << value_json_string(i.key(), *i.value(), *this); need_delim=true; @@ -360,6 +473,86 @@ const String* Json_options::hash_json_st return &result; } +const String* Json_options::array_json_string(ArrayValue *array) { + if(!array || !array->count()) + return new String("[]", String::L_AS_IS); + + Json_string_recoursion go_down(*this); + + String& result = *new String("[\n", String::L_AS_IS); + + if (indent){ + + String *delim=NULL; + indent=get_indent(json_string_recoursion); + for(ArrayValue::Iterator i(*array); i; i.next() ){ + if (delim){ + result << *delim; + } else { + result << indent; + delim = get_array_delim(json_string_recoursion); + } + result << value_json_string(i.key(), i.value() ? *i.value() : *VVoid::get(), *this); + } + result << "\n" << (indent=get_indent(json_string_recoursion-1)) << "]"; + + } else { + + bool need_delim=false; + for(ArrayValue::Iterator i(*array); i; i.next() ){ + if(need_delim) result << ",\n"; + result << value_json_string(i.key(), i.value() ? *i.value() : *VVoid::get(), *this); + need_delim=true; + } + result << "\n]"; + + } + + return &result; +} + +const String* Json_options::array_compact_json_string(ArrayValue *array) { + if(!array || !array->count()) + return new String("[]", String::L_AS_IS); + + Json_string_recoursion go_down(*this); + + String& result = *new String("[\n", String::L_AS_IS); + + if (indent){ + + String *delim=NULL; + indent=get_indent(json_string_recoursion); + for(ArrayValue::Iterator i(*array); i; i.next() ){ + if (i.value()){ + if (delim){ + result << *delim; + } else { + result << indent; + delim = get_array_delim(json_string_recoursion); + } + result << value_json_string(i.key(), *i.value(), *this); + } + } + result << "\n" << (indent=get_indent(json_string_recoursion-1)) << "]"; + + } else { + + bool need_delim=false; + for(ArrayValue::Iterator i(*array); i; i.next() ){ + if (i.value()){ + if(need_delim) result << ",\n"; + result << value_json_string(i.key(), *i.value(), *this); + need_delim=true; + } + } + result << "\n]"; + + } + + return &result; +} + static bool based_on(HashStringValue::key_type key, HashStringValue::value_type /*value*/, Value* v) { return v->is(key.cstr()); } @@ -373,17 +566,16 @@ const String& value_json_string(String:: } if(method && !method->is_void()) { Junction* junction=method->get_junction(); - VMethodFrame frame(*junction->method, options.r->method_frame, junction->self); - HashStringValue* params_hash=options.params && options.indent ? options.params->get_hash() : NULL; Temp_hash_value indent(params_hash, "indent", new VString(*new String(options.indent, String::L_AS_IS))); Value *params[]={new VString(*new String(key, String::L_JSON)), &v, options.params ? options.params : VVoid::get()}; - frame.store_params(params, 3); - - options.r->execute_method(frame); - return frame.result().as_string(); + METHOD_FRAME_ACTION(*junction->method, options.r->method_frame, junction->self, { + frame.store_params(params, 3); + options.r->call(frame); + return frame.result().as_string(); + }); } } @@ -396,7 +588,7 @@ static void _string(Request& r, MethodPa if(params.count() == 2) if(HashStringValue* options=params.as_hash(1)) { - json.params=params.get(1); + json.params=¶ms[1]; HashStringValue* methods=new HashStringValue(); int valid_options=0; HashStringValue* vvalue; @@ -404,24 +596,32 @@ static void _string(Request& r, MethodPa String::Body key=i.key(); Value* value=i.value(); if(key == "skip-unknown"){ - json.skip_unknown=r.process_to_value(*value).as_bool(); + json.skip_unknown=r.process(*value).as_bool(); + valid_options++; + } else if(key == "one-line"){ + json.one_line=r.process(*value).as_bool(); valid_options++; } else if(key == "date" && value->is_string()){ const String& svalue=value->as_string(); if(!json.set_date_format(svalue)) - throw Exception(PARSER_RUNTIME, &svalue, "must be 'sql-string', 'gmt-string' or 'unix-timestamp'"); + throw Exception(PARSER_RUNTIME, &svalue, "must be 'sql-string', 'gmt-string', 'iso-string' or 'unix-timestamp'"); valid_options++; } else if(key == "indent"){ if(value->is_string()){ json.indent=value->as_string().cstr(); json.json_string_recoursion=strlen(json.indent); - } else json.indent=r.process_to_value(*value).as_bool() ? "" : NULL; + } else json.indent=r.process(*value).as_bool() ? "" : NULL; valid_options++; } else if(key == "table" && value->is_string()){ const String& svalue=value->as_string(); if(!json.set_table_format(svalue)) throw Exception(PARSER_RUNTIME, &svalue, "must be 'array', 'object' or 'compact'"); valid_options++; + } else if(key == "array" && value->is_string()){ + const String& svalue=value->as_string(); + if(!json.set_array_format(svalue)) + throw Exception(PARSER_RUNTIME, &svalue, "must be 'array', 'object' or 'compact'"); + valid_options++; } else if(key == "file" && value->is_string()){ const String& svalue=value->as_string(); if(!json.set_file_format(svalue)) @@ -439,7 +639,7 @@ static void _string(Request& r, MethodPa valid_options++; #endif } else if(Junction* junction=value->get_junction()){ - if(!junction->method || !junction->method->params_names || junction->method->params_names->count() != 3) + if(!junction->method || !junction->method->params_names || junction->method->params_count != 3) throw Exception(PARSER_RUNTIME, 0, "$.%s must be parser method with 3 parameters", key.cstr()); methods->put(key, value); valid_options++; @@ -450,12 +650,12 @@ static void _string(Request& r, MethodPa throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); // special handling for $._default - if(VHash* vhash=static_cast(params[1].as(VHASH_TYPE))) + if(VHashBase* vhash=dynamic_cast(¶ms[1])) if(Value* value=vhash->get_default()) { if(!value->is_string()){ - Junction* junction=value->get_junction(); - if(!junction || !junction->method || !junction->method->params_names || junction->method->params_names->count() != 3) - throw Exception(PARSER_RUNTIME, 0, "$.%s must be string or parser method with 3 parameters", HASH_DEFAULT_ELEMENT_NAME); + Junction* junction=value->get_junction(); + if(!junction || !junction->method || !junction->method->params_names || junction->method->params_count != 3) + throw Exception(PARSER_RUNTIME, 0, "$._default must be string or parser method with 3 parameters"); } json.default_method=value; } @@ -464,15 +664,22 @@ static void _string(Request& r, MethodPa json.methods=methods; } - const String& result_string=value_json_string(String::Body(), r.process_to_value(params[0]), json); + const String& result_string=value_json_string(String::Body(), r.process(params[0]), json); String::Body result_body=result_string.cstr_to_string_body_untaint(String::L_JSON, r.connection(false), &r.charsets); - r.write_pass_lang(*new String(result_body, String::L_AS_IS)); - } + if(json.one_line){ + char *result=result_body.cstrm(); + for(char *c=result;*c;c++) + if(*c=='\n') + *c=' '; + result_body=result; + } + r.write(*new String(result_body, String::L_AS_IS)); +} // constructor MJson::MJson(): Methoded("json") { add_native_method("parse", Method::CT_STATIC, _parse, 1, 2); - add_native_method("string", Method::CT_ANY, _string, 1, 2); + add_native_method("string", Method::CT_STATIC, _string, 1, 2); }