--- parser3/src/classes/string.C 2012/06/08 11:44:02 1.205 +++ parser3/src/classes/string.C 2016/10/26 15:44:49 1.232 @@ -1,7 +1,7 @@ /** @file Parser: @b string parser class. - Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com) + Copyright (c) 2001-2015 Art. Lebedev Studio (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ @@ -20,7 +20,7 @@ #include "pa_vregex.h" #include "pa_charsets.h" -volatile const char * IDENT_STRING_C="$Id: string.C,v 1.205 2012/06/08 11:44:02 misha Exp $"; +volatile const char * IDENT_STRING_C="$Id: string.C,v 1.232 2016/10/26 15:44:49 moko Exp $"; // class @@ -31,7 +31,7 @@ public: // global variable -DECLARE_CLASS_VAR(string, new MString, 0); +DECLARE_CLASS_VAR(string, new MString); // void class, inherited from string and thus should be inited afterwards @@ -42,7 +42,7 @@ public: // void global variable should be after string global variable -DECLARE_CLASS_VAR(void, new MVoid, 0); +DECLARE_CLASS_VAR(void, new MVoid); // defines for statics @@ -53,6 +53,9 @@ DECLARE_CLASS_VAR(void, new MVoid, 0); #define MODE_APPEND "append" +#define UNESCAPE_MODE_JS "js" +#define UNESCAPE_MODE_URI "uri" + // statics static const String match_var_name(MATCH_VAR_NAME); @@ -67,66 +70,71 @@ static void _length(Request& r, MethodPa static void _int(Request& r, MethodParams& params) { const String& self_string=GET_SELF(r, VString).string(); int converted; - try { - if(self_string.is_empty()) - throw Exception(PARSER_RUNTIME, - 0, - "unable to convert empty string without default specified"); - converted=self_string.as_int(); - } catch(...) { // convert problem + + if(self_string.is_empty()) { if(params.count()>0) converted=params.as_int(0, "default must be int", r); // (default) else - rethrow; // we have a problem when no default + throw Exception(PARSER_RUNTIME, 0, "unable to convert empty string without default specified"); + } else { + try { + converted=self_string.as_int(); + } catch(...) { // convert problem + if(params.count()>0) + converted=params.as_int(0, "default must be int", r); // (default) + else + rethrow; // we have a problem when no default + } } + r.write_no_lang(*new VInt(converted)); } static void _double(Request& r, MethodParams& params) { const String& self_string=GET_SELF(r, VString).string(); - double converted; - try { - if(self_string.is_empty()) - throw Exception(PARSER_RUNTIME, - 0, - "unable to convert empty string without default specified"); - converted=self_string.as_double(); - } catch(...) { // convert problem + + if(self_string.is_empty()) { if(params.count()>0) - converted=params.as_double(0, "default must be double", r); // (default) + r.write_no_lang(*new VDouble(params.as_double(0, "default must be double", r))); // (default) else - rethrow; // we have a problem when no default + throw Exception(PARSER_RUNTIME, 0, "unable to convert empty string without default specified"); + } else { + try { + r.write_no_lang(*new VDouble(self_string.as_double())); + } catch(...) { // convert problem + if(params.count()>0) + r.write_no_lang(*new VDouble(params.as_double(0, "default must be double", r))); // (default) + else + rethrow; // we have a problem when no default + } } - - r.write_no_lang(*new VDouble(converted)); } static void _bool(Request& r, MethodParams& params) { const String& self_string=GET_SELF(r, VString).string(); bool converted; - try { - if(self_string.is_empty()) - throw Exception(PARSER_RUNTIME, - 0, - "unable to convert empty string without default specified"); - - try { - converted=self_string.as_bool(); - } catch(...) { - const String& lower_string=self_string.change_case(r.charsets.source(), String::CC_LOWER); - if(lower_string == "true"){ - converted=true; - } else if (lower_string == "false"){ - converted=false; - } else { - rethrow; - } - } - } catch(...) { // convert problem + const char *str=self_string.cstr(); + + if(self_string.is_empty()) { if(params.count()>0) converted=params.as_bool(0, "default must be bool", r); // (default) else - rethrow; // we have a problem when no default + throw Exception(PARSER_RUNTIME, 0, "unable to convert empty string without default specified"); + } else if( (str[0]=='T' || str[0]=='t') && (str[1]=='R' || str[1]=='r') && (str[2]=='U' || str[2]=='u') && + (str[3]=='E' || str[3]=='e') && str[4]==0 ) { // "true" + converted=true; + } else if( (str[0]=='F' || str[0]=='f') && (str[1]=='A' || str[1]=='a') && (str[2]=='L' || str[2]=='l') && + (str[3]=='S' || str[3]=='s') && (str[4]=='E' || str[4]=='e') && str[5]==0 ) { // "false" + converted=false; + } else { + try { + converted=self_string.as_bool(); + } catch(...) { // convert problem + if(params.count()>0) + converted=params.as_bool(0, "default must be bool", r); // (default) + else + rethrow; // we have a problem when no default + } } r.write_no_lang(VBool::get(converted)); @@ -145,27 +153,18 @@ static void _bool(Request& r, MethodPara static void _left(Request& r, MethodParams& params) { ssize_t sn=params.as_int(0, "n must be int", r); - if(sn<0) - throw Exception(PARSER_RUNTIME, - 0, - "n(%d) must be >=0", sn); - size_t n=(size_t)sn; - const String& string=GET_SELF(r, VString).string(); - r.write_assign_lang(string.mid(r.charsets.source(), 0, n)); + r.write_pass_lang(sn<0 ? string : string.mid(r.charsets.source(), 0, (size_t)sn)); } static void _right(Request& r, MethodParams& params) { - ssize_t sn=(size_t)params.as_int(0, "n must be int", r); - if(sn<0) - throw Exception(PARSER_RUNTIME, - 0, - "n(%d) must be >=0", sn); - size_t n=(size_t)sn; - - const String& string=GET_SELF(r, VString).string(); - size_t length=string.length(r.charsets.source()); - r.write_assign_lang(n0){ + size_t n=(size_t)sn; + const String& string=GET_SELF(r, VString).string(); + size_t length=string.length(r.charsets.source()); + r.write_pass_lang(n1){ offset=params.as_int(1, "n must be int", r); if(offset<0) - throw Exception(PARSER_RUNTIME, - 0, - "n(%d) must be >=0", offset); + throw Exception(PARSER_RUNTIME, 0, "n(%d) must be >=0", offset); } r.write_no_lang(*new VInt((int)string.pos(r.charsets.source(), substr.as_string(), (size_t)offset))); } -static void split_list(MethodParams& params, int paramIndex, - const String& string, - ArrayString& result) { +static void split_list(MethodParams& params, int paramIndex, const String& string, ArrayString& result) { Value& delim_value=params.as_no_junction(paramIndex, "delimiter must not be code"); - - size_t pos_after=0; - string.split(result, pos_after, delim_value.as_string()); + string.split(result, 0, delim_value.as_string()); } #define SPLIT_LEFT 0x0001 @@ -401,7 +394,6 @@ static void _match(Request& r, MethodPar vrcleaner.vregex=vregex; } - Temp_lang temp_lang(r, String::L_PASS_APPENDED); const String& src=GET_SELF(r, VString).string(); int matches_count=0; @@ -446,7 +438,7 @@ static void _match(Request& r, MethodPar }; if(is_junction){ - Temp_value_element temp( *replacement_code->get_junction()->method_frame, match_var_name, vtable); + Temp_value_element temp(r, *replacement_code->get_junction()->method_frame, match_var_name, vtable); src.match(vregex, replace_action, &info, matches_count); } else { src.match(vregex, replace_action, &info, matches_count); @@ -455,7 +447,7 @@ static void _match(Request& r, MethodPar if(!matches_count && default_code) r.process_write(*default_code); else - r.write_assign_lang(result); + r.write_pass_lang(result); } } @@ -463,7 +455,7 @@ static void change_case(Request& r, Meth String::Change_case_kind kind) { const String& src=GET_SELF(r, VString).string(); - r.write_assign_lang(src.change_case(r.charsets.source(), kind)); + r.write_pass_lang(src.change_case(r.charsets.source(), kind)); } static void _upper(Request& r, MethodParams& params) { change_case(r, params, String::CC_UPPER); @@ -478,14 +470,14 @@ class String_sql_event_handlers: public bool got_column; public: bool got_cell; - String& result; + const String* result; public: String_sql_event_handlers( const String& astatement_string, const char* astatement_cstr): statement_string(astatement_string), statement_cstr(astatement_cstr), got_column(false), got_cell(false), - result(*new String) {} + result(&String::Empty) {} bool add_column(SQL_Error& error, const char* /*str*/, size_t /*length*/) { if(got_column) { @@ -499,7 +491,7 @@ public: } bool before_rows(SQL_Error& /*error*/ ) { /* ignore */ return false; } bool add_row(SQL_Error& /*error*/) { /* ignore */ return false; } - bool add_row_cell(SQL_Error& error, const char* str, size_t length) { + bool add_row_cell(SQL_Error& error, const char* str, size_t) { if(got_cell) { error=SQL_Error(PARSER_RUNTIME, //statement_string, @@ -509,7 +501,7 @@ public: try { got_cell=true; - result.append_know_length(str, length, String::L_TAINTED); + result=new String(str, String::L_TAINTED /* no length as 0x00 can be inside */ ); return false; } catch(...) { error=SQL_Error("exception occured in String_sql_event_handlers::add_row_cell"); @@ -542,11 +534,11 @@ const String* sql_result_string(Request& } if(Value* vlimit=options->get(sql_limit_name)) { valid_options++; - limit=(ulong)r.process_to_value(*vlimit).as_double(); + limit=(ulong)r.process(*vlimit).as_double(); } if(Value* voffset=options->get(sql_offset_name)) { valid_options++; - offset=(ulong)r.process_to_value(*voffset).as_double(); + offset=(ulong)r.process(*voffset).as_double(); } if((default_code=options->get(sql_default_name))) { valid_options++; @@ -560,9 +552,8 @@ const String* sql_result_string(Request& if(bind) placeholders_count=marshal_binds(*bind, placeholders); - Temp_lang temp_lang(r, String::L_SQL); const String& statement_string=r.process_to_string(statement); - const char* statement_cstr=statement_string.untaint_cstr(r.flang, r.connection()); + const char* statement_cstr=statement_string.untaint_cstr(String::L_SQL, r.connection()); String_sql_event_handlers handlers(statement_string, statement_cstr); @@ -579,7 +570,7 @@ const String* sql_result_string(Request& if(!handlers.got_cell) return 0; // no lines, caller should return second param[default value] - return &handlers.result; + return handlers.result; } static void _sql(Request& r, MethodParams& params) { @@ -595,7 +586,7 @@ static void _sql(Request& r, MethodParam "produced no result, but no default option specified"); } - r.write_assign_lang(*string); + r.write_pass_lang(*string); } static void _replace(Request& r, MethodParams& params) { @@ -605,14 +596,14 @@ static void _replace(Request& r, MethodP // ^string.replace[table] Table* table=params.as_table(0, "param"); Dictionary dict(*table); - r.write_assign_lang(src.replace(dict)); + r.write_pass_lang(src.replace(dict)); } else { // ^string.replace[from-string;to-string] Dictionary dict( params.as_string(0, "from must be string"), params.as_string(1, "to must be string") ); - r.write_assign_lang(src.replace(dict)); + r.write_pass_lang(src.replace(dict)); } } @@ -622,12 +613,12 @@ static void _save(Request& r, MethodPara Charset* asked_charset=0; size_t file_name_index=0; - if(params.count()>1) + if(params.count()>1) { if(HashStringValue* options=params.as_no_junction(1, "second parameter should be string or hash").get_hash()){ // ^file.save[filespec;$.charset[] $.append(true)] int valid_options=0; if(Value* vcharset_name=options->get(PA_CHARSET_NAME)){ - asked_charset=&::charsets.get(vcharset_name->as_string().change_case(r.charsets.source(), String::CC_UPPER)); + asked_charset=&pa_charsets.get(vcharset_name->as_string()); valid_options++; } if(Value* vappend=options->get(MODE_APPEND)){ @@ -647,11 +638,12 @@ static void _save(Request& r, MethodPara &mode, "unknown mode, must be 'append'"); } + } const String& file_name=params.as_string(file_name_index, FILE_NAME_MUST_BE_STRING); const String& src=GET_SELF(r, VString).string(); - String::Body sbody=src.cstr_to_string_body_untaint(String::L_AS_IS, r.connection(false/*no error if none*/)); + String::Body sbody=src.cstr_to_string_body_untaint(String::L_AS_IS, r.connection(false), &r.charsets); // write file_write(r.charsets, r.absolute(file_name), sbody.cstr(), sbody.length(), true, do_append, asked_charset); @@ -660,7 +652,7 @@ static void _save(Request& r, MethodPara static void _normalize(Request& r, MethodParams&) { const String& src=GET_SELF(r, VString).string(); - r.write_assign_lang(src); + r.write_pass_lang(src); } static void _trim(Request& r, MethodParams& params) { @@ -671,17 +663,20 @@ static void _trim(Request& r, MethodPara const char* chars=0; if(params_count>0) { const String& skind=params.as_string(0, "'where' must be string"); - if(!skind.is_empty()) + if(!skind.is_empty()) { if(skind==TRIM_BOTH_OPTION) kind=String::TRIM_BOTH; else if(skind==TRIM_START_OPTION || skind=="start") kind=String::TRIM_START; else if(skind==TRIM_END_OPTION || skind=="end") kind=String::TRIM_END; + else if(params_count==1) + chars=skind.cstr(); else throw Exception(PARSER_RUNTIME, &skind, - "'kind' must be one of "TRIM_START_OPTION", "TRIM_BOTH_OPTION", "TRIM_END_OPTION); + "'kind' must be one of " TRIM_START_OPTION ", " TRIM_BOTH_OPTION ", " TRIM_END_OPTION); + } if(params_count>1) { const String& schars=params.as_string(1, "'chars' must be string"); @@ -690,13 +685,13 @@ static void _trim(Request& r, MethodPara } } - r.write_assign_lang(src.trim(kind, chars, &r.charsets.source())); + r.write_pass_lang(src.trim(kind, chars, &r.charsets.source())); } static void _base64(Request& r, MethodParams& params) { - if(params.count()) { + if(&r.get_self() == string_class) { // decode: ^string:base64[encoded[;$.strict(true|false)]] - const char* cstr=params.as_string(0, PARAMETER_MUST_BE_STRING).cstr(); + const char* cstr=params.count() ? params.as_string(0, PARAMETER_MUST_BE_STRING).cstr() : ""; char* decoded=0; size_t length=0; @@ -705,7 +700,7 @@ static void _base64(Request& r, MethodPa if(HashStringValue* options=params.as_hash(1)) { int valid_options=0; if(Value* vstrict=options->get(BASE64_STRICT_OPTION_NAME)) { - strict=r.process_to_value(*vstrict).as_bool(); + strict=r.process(*vstrict).as_bool(); valid_options++; } if(valid_options!=options->count()) @@ -721,26 +716,79 @@ static void _base64(Request& r, MethodPa fix_line_breaks(decoded, length); if(length) - r.write_assign_lang(*new String(decoded, String::L_TAINTED)); + r.write_pass_lang(*new String(decoded, String::L_TAINTED)); } } else { // encode: ^str.base64[] VString& self=GET_SELF(r, VString); const char* cstr=self.string().cstr(); const char* encoded=pa_base64_encode(cstr, strlen(cstr)); - r.write_assign_lang(*new String(encoded, String::L_TAINTED/*once ?param=base64(something) was needed*/)); + r.write_pass_lang(*new String(encoded, String::L_TAINTED/*once ?param=base64(something) was needed*/)); } } -static void _escape(Request& r, MethodParams&){ +static void _idna(Request& r, MethodParams& params) { + if(&r.get_self() == string_class) { + // decode: ^string:idna[encoded] + const char* cstr=params.count() ? params.as_string(0, PARAMETER_MUST_BE_STRING).cstr() : ""; + r.write_pass_lang(*new String(pa_idna_decode(cstr, r.charsets.source()), String::L_TAINTED)); + } else { + // encode: ^str.idna[] + VString& self=GET_SELF(r, VString); + const char* cstr=self.string().cstr(); + r.write_pass_lang(*new String(pa_idna_encode(cstr, r.charsets.source()), String::L_TAINTED)); + } +} + +static void _js_escape(Request& r, MethodParams&){ const String& src=GET_SELF(r, VString).string(); - r.write_assign_lang(src.escape(r.charsets.source())); + r.write_pass_lang(src.escape(r.charsets.source())); } -static void _unescape(Request& r, MethodParams& params){ +static void _js_unescape(Request& r, MethodParams& params){ const String& src=params.as_string(0, PARAMETER_MUST_BE_STRING); if(const char* result=unescape_chars(src.cstr(), src.length(), &r.charsets.source(), true)) - r.write_assign_lang(*new String(result)); + r.write_pass_lang(*new String(result, String::L_TAINTED)); +} + +static void _unescape(Request& r, MethodParams& params){ + const String& mode=params.as_string(0, MODE_MUST_NOT_BE_CODE); + const String& src=params.as_string(1, PARAMETER_MUST_BE_STRING); + + Charset* from_charset=&r.charsets.client(); + + if(params.count() > 2) + if(HashStringValue* options=params.as_hash(2)) { + int valid_options=0; + if(Value* vcharset_name=options->get(PA_CHARSET_NAME)){ + from_charset=&pa_charsets.get(vcharset_name->as_string()); + valid_options++; + } + if(valid_options!=options->count()) + throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); + } + + bool mode_js; + if(mode==UNESCAPE_MODE_JS){ + mode_js=true; + } else if(mode==UNESCAPE_MODE_URI){ + mode_js=false; + } else { + throw Exception(PARSER_RUNTIME, &mode, "is invalid mode, must be either '" UNESCAPE_MODE_JS "' or '" UNESCAPE_MODE_URI "'"); + } + + const char* unescaped=unescape_chars(src.cstr(), src.length(), from_charset, mode_js); + if(*unescaped){ + const String* result=new String(Charset::transcode(unescaped, *from_charset, r.charsets.source()), String::L_TAINTED); + r.write_pass_lang(*result); + } +} + +static void _contains(Request& r, MethodParams& params) { + // empty or whitespace string is hash compatible + GET_SELF(r, VString).get_element(params.as_string(0, "key must be string")); + // ignoring result as it allways null + r.write_no_lang(VBool::get(false)); } // constructor @@ -813,12 +861,22 @@ MString::MString(): Methoded("string") { add_native_method("trim", Method::CT_DYNAMIC, _trim, 0, 2); // ^string.base64[] << encode - // ^string:base64[encoded string] << decode + // ^string:base64[encoded string] << decode add_native_method("base64", Method::CT_ANY, _base64, 0, 2); + // ^string.idna[] << encode + // ^string:idna[encoded string] << decode + add_native_method("idna", Method::CT_ANY, _idna, 0, 1); + // ^string.js-escape[] - add_native_method("js-escape", Method::CT_ANY, _escape, 0, 0); + add_native_method("js-escape", Method::CT_DYNAMIC, _js_escape, 0, 0); // ^string:js-unescape[escaped%uXXXXstring] - add_native_method("js-unescape", Method::CT_STATIC, _unescape, 1, 1); -} + add_native_method("js-unescape", Method::CT_STATIC, _js_unescape, 1, 1); + + // ^string:unescape[js|uri;escaped;$.charset[...]] + add_native_method("unescape", Method::CT_STATIC, _unescape, 2, 3); + + // ^string.contains[key] for hash compatibility + add_native_method("contains", Method::CT_DYNAMIC, _contains, 1, 1); +}