--- parser3/src/classes/string.C 2019/09/06 10:17:07 1.239 +++ parser3/src/classes/string.C 2024/10/02 22:54:02 1.256 @@ -1,12 +1,13 @@ /** @file Parser: @b string parser class. - Copyright (c) 2001-2017 Art. Lebedev Studio (http://www.artlebedev.com) - Author: Alexandr Petrosian (http://paf.design.ru) + Copyright (c) 2001-2023 Art. Lebedev Studio (http://www.artlebedev.com) + Authors: Konstantin Morshnev , Alexandr Petrosian */ #include "classes.h" #include "pa_vmethod_frame.h" +#include "pa_base64.h" #include "pa_request.h" #include "pa_vdouble.h" @@ -20,7 +21,7 @@ #include "pa_vregex.h" #include "pa_charsets.h" -volatile const char * IDENT_STRING_C="$Id: string.C,v 1.239 2019/09/06 10:17:07 moko Exp $"; +volatile const char * IDENT_STRING_C="$Id: string.C,v 1.256 2024/10/02 22:54:02 moko Exp $"; // class @@ -146,21 +147,21 @@ static void _bool(Request& r, MethodPara // for some time due to stupid {} in original design const String& fmt=r.process_to_string(fmt_maybe_code); - const char* buf=format(r.get_self().as_double(), fmt.trim().cstrm()); + const char* buf=format_double(r.get_self().as_double(), fmt.trim().cstrm()); r.write(String(buf)); } static void _left(Request& r, MethodParams& params) { - ssize_t sn=params.as_int(0, "n must be int", r); + int sn=params.as_int(0, "n must be int", r); const String& string=GET_SELF(r, VString).string(); - r.write(sn<0 ? string : string.mid(r.charsets.source(), 0, (size_t)sn)); + r.write(sn<0 ? 
string : string.mid(r.charsets.source(), 0, sn)); } static void _right(Request& r, MethodParams& params) { - ssize_t sn=params.as_int(0, "n must be int", r); + int sn=params.as_int(0, "n must be int", r); if(sn>0){ - size_t n=(size_t)sn; + size_t n=sn; const String& string=GET_SELF(r, VString).string(); size_t length=string.length(r.charsets.source()); r.write(n=0", sbegin); - size_t begin=(size_t)sbegin; + int begin=params.as_int(0, "p must be int", r); + if(begin<0) + throw Exception(PARSER_RUNTIME, 0, "p(%d) must be >=0", begin); size_t end; size_t length=0; + if(params.count()>1) { - ssize_t sn=params.as_int(1, "n must be int", r); - if(sn<0) - throw Exception(PARSER_RUNTIME, 0, "n(%d) must be >=0", sn); - end=begin+(size_t)sn; + int n=params.as_int(1, "n must be int", r); + if(n<0) + throw Exception(PARSER_RUNTIME, 0, "n(%d) must be >=0", n); + end=begin+n; } else { length=string.length(r.charsets.source()); end=length; @@ -194,19 +195,41 @@ static void _pos(Request& r, MethodParam Value& substr=params.as_no_junction(0, "substr must not be code"); const String& string=GET_SELF(r, VString).string(); - ssize_t offset=0; + int offset=0; if(params.count()>1){ offset=params.as_int(1, "n must be int", r); if(offset<0) throw Exception(PARSER_RUNTIME, 0, "n(%d) must be >=0", offset); } - r.write(*new VInt((int)string.pos(r.charsets.source(), substr.as_string(), (size_t)offset))); + r.write(*new VInt((int)string.pos(r.charsets.source(), substr.as_string(), offset))); +} + +struct Split_action_info { + const String& src; + ArrayString &result; +}; + +static void split_action(Table& , ArrayString* row, int prestart, int prefinish, int poststart, int postfinish, void *info) { + Split_action_info& ai=*static_cast<Split_action_info*>(info); + if(row) { // begin&middle + // piece from last match['prestart'] to beginning of this match['prefinish'] + ai.result += &ai.src.mid(prestart, prefinish); + } else // end + if(poststart != postfinish) + ai.result += &ai.src.mid(poststart, postfinish); } 
-static void split_list(MethodParams& params, int paramIndex, const String& string, ArrayString& result) { - Value& delim_value=params.as_no_junction(paramIndex, "delimiter must not be code"); - string.split(result, 0, delim_value.as_string()); +static void split_list(Value& delim_value, const String& string, ArrayString& result) { + if(VRegex *vregex=dynamic_cast<VRegex*>(&delim_value)){ + vregex->study(); + + int matches_count=0; + Split_action_info ai = { string, result }; + + string.match(vregex, split_action, &ai, matches_count); + } else + string.split(result, 0, delim_value.as_string()); } #define SPLIT_LEFT 0x0001 @@ -254,8 +277,7 @@ static Table& split_vertical(ArrayString table+=row; } } else { // left - Array_iterator i(pieces); - while(i.has_next()) { + for(ArrayString::Iterator i(pieces); i; ) { Table::element_type row(new ArrayString); *row+=i.next(); table+=row; @@ -272,7 +294,7 @@ static Table& split_horizontal(ArrayStri for(int i=pieces.count(); --i>=0; ) *row+=pieces[i]; } else { // left - for(Array_iterator i(pieces); i.has_next(); ) + for(ArrayString::Iterator i(pieces); i; ) *row+=i.next(); } table+=row; @@ -285,7 +307,7 @@ static void split_with_options(Request& size_t params_count=params.count(); ArrayString pieces; - split_list(params, 0, string, pieces); + split_list(params.as_no_junction(0, "delimiter must not be code"), string, pieces); if(!bits) { const String* options=0; @@ -363,13 +385,12 @@ static void _match(Request& r, MethodPar Value& regexp=params.as_no_junction(0, "regexp must not be code"); Value* options=(params_count>1)?&params.as_no_junction(1, OPTIONS_MUST_NOT_BE_CODE):0; - VRegex* vregex; + VRegex* vregex=dynamic_cast<VRegex*>(&regexp); VRegexCleaner vrcleaner; - if(Value* value=regexp.as(VREGEX_TYPE)){ + if(vregex){ if(options && options->is_defined()) - throw Exception(PARSER_RUNTIME, 0, "you can not specify regex-object and options together"); - vregex=static_cast<VRegex*>(value); + throw Exception(PARSER_RUNTIME, 0, "you cannot specify regex-object and 
options together"); } else { vregex=new VRegex(r.charsets.source(), &regexp.as_string(), (options) ? (&options->as_string()) : 0); vregex->study(); @@ -443,22 +464,19 @@ static void _lower(Request& r, MethodPar #ifndef DOXYGEN class String_sql_event_handlers: public SQL_Driver_query_event_handlers { - const String& statement_string; const char* statement_cstr; bool got_column; public: bool got_cell; const String* result; public: - String_sql_event_handlers( - const String& astatement_string, const char* astatement_cstr): - statement_string(astatement_string), statement_cstr(astatement_cstr), + String_sql_event_handlers(): got_column(false), got_cell(false), result(&String::Empty) {} bool add_column(SQL_Error& error, const char* /*str*/, size_t /*length*/) { if(got_column) { - error=SQL_Error(PARSER_RUNTIME, /*statement_string,*/ "result must contain exactly one column"); + error=SQL_Error("result must contain exactly one column"); return true; } got_column=true; @@ -468,10 +486,9 @@ public: bool add_row(SQL_Error& /*error*/) { /* ignore */ return false; } bool add_row_cell(SQL_Error& error, const char* str, size_t) { if(got_cell) { - error=SQL_Error(PARSER_RUNTIME, /*statement_string,*/ "result must not contain more then one row"); + error=SQL_Error("result must contain no more than one row"); return true; } - try { got_cell=true; result=new String(str, String::L_TAINTED /* no length as 0x00 can be inside */ ); @@ -524,14 +541,9 @@ const String* sql_result_string(Request& const String& statement_string=r.process_to_string(statement); const char* statement_cstr=statement_string.untaint_cstr(String::L_SQL, r.connection()); - String_sql_event_handlers handlers(statement_string, statement_cstr); + String_sql_event_handlers handlers; - r.connection()->query( - statement_cstr, - placeholders_count, placeholders, - offset, limit, - handlers, - statement_string); + r.connection()->query(statement_cstr, placeholders_count, placeholders, offset, limit, handlers, 
statement_string); if(bind) unmarshal_bind_updates(*bind, placeholders_count, placeholders); @@ -561,9 +573,11 @@ static void _replace(Request& r, MethodP if(params.count()==1) { // ^string.replace[table] - Table* table=params.as_table(0, "param"); - Dictionary dict(*table); - r.write(src.replace(dict)); + if(Table* table=params.as_table(0, "param")){ + Dictionary dict(*table); + r.write(src.replace(dict)); + } else + r.write(src); } else { // ^string.replace[from-string;to-string] Dictionary dict(params.as_string(0, "from must be string"), params.as_string(1, "to must be string")); @@ -610,7 +624,7 @@ static void _save(Request& r, MethodPara String::Body sbody=src.cstr_to_string_body_untaint(String::L_AS_IS, r.connection(false), &r.charsets); // write - file_write(r.charsets, r.absolute(file_name), sbody.cstr(), sbody.length(), true, do_append, asked_charset); + file_write(r.charsets, r.full_disk_path(file_name), sbody.cstr(), sbody.length(), true, do_append, asked_charset); } static void _normalize(Request& r, MethodParams&) { @@ -650,40 +664,83 @@ static void _trim(Request& r, MethodPara r.write(src.trim(kind, chars, &r.charsets.source())); } +Base64Options base64_encode_options(Request& r, HashStringValue* options) { + Base64Options result; + if(options) { + int valid_options=0; + for(HashStringValue::Iterator i(*options); i; i.next()) { + String::Body key=i.key(); + Value* value=i.value(); + if(key == "pad") { + result.pad=r.process(*value).as_bool(); + valid_options++; + } else if(key == "wrap") { + result.wrap=r.process(*value).as_bool(); + valid_options++; + } else if(key == "url-safe") { + if(r.process(*value).as_bool()) + result.set_url_safe_abc(); + valid_options++; + } + } + + if(valid_options != options->count()) + throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); + } + return result; +} + +Base64Options base64_decode_options(Request& r, HashStringValue* options) { + Base64Options result; + if(options) { + int valid_options=0; + 
for(HashStringValue::Iterator i(*options); i; i.next() ) { + String::Body key=i.key(); + Value* value=i.value(); + if(key == "pad") { + result.pad=r.process(*value).as_bool(); + valid_options++; + } else if(key == "strict") { + result.strict=r.process(*value).as_bool(); + valid_options++; + } else if(key == "url-safe") { + if(r.process(*value).as_bool()) + result.set_url_safe_abc(); + valid_options++; + } + } + + if(valid_options != options->count()) + throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); + } + return result; +} + static void _base64(Request& r, MethodParams& params) { if(&r.get_self() == string_class) { - // decode: ^string:base64[encoded[;$.strict(true|false)]] + // decode: ^string:base64[encoded[;options]] const char* cstr=params.count() ? params.as_string(0, PARAMETER_MUST_BE_STRING).cstr() : ""; - char* decoded=0; - size_t length=0; + Base64Options options = base64_decode_options(r, params.count() > 1 ? params.as_hash(1) : NULL); - bool strict=false; - if(params.count() > 1) - if(HashStringValue* options=params.as_hash(1)) { - int valid_options=0; - if(Value* vstrict=options->get(BASE64_STRICT_OPTION_NAME)) { - strict=r.process(*vstrict).as_bool(); - valid_options++; - } - if(valid_options!=options->count()) - throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); - } + char* decoded=0; + size_t length=pa_base64_decode(cstr, strlen(cstr), decoded, options); - pa_base64_decode(cstr, strlen(cstr), decoded, length, strict); if(decoded && length){ - if(memchr((const char*)decoded, 0, length)) - throw Exception(PARSER_RUNTIME, 0, "Invalid \\x00 character found while decode to string. Decode it to file instead."); + if(memchr(decoded, 0, length)) + throw Exception(PARSER_RUNTIME, 0, "Invalid \\x00 character found while decoding to string. 
Decode to file instead."); fix_line_breaks(decoded, length); + if(length) r.write(*new String(decoded, String::L_TAINTED)); } } else { - // encode: ^str.base64[] + // encode: ^str.base64[options] VString& self=GET_SELF(r, VString); const char* cstr=self.string().cstr(); - const char* encoded=pa_base64_encode(cstr, strlen(cstr)); - r.write(*new String(encoded, String::L_TAINTED/*once ?param=base64(something) was needed*/)); + Base64Options options = base64_encode_options(r, params.count() ? params.as_hash(0) : NULL); + const char* encoded=pa_base64_encode(cstr, strlen(cstr), options); + r.write(*new String(encoded, String::L_TAINTED /*once ?param=base64(something) was needed*/ )); } }