--- parser3/src/classes/string.C 2005/06/06 08:45:11 1.142 +++ parser3/src/classes/string.C 2009/07/07 05:47:43 1.185 @@ -1,11 +1,11 @@ /** @file Parser: @b string parser class. - Copyright (c) 2001-2004 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ -static const char * const IDENT_STRING_C="$Date: 2005/06/06 08:45:11 $"; +static const char * const IDENT_STRING_C="$Date: 2009/07/07 05:47:43 $"; #include "classes.h" #include "pa_vmethod_frame.h" @@ -19,6 +19,7 @@ static const char * const IDENT_STRING_C #include "pa_sql_connection.h" #include "pa_dictionary.h" #include "pa_vmethod_frame.h" +#include "pa_vregex.h" // class @@ -36,9 +37,9 @@ DECLARE_CLASS_VAR(string, new MString, 0 // defines for statics #define MATCH_VAR_NAME "match" -#define TRIM_START_OPTION "start" +#define TRIM_START_OPTION "left" +#define TRIM_END_OPTION "right" #define TRIM_BOTH_OPTION "both" -#define TRIM_END_OPTION "end" // statics @@ -47,24 +48,22 @@ static const String match_var_name(MATCH // methods static void _length(Request& r, MethodParams&) { - double result=GET_SELF(r, VString).string().length(); + double result=GET_SELF(r, VString).string().length(r.charsets.source()); r.write_no_lang(*new VDouble(result)); } static void _int(Request& r, MethodParams& params) { const String& self_string=GET_SELF(r, VString).string(); int converted; - Value* default_code=params.count()>0?&params.as_junction(0, "default must be int") - :0; // (default) try { if(self_string.is_empty()) - throw Exception("parser.runtime", + throw Exception(PARSER_RUNTIME, 0, "parameter is empty string, error converting"); converted=self_string.as_int(); } catch(...)
{ // convert problem - if(default_code) - converted=r.process_to_value(*default_code).as_int(); + if(params.count()>0) + converted=params.as_int(0, "default must be int", r); // (default) else rethrow; // we have a problem when no default } @@ -74,17 +73,15 @@ static void _int(Request& r, MethodParam static void _double(Request& r, MethodParams& params) { const String& self_string=GET_SELF(r, VString).string(); double converted; - Value* default_code=params.count()>0?&params.as_junction(0, "default must be double") - :0; // (default) try { if(self_string.is_empty()) - throw Exception("parser.runtime", + throw Exception(PARSER_RUNTIME, 0, "parameter is empty string, error converting"); converted=self_string.as_double(); } catch(...) { // convert problem - if(default_code) - converted=r.process_to_value(*default_code).as_double(); + if(params.count()>0) + converted=params.as_double(0, "default must be double", r); // (default) else rethrow; // we have a problem when no default } @@ -92,13 +89,44 @@ static void _double(Request& r, MethodPa r.write_no_lang(*new VDouble(converted)); } +static void _bool(Request& r, MethodParams& params) { + const String& self_string=GET_SELF(r, VString).string(); + bool converted; + try { + if(self_string.is_empty()) + throw Exception(PARSER_RUNTIME, + 0, + "parameter is empty string, error converting"); + + try { + converted=self_string.as_bool(); + } catch(...) { + const String& lower_string=self_string.change_case(r.charsets.source(), String::CC_LOWER); + if(lower_string == "true"){ + converted=true; + } else if (lower_string == "false"){ + converted=false; + } else { + rethrow; + } + } + } catch(...)
{ // convert problem + if(params.count()>0) + converted=params.as_bool(0, "default must be bool", r); // (default) + else + rethrow; // we have a problem when no default + } + + r.write_no_lang(VBool::get(converted)); +} + /*not static*/void _string_format(Request& r, MethodParams& params) { Value& fmt_maybe_code=params[0]; // for some time due to stupid {} in original design const String& fmt=r.process_to_string(fmt_maybe_code); - const char* buf=format(r.get_self().as_double(), fmt.cstrm()); + const char* buf=format(r.get_self().as_double(), fmt.trim().cstrm()); r.write_no_lang(String(buf)); } @@ -106,27 +134,26 @@ static void _double(Request& r, MethodPa static void _left(Request& r, MethodParams& params) { ssize_t sn=params.as_int(0, "n must be int", r); if(sn<0) - throw Exception("parser.runtime", + throw Exception(PARSER_RUNTIME, 0, "n(%d) must be >=0", sn); size_t n=(size_t)sn; const String& string=GET_SELF(r, VString).string(); - r.write_assign_lang(string.mid(0, n)); + r.write_assign_lang(string.mid(r.charsets.source(), 0, n)); } static void _right(Request& r, MethodParams& params) { ssize_t sn=(size_t)params.as_int(0, "n must be int", r); if(sn<0) - throw Exception("parser.runtime", + throw Exception(PARSER_RUNTIME, 0, "n(%d) must be >=0", sn); size_t n=(size_t)sn; - - + const String& string=GET_SELF(r, VString).string(); - size_t length=string.length(); - r.write_assign_lang(n=0", sbegin); size_t begin=(size_t)sbegin; size_t end; + size_t length=0; if(params.count()>1) { ssize_t sn=params.as_int(1, "n must be int", r); if(sn<0) - throw Exception("parser.runtime", + throw Exception(PARSER_RUNTIME, 0, "n(%d) must be >=0", sn); end=begin+(size_t)sn; - } else - end=string.length(); - - r.write_assign_lang(string.mid(begin, end)); + } else { + length=string.length(r.charsets.source()); + end=length; + } + + r.write_assign_lang(string.mid(r.charsets.source(), begin, end, length)); } static void _pos(Request& r, MethodParams& params) { Value& 
substr=params.as_no_junction(0, "substr must not be code"); const String& string=GET_SELF(r, VString).string(); - r.write_assign_lang(*new VInt((int)string.pos(substr.as_string()))); + ssize_t offset=0; + if(params.count()>1){ + offset=params.as_int(1, "n must be int", r); + if(offset<0) + throw Exception(PARSER_RUNTIME, + 0, + "n(%d) must be >=0", offset); + } + + r.write_no_lang(*new VInt((int)string.pos(r.charsets.source(), substr.as_string(), (size_t)offset))); } static void split_list(MethodParams& params, int paramIndex, @@ -194,7 +233,7 @@ static int split_options(const String* o if(options->pos(o->keyL)!=STRING_NOT_FOUND || (o->keyU && options->pos(o->keyU)!=STRING_NOT_FOUND)) { if(result & o->checkBit) - throw Exception("parser.runtime", + throw Exception(PARSER_RUNTIME, options, "conflicting split options"); result |= o->setBit; @@ -204,9 +243,9 @@ static int split_options(const String* o return result; } -static Table& split_vertical(ArrayString& pieces, bool right) { +static Table& split_vertical(ArrayString& pieces, bool right, const String* column_name) { Table::columns_type columns(new ArrayString); - *columns+=new String("piece"); + *columns+=column_name; Table& table=*new Table(columns, pieces.count()); if(right) { // right @@ -245,13 +284,14 @@ static Table& split_horizontal(ArrayStri static void split_with_options(Request& r, MethodParams& params, int bits) { const String& string=GET_SELF(r, VString).string(); + size_t params_count=params.count(); ArrayString pieces; split_list(params, 0, string, pieces); if(!bits) { const String* options=0; - if(params.count()>1) + if(params_count>1) options=&params.as_string(1, "options must not be code"); bits=split_options(options); @@ -259,8 +299,19 @@ static void split_with_options(Request& bool right=(bits & SPLIT_RIGHT) != 0; bool horizontal=(bits & SPLIT_HORIZONTAL) !=0; - Table& table=horizontal?split_horizontal(pieces, right) - :split_vertical(pieces, right); + + const String* column_name=0; +
if(params_count>2){ + column_name=&params.as_string(2, COLUMN_NAME_MUST_BE_STRING); + if (horizontal && !column_name->is_empty()) + throw Exception(PARSER_RUNTIME, + column_name, + "column name can't be specified with horisontal split"); + } + if(!column_name || column_name->is_empty()) + column_name=new String("piece"); + + Table& table=horizontal?split_horizontal(pieces, right):split_vertical(pieces, right, column_name); r.write_no_lang(*new VTable(&table)); } @@ -281,17 +332,18 @@ static void search_action(Table& table, #ifndef DOXYGEN struct Replace_action_info { - Request* request; - const String* src; String* dest; + Request* request; + const String* src; + String* dest; VTable* vtable; Value* replacement_code; }; #endif /// @todo they can do $global[$result] there, getting pointer to later-invalid local var, kill this static void replace_action(Table& table, ArrayString* row, - int prestart, int prefinish, - int poststart, int postfinish, - void *info) { + int prestart, int prefinish, + int poststart, int postfinish, + void *info) { Replace_action_info& ai=*static_cast<Replace_action_info*>(info); if(row) { // begin&middle // piece from last match['prestart'] to beginning of this match['prefinish'] @@ -302,39 +354,72 @@ static void replace_action(Table& table, table.put(0, row); else // begin table+=row; - { // execute 'replacement_code' in 'table' context - ai.vtable->set_table(table); - *ai.dest << ai.request->process_to_string(*ai.replacement_code); + { // execute 'replacement_code' in 'table' context + if(ai.replacement_code){ + ai.vtable->set_table(table); + *ai.dest << ai.request->process_to_string(*ai.replacement_code); + } } } else // end *ai.dest << ai.src->mid(poststart, postfinish); } -/// @todo use pcre:study somehow static void _match(Request& r, MethodParams& params) { + size_t params_count=params.count(); + Value& regexp=params.as_no_junction(0, "regexp must not be code"); + Value* options=(params_count>1)?&params.as_no_junction(1, "options must not be code"):0; - const
String* options= - params.count()>1? - &params.as_no_junction(1, "options must not be code").as_string():0; + VRegex* vregex; + VRegexCleaner vrcleaner; + + if(Value* value=regexp.as(VREGEX_TYPE, false)){ + if(options && options->is_defined()) + throw Exception(PARSER_RUNTIME, + 0, + "you can not specify regex-object and options together" + ); + vregex=static_cast<VRegex*>(value); + } else { + vregex=new VRegex(r.charsets.source(), + &regexp.as_string(), + (options)?(&options->as_string()):0); + vregex->study(); + vrcleaner.vregex=vregex; + } Temp_lang temp_lang(r, String::L_PASS_APPENDED); const String& src=GET_SELF(r, VString).string(); - bool just_matched; - if(params.count()<3) { // search - Table* table=src.match(r.charsets.source(), - regexp.as_string(), options, + int matches_count=0; + + if(params_count<3) { // search + Table* table=src.match(vregex, search_action, 0, - just_matched); - Value* result; - if(table) - result=new VTable(table); // table of pre/match/post+substrings - else - result=new VBool(just_matched); - r.write_assign_lang(*result); + matches_count); + + if(table){ + r.write_no_lang(*new VTable(table)); + } else { + r.write_no_lang(*new VInt(matches_count)); + } + } else { // replace - Value& replacement_code=params.as_junction(2, "replacement param must be code"); + + Value* replacement_code=0; + bool is_junction=false; + + Value* replacement=&params[2]; + if(replacement->get_junction()){ + replacement_code=replacement; + is_junction=true; + } else if(replacement->is_string()){ + if(replacement->is_defined()) + replacement_code=replacement; + } else if(!replacement->is_void()) + throw Exception(PARSER_RUNTIME, + 0, + "replacement option should be junction or string"); String result; VTable* vtable=new VTable; @@ -343,15 +428,23 @@ static void _match(Request& r, MethodPar &src, &result, vtable, - &replacement_code + replacement_code }; - Temp_value_element temp_match_var( - *replacement_code.get_junction()->method_frame, - match_var_name, vtable); -
src.match(r.charsets.source(), - r.process_to_string(regexp), options, + + Temp_value_element* temp_match_var=0; + + if(is_junction) + temp_match_var=new Temp_value_element( + *replacement_code->get_junction()->method_frame, + match_var_name, vtable); + + src.match(vregex, replace_action, &info, - just_matched); + matches_count); + + if(temp_match_var) + delete temp_match_var; + r.write_assign_lang(result); } } @@ -386,7 +479,7 @@ public: bool add_column(SQL_Error& error, const char* /*str*/, size_t /*length*/) { if(got_column) { - error=SQL_Error("parser.runtime", + error=SQL_Error(PARSER_RUNTIME, //statement_string, "result must contain exactly one column"); return true; @@ -398,7 +491,7 @@ public: bool add_row(SQL_Error& /*error*/) { /* ignore */ return false; } bool add_row_cell(SQL_Error& error, const char* str, size_t length) { if(got_cell) { - error=SQL_Error("parser.runtime", + error=SQL_Error(PARSER_RUNTIME, //statement_string, "result must not contain more then one row"); return true; @@ -428,12 +521,12 @@ const String* sql_result_string(Request& Value& statement=params.as_junction(0, "statement must be code"); HashStringValue* bind=0; - ulong limit=0; + ulong limit=SQL_NO_LIMIT; ulong offset=0; default_code=0; if(params.count()>1) { Value& voptions=params.as_no_junction(1, "options must be hash, not code"); - if(!voptions.is_string()) + if(voptions.is_defined() && !voptions.is_string()) if((options=voptions.get_hash())) { int valid_options=0; if(Value* vbind=options->get(sql_bind_name)) { @@ -450,17 +543,13 @@ const String* sql_result_string(Request& } if((default_code=options->get(sql_default_name))) { valid_options++; - if(!default_code->get_junction()) - throw Exception("parser.runtime", - 0, - "default option must be code"); } if(valid_options!=options->count()) - throw Exception("parser.runtime", + throw Exception(PARSER_RUNTIME, 0, "called with invalid option"); } else - throw Exception("parser.runtime", + throw Exception(PARSER_RUNTIME, 0, 
"options must be hash"); } else @@ -473,9 +562,10 @@ const String* sql_result_string(Request& Temp_lang temp_lang(r, String::L_SQL); const String& statement_string=r.process_to_string(statement); - const char* statement_cstr= - statement_string.cstr(String::L_UNSPECIFIED, r.connection()); + const char* statement_cstr=statement_string.untaint_cstr(r.flang, r.connection()); + String_sql_event_handlers handlers(statement_string, statement_cstr); + r.connection()->query( statement_cstr, placeholders_count, placeholders, @@ -501,7 +591,7 @@ static void _sql(Request& r, MethodParam if(default_code) { string=&r.process_to_string(*default_code); } else - throw Exception("parser.runtime", + throw Exception(PARSER_RUNTIME, 0, "produced no result, but no default option specified"); } @@ -512,9 +602,9 @@ static void _sql(Request& r, MethodParam static void _replace(Request& r, MethodParams& params) { const String& src=GET_SELF(r, VString).string(); - Table* table=params.as_no_junction(0, "parameter must not be code").get_table(); + Table* table=params.as_no_junction(0, PARAM_MUST_NOT_BE_CODE).get_table(); if(!table) - throw Exception("parser.runtime", + throw Exception(PARSER_RUNTIME, 0, "parameter must be table"); @@ -523,26 +613,25 @@ static void _replace(Request& r, MethodP } static void _save(Request& r, MethodParams& params) { - const String& file_name=params.as_string(params.count()-1, - "file name must be string"); + size_t params_count=params.count(); + const String& file_name=params.as_string(params_count-1, FILE_NAME_MUST_BE_STRING); const String& src=GET_SELF(r, VString).string(); bool do_append=false; - if(params.count()>1) { + if(params_count>1) { const String& mode=params.as_string(0, "mode must be string"); if(mode=="append") do_append=true; else - throw Exception("parser.runtime", + throw Exception(PARSER_RUNTIME, &mode, "unknown mode, must be 'append'"); } // write - const char* buf=src.cstr(String::L_UNSPECIFIED, r.connection(false/*no error if none*/)); - 
file_write(r.absolute(file_name), - buf, strlen(buf), true, do_append); + String::Body sbody=src.cstr_to_string_body_untaint(String::L_AS_IS, r.connection(false/*no error if none*/)); + file_write(r.absolute(file_name), sbody.cstr(), sbody.length(), true, do_append); } static void _normalize(Request& r, MethodParams&) { @@ -555,42 +644,79 @@ static void _trim(Request& r, MethodPara const String& src=GET_SELF(r, VString).string(); String::Trim_kind kind=String::TRIM_BOTH; + size_t params_count=params.count(); const char* chars=0; - if(params.count()>0) { - const String& skind=params.as_string(0, - "'where' must be string"); - if(skind.length()) - if(skind==TRIM_START_OPTION) + if(params_count>0) { + const String& skind=params.as_string(0, "'where' must be string"); + if(!skind.is_empty()) + if(skind==TRIM_BOTH_OPTION) + kind=String::TRIM_BOTH; + else if(skind==TRIM_START_OPTION || skind=="start") kind=String::TRIM_START; - else if(skind==TRIM_END_OPTION) + else if(skind==TRIM_END_OPTION || skind=="end") kind=String::TRIM_END; - else if(skind==TRIM_BOTH_OPTION) - kind=String::TRIM_BOTH; else - throw Exception("parser.runtime", + throw Exception(PARSER_RUNTIME, &skind, "'kind' must be one of "TRIM_START_OPTION", "TRIM_BOTH_OPTION", "TRIM_END_OPTION); - if(params.count()>1) { + if(params_count>1) { const String& schars=params.as_string(1, "'chars' must be string"); - if(schars.length()) + if(!schars.is_empty()) chars=schars.cstr(); } } - r.write_assign_lang(src.trim(kind, chars)); + r.write_assign_lang(src.trim(kind, chars, &r.charsets.source())); } static void _append(Request& r, MethodParams& params) { // c=a+b VString& va=GET_SELF(r, VString); const String& a=va.string(); - const String& b=params.as_string(0, "parameter must be string"); + const String& b=params.as_string(0, PARAMETER_MUST_BE_STRING); String& c=*new String(a); c.append(b, String::L_PASS_APPENDED); va.set_string(c); } +static void _base64(Request& r, MethodParams& params) { + if(params.count()) { + 
// decode: ^string:base64[encoded] + const char* cstr=params.as_string(0, PARAMETER_MUST_BE_STRING).cstr(); + char* decoded=0; + size_t length=0; + pa_base64_decode(cstr, strlen(cstr), decoded, length); + if(decoded && length){ + if(memchr((const char*)decoded, 0, length)) + throw Exception(PARSER_RUNTIME, + 0, + "Invalid \\x00 character found while decode to string. Decode it to file instead."); + + fix_line_breaks(decoded, length); + if(length) + r.write_assign_lang(*new String(decoded, String::L_TAINTED)); + } + } else { + // encode: ^str.base64[] + VString& self=GET_SELF(r, VString); + const char* cstr=self.string().cstr(); + const char* encoded=pa_base64_encode(cstr, strlen(cstr)); + r.write_assign_lang(*new String(encoded, String::L_TAINTED/*once ?param=base64(something) was needed*/)); + } +} + +static void _escape(Request& r, MethodParams&){ + const String& src=GET_SELF(r, VString).string(); + r.write_assign_lang(src.escape(r.charsets.source())); +} + +static void _unescape(Request& r, MethodParams& params){ + const String& src=params.as_string(0, PARAMETER_MUST_BE_STRING); + if(const char* result=unescape_chars(src.cstr(), src.length(), &r.charsets.source(), true/* don't unescape '+' char */)) + r.write_assign_lang(*new String(result)); +} + // constructor MString::MString(): Methoded("string") { @@ -603,23 +729,29 @@ MString::MString(): Methoded("string") { // ^string.double[] // ^string.double(default) add_native_method("double", Method::CT_DYNAMIC, _double, 0, 1); + // ^void.bool[] + // ^void.bool(default) + add_native_method("bool", Method::CT_DYNAMIC, _bool, 0, 1); - // ^string.format{format} + // ^string.format[format] add_native_method("format", Method::CT_DYNAMIC, _string_format, 1, 1); // ^string.left(n) add_native_method("left", Method::CT_DYNAMIC, _left, 1, 1); // ^string.right(n) add_native_method("right", Method::CT_DYNAMIC, _right, 1, 1); + // ^string.mid(p) // ^string.mid(p;n) add_native_method("mid", Method::CT_DYNAMIC, _mid, 1, 2); // 
^string.pos[substr] - add_native_method("pos", Method::CT_DYNAMIC, _pos, 1, 1); + // ^string.pos[substr](n) + add_native_method("pos", Method::CT_DYNAMIC, _pos, 1, 2); // ^string.split[delim] // ^string.split[delim][options] - add_native_method("split", Method::CT_DYNAMIC, _split, 1, 2); + // ^string.split[delim][options][column name] + add_native_method("split", Method::CT_DYNAMIC, _split, 1, 3); // old names for backward compatibility // ^string.lsplit[delim] add_native_method("lsplit", Method::CT_DYNAMIC, _lsplit, 1, 1); @@ -630,9 +762,9 @@ MString::MString(): Methoded("string") { // ^string.match[regexp][options]{replacement-code} add_native_method("match", Method::CT_DYNAMIC, _match, 1, 3); - // ^string.toupper[] + // ^string.upper[] add_native_method("upper", Method::CT_DYNAMIC, _upper, 0, 0); - // ^string.tolower[] + // ^string.lower[] add_native_method("lower", Method::CT_DYNAMIC, _lower, 0, 0); // ^sql[query] @@ -653,4 +785,13 @@ MString::MString(): Methoded("string") { // ^string.append[string] add_native_method("append", Method::CT_DYNAMIC, _append, 1, 1); + + // ^string.base64[] << encode + // ^string:base64[encoded string] << decode + add_native_method("base64", Method::CT_ANY, _base64, 0, 1); + + // ^string.js-escape[] + // ^string:js-unescape[escaped%uXXXXstring] + add_native_method("js-escape", Method::CT_ANY, _escape, 0, 0); + add_native_method("js-unescape", Method::CT_STATIC, _unescape, 1, 1); }