--- parser3/src/classes/string.C 2008/07/04 11:16:51 1.162 +++ parser3/src/classes/string.C 2009/04/28 04:53:52 1.178 @@ -1,11 +1,11 @@ /** @file Parser: @b string parser class. - Copyright (c) 2001-2005 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ -static const char * const IDENT_STRING_C="$Date: 2008/07/04 11:16:51 $"; +static const char * const IDENT_STRING_C="$Date: 2009/04/28 04:53:52 $"; #include "classes.h" #include "pa_vmethod_frame.h" @@ -19,6 +19,7 @@ static const char * const IDENT_STRING_C #include "pa_sql_connection.h" #include "pa_dictionary.h" #include "pa_vmethod_frame.h" +#include "pa_vregex.h" // class @@ -47,7 +48,7 @@ static const String match_var_name(MATCH // methods static void _length(Request& r, MethodParams&) { - double result=GET_SELF(r, VString).string().length(); + double result=GET_SELF(r, VString).string().length(r.charsets.source()); r.write_no_lang(*new VDouble(result)); } @@ -116,7 +117,7 @@ static void _bool(Request& r, MethodPara rethrow; // we have a problem when no default } - r.write_no_lang(*new VBool(converted)); + r.write_no_lang(VBool::get(converted)); } /*not static*/void _string_format(Request& r, MethodParams& params) { @@ -139,7 +140,7 @@ static void _left(Request& r, MethodPara size_t n=(size_t)sn; const String& string=GET_SELF(r, VString).string(); - r.write_assign_lang(string.mid(0, n)); + r.write_assign_lang(string.mid(r.charsets.source(), 0, n)); } static void _right(Request& r, MethodParams& params) { @@ -151,8 +152,8 @@ static void _right(Request& r, MethodPar size_t n=(size_t)sn; const String& string=GET_SELF(r, VString).string(); - size_t length=string.length(); - r.write_assign_lang(n1) { ssize_t sn=params.as_int(1, "n must be int", r); if(sn<0) @@ -173,17 +175,28 @@ static void _mid(Request& r, MethodParam 0, "n(%d) must be >=0", sn); end=begin+(size_t)sn; - } else - 
end=string.length(); - - r.write_assign_lang(string.mid(begin, end)); + } else { + length=string.length(r.charsets.source()); + end=length; + } + + r.write_assign_lang(string.mid(r.charsets.source(), begin, end, length)); } static void _pos(Request& r, MethodParams& params) { Value& substr=params.as_no_junction(0, "substr must not be code"); const String& string=GET_SELF(r, VString).string(); - r.write_assign_lang(*new VInt((int)string.pos(substr.as_string()))); + ssize_t offset=0; + if(params.count()>1){ + offset=params.as_int(1, "n must be int", r); + if(offset<0) + throw Exception(PARSER_RUNTIME, + 0, + "n(%d) must be >=0", offset); + } + + r.write_no_lang(*new VInt((int)string.pos(r.charsets.source(), substr.as_string(), (size_t)offset))); } static void split_list(MethodParams& params, int paramIndex, @@ -271,13 +284,14 @@ static Table& split_horizontal(ArrayStri static void split_with_options(Request& r, MethodParams& params, int bits) { const String& string=GET_SELF(r, VString).string(); + size_t params_count=params.count(); ArrayString pieces; split_list(params, 0, string, pieces); if(!bits) { const String* options=0; - if(params.count()>1) + if(params_count>1) options=&params.as_string(1, "options must not be code"); bits=split_options(options); @@ -287,14 +301,14 @@ static void split_with_options(Request& bool horizontal=(bits & SPLIT_HORIZONTAL) !=0; const String* column_name=0; - if(params.count()>2){ + if(params_count>2){ column_name=&params.as_string(2, COLUMN_NAME_MUST_BE_STRING); - if (horizontal && column_name->length()) + if (horizontal && !column_name->is_empty()) throw Exception(PARSER_RUNTIME, column_name, "column name can't be specified with horisontal split"); } - if(!column_name || !column_name->length()) + if(!column_name || column_name->is_empty()) column_name=new String("piece"); Table& table=horizontal?split_horizontal(pieces, right):split_vertical(pieces, right, column_name); @@ -348,27 +362,43 @@ static void replace_action(Table& table, 
*ai.dest << ai.src->mid(poststart, postfinish); } -/// @todo use pcre:study somehow static void _match(Request& r, MethodParams& params) { + size_t params_count=params.count(); + Value& regexp=params.as_no_junction(0, "regexp must not be code"); + Value* options=(params_count>1)?&params.as_no_junction(1, "options must not be code"):0; - const String* options= - params.count()>1? - &params.as_no_junction(1, "options must not be code").as_string():0; + VRegex* vregex; + VRegexCleaner vrcleaner; + + if(Value* value=regexp.as(VREGEX_TYPE, false)){ + if(options && options->is_defined()) + throw Exception(PARSER_RUNTIME, + 0, + "you can not specify regex-object and options together" + ); + vregex=static_cast<VRegex*>(value); + } else { + vregex=new VRegex(r.charsets.source(), + &regexp.as_string(), + (options)?(&options->as_string()):0); + vregex->study(); + vrcleaner.vregex=vregex; + } Temp_lang temp_lang(r, String::L_PASS_APPENDED); const String& src=GET_SELF(r, VString).string(); int matches_count=0; - if(params.count()<3) { // search - Table* table=src.match(r.charsets.source(), - regexp.as_string(), options, + + if(params_count<3) { // search + Table* table=src.match(vregex, search_action, 0, matches_count); - // r.write_assign_lang(*new VTable(table)); + if(table){ - r.write_assign_lang(*new VTable(table)); + r.write_no_lang(*new VTable(table)); } else { - r.write_assign_lang(*new VInt(matches_count)); + r.write_no_lang(*new VInt(matches_count)); } } else { // replace @@ -386,10 +416,11 @@ static void _match(Request& r, MethodPar Temp_value_element temp_match_var( *replacement_code.get_junction()->method_frame, match_var_name, vtable); - src.match(r.charsets.source(), - r.process_to_string(regexp), options, + + src.match(vregex, replace_action, &info, matches_count); + r.write_assign_lang(result); } } @@ -558,12 +589,13 @@ static void _replace(Request& r, MethodP } static void _save(Request& r, MethodParams& params) { - const String& file_name=params.as_string(params.count()-1, 
FILE_NAME_MUST_BE_STRING); + size_t params_count=params.count(); + const String& file_name=params.as_string(params_count-1, FILE_NAME_MUST_BE_STRING); const String& src=GET_SELF(r, VString).string(); bool do_append=false; - if(params.count()>1) { + if(params_count>1) { const String& mode=params.as_string(0, "mode must be string"); if(mode=="append") do_append=true; @@ -589,11 +621,11 @@ static void _trim(Request& r, MethodPara const String& src=GET_SELF(r, VString).string(); String::Trim_kind kind=String::TRIM_BOTH; + size_t params_count=params.count(); const char* chars=0; - if(params.count()>0) { - const String& skind=params.as_string(0, - "'where' must be string"); - if(skind.length()) + if(params_count>0) { + const String& skind=params.as_string(0, "'where' must be string"); + if(!skind.is_empty()) if(skind==TRIM_BOTH_OPTION) kind=String::TRIM_BOTH; else if(skind==TRIM_START_OPTION || skind=="start") @@ -605,9 +637,9 @@ static void _trim(Request& r, MethodPara &skind, "'kind' must be one of "TRIM_START_OPTION", "TRIM_BOTH_OPTION", "TRIM_END_OPTION); - if(params.count()>1) { + if(params_count>1) { const String& schars=params.as_string(1, "'chars' must be string"); - if(schars.length()) + if(!schars.is_empty()) chars=schars.cstr(); } } @@ -627,22 +659,42 @@ static void _append(Request& r, MethodPa static void _base64(Request& r, MethodParams& params) { if(params.count()) { - // decode + // decode: ^string:base64[encoded] const char* cstr=params.as_string(0, PARAMETER_MUST_BE_STRING).cstr(); - char* decoded_cstr=0; - size_t decoded_size=0; - pa_base64_decode(cstr, strlen(cstr), decoded_cstr, decoded_size); - if(decoded_cstr && decoded_size) - r.write_assign_lang(*new String(decoded_cstr, decoded_size, true)); + char* decoded=0; + size_t length=0; + pa_base64_decode(cstr, strlen(cstr), decoded, length); + if(decoded && length){ + if(memchr((const char*)decoded, 0, length)) + throw Exception(PARSER_RUNTIME, + 0, + "Invalid \\x00 character found while decode to 
string. Decode it to file instead."); + + fix_line_breaks(decoded, length); + if(length){ + r.write_assign_lang(*new String(decoded, length, true/*tainted*/)); + } + } } else { - // encode + // encode: ^str.base64[] VString& self=GET_SELF(r, VString); const char* cstr=self.string().cstr(); const char* encoded=pa_base64_encode(cstr, strlen(cstr)); - r.write_assign_lang(*new String(encoded, 0, true/*once ?param=base64(something) was needed*/)); + r.write_assign_lang(*new String(encoded, 0, true/*tainted. once ?param=base64(something) was needed*/)); } } +static void _escape(Request& r, MethodParams&){ + const String& src=GET_SELF(r, VString).string(); + r.write_assign_lang(src.escape(r.charsets.source())); +} + +static void _unescape(Request& r, MethodParams& params){ + const String& src=params.as_string(0, PARAMETER_MUST_BE_STRING); + if(const char* result=unescape_chars(src.cstr(), src.length(), &r.charsets.source(), true/* don't unescape '+' char */)) + r.write_assign_lang(*new String(result)); +} + // constructor MString::MString(): Methoded("string") { @@ -659,18 +711,20 @@ MString::MString(): Methoded("string") { // ^void.bool(default) add_native_method("bool", Method::CT_DYNAMIC, _bool, 0, 1); - // ^string.format{format} + // ^string.format[format] add_native_method("format", Method::CT_DYNAMIC, _string_format, 1, 1); // ^string.left(n) add_native_method("left", Method::CT_DYNAMIC, _left, 1, 1); // ^string.right(n) add_native_method("right", Method::CT_DYNAMIC, _right, 1, 1); + // ^string.mid(p) // ^string.mid(p;n) add_native_method("mid", Method::CT_DYNAMIC, _mid, 1, 2); // ^string.pos[substr] - add_native_method("pos", Method::CT_DYNAMIC, _pos, 1, 1); + // ^string.pos[substr](n) + add_native_method("pos", Method::CT_DYNAMIC, _pos, 1, 2); // ^string.split[delim] // ^string.split[delim][options] @@ -686,9 +740,9 @@ MString::MString(): Methoded("string") { // ^string.match[regexp][options]{replacement-code} add_native_method("match", Method::CT_DYNAMIC, _match, 
1, 3); - // ^string.toupper[] + // ^string.upper[] add_native_method("upper", Method::CT_DYNAMIC, _upper, 0, 0); - // ^string.tolower[] + // ^string.lower[] add_native_method("lower", Method::CT_DYNAMIC, _lower, 0, 0); // ^sql[query] @@ -713,4 +767,9 @@ MString::MString(): Methoded("string") { // ^string.base64[] << encode // ^string:base64[encoded string] << decode add_native_method("base64", Method::CT_ANY, _base64, 0, 1); + + // ^string.js-escape[] + // ^string:js-unescape[escaped%uXXXXstring] + add_native_method("js-escape", Method::CT_ANY, _escape, 0, 0); + add_native_method("js-unescape", Method::CT_STATIC, _unescape, 1, 1); }