--- parser3/src/classes/file.C 2003/09/25 09:15:02 1.112 +++ parser3/src/classes/file.C 2007/02/07 15:50:32 1.151 @@ -1,11 +1,11 @@ /** @file Parser: @b file parser class. - Copyright (c) 2001-2003 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001-2005 ArtLebedev Group (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ -static const char* IDENT_FILE_C="$Date: 2003/09/25 09:15:02 $"; +static const char * const IDENT_FILE_C="$Date: 2007/02/07 15:50:32 $"; #include "pa_config_includes.h" @@ -24,19 +24,29 @@ static const char* IDENT_FILE_C="$Date: #include "pa_vtable.h" #include "pa_charset.h" #include "pa_charsets.h" +#include "pa_sql_connection.h" +#include "pa_md5.h" // defines #define TEXT_MODE_NAME "text" +#define BINARY_MODE_NAME "binary" #define STDIN_EXEC_PARAM_NAME "stdin" #define CHARSET_EXEC_PARAM_NAME "charset" +#define NAME_NAME "name" + +// externs + +extern String sql_limit_name; +extern String sql_offset_name; + // class class MFile: public Methoded { public: // VStateless_class - Value* create_new_value() { return new VFile(); } + Value* create_new_value(Pool&, HashStringValue&) { return new VFile(); } public: // Methoded bool used_directly() { return true; } @@ -104,13 +114,23 @@ static const String::Body cdate_name("cd // methods +static bool is_text_mode(const String& mode) { + if(mode==TEXT_MODE_NAME) + return true; + if(mode==BINARY_MODE_NAME) + return false; + throw Exception("parser.runtime", + &mode, + "is invalid mode, must be either '"TEXT_MODE_NAME"' or '"BINARY_MODE_NAME"'"); +} + static void _save(Request& r, MethodParams& params) { Value& vmode_name=params. as_no_junction(0, "mode must not be code"); Value& vfile_name=params.as_no_junction(1, "file name must not be code"); // save GET_SELF(r, VFile).save(r.absolute(vfile_name.as_string()), - vmode_name.as_string()==TEXT_MODE_NAME); + is_text_mode(vmode_name.as_string())); } static void _delete(Request& r, MethodParams& params) { @@ -130,6 +150,47 @@ static void _move(Request& r, MethodPara r.absolute(vto_file_name.as_string())); } +static void copy_process_source( + struct stat& , + int from_file, + const String& , const char* /*fname*/, bool, + void *context) { + int& to_file=*static_cast(context); + + int nCount=0; + do { + unsigned char buffer[FILE_BUFFER_SIZE]; + nCount = file_block_read(from_file, buffer, sizeof(buffer)); + int written=write(to_file, buffer, nCount); + if( written < 0 ) + throw Exception(0, + 0, + "write failed: %s (%d)", strerror(errno), errno); + + } while(nCount > 0); +} + +static void copy_open_target(int f, void *from_spec) { + String& file_spec=*static_cast(from_spec); + file_read_action_under_lock(file_spec, "copy", copy_process_source, &f); +}; + +static void _copy(Request& r, MethodParams& params) { + Value& vfrom_file_name=params.as_no_junction(0, "from file name must not be code"); + Value& vto_file_name=params.as_no_junction(1, "to file name must not be code"); + + String from_spec = r.absolute(vfrom_file_name.as_string()); + const String& to_spec = r.absolute(vto_file_name.as_string()); + + create_dir_for_file(to_spec); + + file_write_action_under_lock( + to_spec, + "copy", + copy_open_target, + &from_spec); +} + static void _load_pass_param( HashStringValue::key_type key, HashStringValue::value_type value, @@ -146,9 +207,22 @@ static void _load(Request& r, MethodPara if(third_param_hash) alt_filename_param_index++; + HashStringValue* options=third_param_hash; + size_t offset=0; + size_t limit=0; + if(options) { + options=new HashStringValue(*options); + if(Value *voffset=(Value *)options->get(sql_offset_name)) { + offset=r.process_to_value(*voffset).as_int(); + } + if(Value *vlimit=(Value *)options->get(sql_limit_name)) { + limit=r.process_to_value(*vlimit).as_int(); + } + // no check on options count here, see file_read + } File_read_result file=file_read(r.charsets, lfile_name, - vmode_name.as_string()==TEXT_MODE_NAME, - third_param_hash + is_text_mode(vmode_name.as_string()), + options, true, 0, offset, limit ); const char *user_file_name=params.count()>alt_filename_param_index? @@ -157,14 +231,36 @@ static void _load(Request& r, MethodPara Value* vcontent_type=0; if(file.headers) - vcontent_type=file.headers->get(content_type_name); + { + if(Value* remote_content_type=file.headers->get("CONTENT-TYPE")) + vcontent_type=new VString(*new String(remote_content_type->as_string().cstr())); + } if(!vcontent_type) vcontent_type=new VString(r.mime_type_of(user_file_name)); VFile& self=GET_SELF(r, VFile); self.set(true/*tainted*/, file.str, file.length, user_file_name, vcontent_type); if(file.headers) - file.headers->for_each(_load_pass_param, &self.fields()); + file.headers->for_each(_load_pass_param, &self.fields()); +} + +static void _create(Request& r, MethodParams& params) { + Value& vmode_name=params. as_no_junction(0, "mode must not be code"); + if(!is_text_mode(vmode_name.as_string())) + throw Exception("parser.runtime", + 0, + "only text mode is currently supported"); + + const char* user_file_name_cstr=r.absolute( + params.as_no_junction(1, "file name must not be code").as_string()).cstr(String::L_FILE_SPEC); + + const String& content=params.as_string(2, "content must be string"); + const char* content_cstr=content.cstr(String::L_UNSPECIFIED); // explode content, honor tainting changes + + VString* vcontent_type=new VString(r.mime_type_of(user_file_name_cstr)); + + VFile& self=GET_SELF(r, VFile); + self.set(true/*tainted*/, content_cstr, strlen(content_cstr), user_file_name_cstr, vcontent_type); } static void _stat(Request& r, MethodParams& params) { @@ -205,9 +301,9 @@ static bool is_safe_env_key(const char* } #ifndef DOXYGEN struct Append_env_pair_info { + Request_charsets* charsets; HashStringString* env; Value* vstdin; - Value* vcharset; }; #endif static void append_env_pair( @@ -217,13 +313,13 @@ static void append_env_pair( if(akey==STDIN_EXEC_PARAM_NAME) { info->vstdin=avalue; } else if(akey==CHARSET_EXEC_PARAM_NAME) { - info->vcharset=avalue; + // ignore, already processed } else { if(!is_safe_env_key(akey.cstr())) throw Exception("parser.runtime", new String(akey, String::L_TAINTED), "not safe environment variable"); - info->env->put(akey, avalue->as_string()); + info->env->put(akey, avalue->as_string().cstr_to_string_body(String::L_UNSPECIFIED, 0, info->charsets)); } } #ifndef DOXYGEN @@ -237,10 +333,10 @@ static void pass_cgi_header_attribute( ArrayString::element_type astring, Pass_cgi_header_attribute_info* info) { size_t colon_pos=astring->pos(':'); - if(colon_pos==STRING_NOT_FOUND) { + if(colon_pos!=STRING_NOT_FOUND) { const String& key=astring->mid(0, colon_pos).change_case( *info->charset, String::CC_UPPER); - Value* value=new VString(astring->mid(colon_pos+1, astring->length())); + Value* value=new VString(astring->mid(colon_pos+1, astring->length()).trim()); info->fields->put(key, value); if(key=="CONTENT-TYPE") info->content_type=value; @@ -294,8 +390,19 @@ static void _exec_cgi(Request& r, Method if(params.count()>1) { Value& venv=params.as_no_junction(1, "env must not be code"); if(HashStringValue* user_env=venv.get_hash()) { - Append_env_pair_info info={&env}; - user_env->for_each(append_env_pair, &info); + // $.charset [previewing to handle URI pieces] + if(Value* vcharset=user_env->get(CHARSET_EXEC_PARAM_NAME)) + charset=&charsets.get(vcharset->as_string() + .change_case(r.charsets.source(), String::CC_UPPER)); + + // $.others + Append_env_pair_info info={&r.charsets, &env, 0}; + { + // influence tainting + // main target -- $.QUERY_STRING -- URLencoding of tainted pieces to String::L_URI lang + Temp_client_charset temp(r.charsets, charset? *charset: r.charsets.source()); + user_env->for_each(append_env_pair, &info); + } // $.stdin if(info.vstdin) { stdin_specified=true; @@ -309,18 +416,21 @@ static void _exec_cgi(Request& r, Method 0, STDIN_EXEC_PARAM_NAME " parameter must be string or file"); } - // $.charset - if(info.vcharset) - charset=&charsets.get(info.vcharset->as_string() - .change_case(r.charsets.source(), String::CC_UPPER)); } } // argv from params ArrayString argv; if(params.count()>2) { - for(size_t i=2; i 0) { + argv+=new String(param.cstr_to_string_body(String::L_UNSPECIFIED, 0, &r.charsets), String::L_AS_IS); + } + } } // transcode if necessary @@ -348,7 +458,6 @@ static void _exec_cgi(Request& r, Method VFile& self=GET_SELF(r, VFile); const String* body=real_out; // ^file:exec - Value* content_type=0; const char* eol_marker=0; size_t eol_marker_size; const String* header=0; if(cgi) { // ^file:cgi @@ -399,7 +508,7 @@ static void _exec_cgi(Request& r, Method ArrayString rows; size_t pos_after=0; header->split(rows, pos_after, eol_marker); - Pass_cgi_header_attribute_info info={0}; + Pass_cgi_header_attribute_info info={0, 0, 0}; info.charset=&r.charsets.source(); info.fields=&self.fields(); rows.for_each(pass_cgi_header_attribute, &info); @@ -444,8 +553,10 @@ static void _list(Request& r, MethodPara throw Exception(0, ®exp->mid(erroffset, regexp->length()), "regular expression syntax error - %s", errptr); - } else + } else { + regexp=0; // not used, just to calm down compiler regexp_code=0; + } const char* absolute_path_cstr=r.absolute(relative_path.as_string()).cstr(String::L_FILE_SPEC); @@ -500,19 +611,20 @@ static void lock_execute_body(int , void info.r->write_assign_lang(info.r->process(*info.body_code)); }; static void _lock(Request& r, MethodParams& params) { - Lock_execute_body_info info={0}; - info.r=&r; const String& file_spec=r.absolute(params.as_string(0, "file name must be string")); - info.body_code=¶ms.as_junction(1, "body must be code"); + Lock_execute_body_info info={ + &r, + ¶ms.as_junction(1, "body must be code") + }; file_write_action_under_lock(file_spec, "lock", lock_execute_body, &info); } static int lastposafter(const String& s, size_t after, const char* substr, size_t substr_size, bool beforelast=false) { - size_t size; + size_t size=0; // just to calm down compiler if(beforelast) size=s.length(); - int at; + size_t at; while((at=s.pos(String::Body(substr, substr_size), after))!=STRING_NOT_FOUND) { size_t newafter=at+substr_size/*skip substr*/; if(beforelast && newafter==size) @@ -532,7 +644,7 @@ static void _find(Request& r, MethodPara file_spec=&r.relative(r.request_info.uri, file_name); // easy way - if(file_readable(r.absolute(*file_spec))) { + if(file_exist(r.absolute(*file_spec))) { r.write_assign_lang(*file_spec); return; } @@ -547,7 +659,7 @@ static void _find(Request& r, MethodPara String test_name; test_name<<*(dirname=&dirname->mid(0, after_monkey_slash)); test_name<quote(self.value_ptr(), self.value_size()); + r.write_assign_lang(*new String(quoted)); +} + +#ifndef DOXYGEN +class File_sql_event_handlers: public SQL_Driver_query_event_handlers { + const String& statement_string; const char* statement_cstr; + int got_columns; + int got_cells; +public: + String::C value; + const String* user_file_name; + const String* user_content_type; +public: + File_sql_event_handlers( + const String& astatement_string, const char* astatement_cstr): + statement_string(astatement_string), statement_cstr(astatement_cstr), + got_columns(0), + got_cells(0), + user_file_name(0), + user_content_type(0) {} + + bool add_column(SQL_Error& error, const char* /*str*/, size_t /*length*/) { + if(got_columns++==3) { + error=SQL_Error("parser.runtime", "result must contain not more then 3 columns"); + return true; + } + return false; + } + bool before_rows(SQL_Error& /*error*/ ) { /* ignore */ return false; } + bool add_row(SQL_Error& /*error*/) { /* ignore */ return false; } + bool add_row_cell(SQL_Error& error, const char* str, size_t length) { + try { + switch(got_cells++) { + case 0: + value=String::C(str, length); + break; + case 1: + if(!user_file_name) // user not specified? + user_file_name=new String(str, length, true); + break; + case 2: + if(!user_content_type) // user not specified? + user_content_type=new String(str, length, true); + break; + default: + error=SQL_Error("parser.runtime", "result must not contain more then one row, three rows"); + return true; + } + return false; + } catch(...) { + error=SQL_Error("exception occured in File_sql_event_handlers::add_row_cell"); + return true; + } + } +}; +#endif +static void _sql(Request& r, MethodParams& params) { + Value& statement=params.as_junction(0, "statement must be code"); + + Temp_lang temp_lang(r, String::L_SQL); + const String& statement_string=r.process_to_string(statement); + const char* statement_cstr= + statement_string.cstr(String::L_UNSPECIFIED, r.connection()); + File_sql_event_handlers handlers(statement_string, statement_cstr); + + if(params.count()>1) + if(HashStringValue* options= + params.as_no_junction(1, "param must not be code").get_hash()) { + int valid_options=0; + if(Value* vfilename=options->get(NAME_NAME)) { + valid_options++; + handlers.user_file_name=&vfilename->as_string(); + } + if(Value* vcontent_type=options->get(CONTENT_TYPE_NAME)) { + valid_options++; + handlers.user_content_type=&vcontent_type->as_string(); + } + if(valid_options!=options->count()) + throw Exception("parser.runtime", + 0, + "called with invalid option"); + } + + + r.connection()->query( + statement_cstr, + 0, 0, + 0, 0, + handlers, + statement_string); + + if(!handlers.value) + throw Exception("parser.runtime", + 0, + "produced no result"); + + const char* user_file_name_cstr=handlers.user_file_name? handlers.user_file_name->cstr(): 0; + + VString* vcontent_type=handlers.user_content_type? + new VString(*handlers.user_content_type) + : user_file_name_cstr? + new VString(r.mime_type_of(user_file_name_cstr)) + : 0; + VFile& self=GET_SELF(r, VFile); + self.set(true/*tainted*/, handlers.value.str, handlers.value.length, user_file_name_cstr, vcontent_type); +} + +static void _base64(Request& r, MethodParams& params) { + bool dynamic = !(&r.get_self() == file_class); + if ( dynamic ){ + VFile& self=GET_SELF(r, VFile); + if(params.count()) { + // decode + const char* cstr=params.as_string(0, "parameter must be string").cstr(); + char* decoded_cstr=0; + size_t decoded_size=0; + pa_base64_decode(cstr, strlen(cstr), decoded_cstr, decoded_size); + if(decoded_cstr && decoded_size) + self.set(true/*tainted*/, decoded_cstr, decoded_size); + } else { + // encode + const char* encoded=pa_base64_encode(self.value_ptr(), self.value_size()); + r.write_assign_lang(*new String(encoded, 0, true/*once ?param=base64(something) was needed*/)); + } + } else { + // encode + const String& file_spec=params.as_string(0, "file name must be string"); + const char* encoded=pa_base64_encode(r.absolute(file_spec)); + r.write_assign_lang(*new String(encoded, 0, true/*once ?param=base64(something) was needed*/)); + } +} + +static void _crc32(Request& r, MethodParams& params) { + unsigned long crc32 = 0; + if(&r.get_self() == file_class) { + // ^file:crc32[file-name] + if(params.count()) { + const String& file_spec=params.as_string(0, "file name must be string"); + crc32=pa_crc32(r.absolute(file_spec)); + } else { + throw Exception("parser.runtime", + 0, + "file name must be defined"); + } + } else { + // ^file.crc32[] + VFile& self=GET_SELF(r, VFile); + crc32=pa_crc32(self.value_ptr(), self.value_size()); + } + r.write_no_lang(*new VInt(crc32)); +} + + +static void file_md5_file_action( + struct stat& finfo, + int f, + const String& , const char* /*fname*/, bool, + void *context) +{ + PA_MD5_CTX& md5context=*static_cast(context); + if(finfo.st_size) { + int nCount=0; + do { + unsigned char buffer[FILE_BUFFER_SIZE]; + nCount = file_block_read(f, buffer, sizeof(buffer)); + if ( nCount ){ + pa_MD5Update(&md5context, (const unsigned char*)buffer, nCount); + } + } while(nCount > 0); + } +} + +const char* pa_md5(const String& file_spec) +{ + PA_MD5_CTX context; + unsigned char digest[16]; + pa_MD5Init(&context); + file_read_action_under_lock(file_spec, "md5", file_md5_file_action, &context); + pa_MD5Final(digest, &context); + + return hex_string(digest, sizeof(digest), false); +} + +const char* pa_md5(const char *in, size_t in_size) +{ + PA_MD5_CTX context; + unsigned char digest[16]; + pa_MD5Init(&context); + pa_MD5Update(&context, (const unsigned char*)in, in_size); + pa_MD5Final(digest, &context); + + return hex_string(digest, sizeof(digest), false); +} + +static void _md5(Request& r, MethodParams& params) { + const char* md5; + if(&r.get_self() == file_class) { + // ^file:md5[file-name] + if(params.count()) { + const String& file_spec=params.as_string(0, "file name must be string"); + md5=pa_md5(r.absolute(file_spec)); + } else { + throw Exception("parser.runtime", + 0, + "file name must be defined"); + } + } else { + // ^file.md5[] + VFile& self=GET_SELF(r, VFile); + md5=pa_md5(self.value_ptr(), self.value_size()); + + } + r.write_no_lang(*new String(md5)); +} // constructor MFile::MFile(): Methoded("file") { - // ^save[mode;file-name] + // ^file::create[text;user-name;string] + // ^file::create[binary;user-name;SOMEDAY SOMETHING] + add_native_method("create", Method::CT_DYNAMIC, _create, 3, 3); + + // ^file.save[mode;file-name] add_native_method("save", Method::CT_DYNAMIC, _save, 2, 2); - // ^delete[file-name] + // ^file:delete[file-name] add_native_method("delete", Method::CT_STATIC, _delete, 1, 1); - // ^move[from-file-name;to-file-name] + // ^file:move[from-file-name;to-file-name] add_native_method("move", Method::CT_STATIC, _move, 2, 2); - // ^load[mode;disk-name] - // ^load[mode;disk-name;user-name] + // ^file::load[mode;disk-name] + // ^file::load[mode;disk-name;user-name] add_native_method("load", Method::CT_DYNAMIC, _load, 2, 3); - // ^stat[disk-name] + // ^file::stat[disk-name] add_native_method("stat", Method::CT_DYNAMIC, _stat, 1, 1); - // ^cgi[file-name] - // ^cgi[file-name;env hash] - // ^cgi[file-name;env hash;1cmd;2line;3ar;4g;5s] - add_native_method("cgi", Method::CT_DYNAMIC, _cgi, 1, 2+10); - - // ^exec[file-name] - // ^exec[file-name;env hash] - // ^exec[file-name;env hash;1cmd;2line;3ar;4g;5s] - add_native_method("exec", Method::CT_DYNAMIC, _exec, 1, 2+10); + // ^file::cgi[file-name] + // ^file::cgi[file-name;env hash] + // ^file::cgi[file-name;env hash;1cmd;2line;3ar;4g;5s] + add_native_method("cgi", Method::CT_DYNAMIC, _cgi, 1, 2+50); + + // ^file::exec[file-name] + // ^file::exec[file-name;env hash] + // ^file::exec[file-name;env hash;1cmd;2line;3ar;4g;5s] + add_native_method("exec", Method::CT_DYNAMIC, _exec, 1, 2+50); // ^file:list[path] // ^file:list[path][regexp] @@ -650,8 +984,8 @@ MFile::MFile(): Methoded("file") { // ^file:lock[path]{code} add_native_method("lock", Method::CT_STATIC, _lock, 2, 2); - // ^find[file-name] - // ^find[file-name]{when-not-found} + // ^file:find[file-name] + // ^file:find[file-name]{when-not-found} add_native_method("find", Method::CT_STATIC, _find, 1, 2); // ^file:dirname[/a/some.tar.gz]=/a @@ -665,4 +999,26 @@ MFile::MFile(): Methoded("file") { add_native_method("justext", Method::CT_STATIC, _justext, 1, 1); // /some/page.html: ^file:fullpath[a.gif] => /some/a.gif add_native_method("fullpath", Method::CT_STATIC, _fullpath, 1, 1); + + // ^file.sql-string[] + add_native_method("sql-string", Method::CT_DYNAMIC, _sql_string, 0, 0); + + // ^file::sql[[alt_name]]{} + add_native_method("sql", Method::CT_DYNAMIC, _sql, 1, 2); + + // ^file::base64[string] << decode + // ^file.base64[] << encode + // ^file:base64[file-name] << encode + add_native_method("base64", Method::CT_ANY, _base64, 0, 1); + + // ^file.crc32[] + // ^file:crc32[file-name] + add_native_method("crc32", Method::CT_ANY, _crc32, 0, 1); + + // ^file.md5[] + // ^file:md5[file-name] + add_native_method("md5", Method::CT_ANY, _md5, 0, 1); + + // ^file:copy[from-file-name;to-file-name] + add_native_method("copy", Method::CT_STATIC, _copy, 2, 2); }