--- parser3/src/classes/file.C 2001/03/19 20:46:35 1.2 +++ parser3/src/classes/file.C 2003/02/06 14:24:50 1.107.2.7 @@ -1,38 +1,658 @@ -/* - Parser - Copyright (c) 2001 ArtLebedev Group (http://www.artlebedev.com) - Author: Alexander Petrosyan (http://design.ru/paf) +/** @file + Parser: @b file parser class. - $Id: file.C,v 1.2 2001/03/19 20:46:35 paf Exp $ + Copyright (c) 2001-2003 ArtLebedev Group (http://www.artlebedev.com) + Author: Alexandr Petrosian (http://paf.design.ru) */ +static const char* IDENT_FILE_C="$Date: 2003/02/06 14:24:50 $"; + +#include "pa_config_includes.h" + +#include "pcre.h" + +#include "classes.h" +#include "pa_vmethod_frame.h" + #include "pa_request.h" -#include "_file.h" #include "pa_vfile.h" +#include "pa_table.h" +#include "pa_vint.h" +#include "pa_exec.h" +#include "pa_vdate.h" +#include "pa_dir.h" +#include "pa_vtable.h" +#include "pa_charset.h" + +// defines + +#define TEXT_MODE_NAME "text" +#define STDIN_EXEC_PARAM_NAME "stdin" + +// consts + +/// from apache-1.3|src|support|suexec.c +static const char* suexec_safe_env_lst[]={ + "AUTH_TYPE", + "CONTENT_LENGTH", + "CONTENT_TYPE", + "DATE_GMT", + "DATE_LOCAL", + "DOCUMENT_NAME", + "DOCUMENT_PATH_INFO", + "DOCUMENT_ROOT", + "DOCUMENT_URI", + "FILEPATH_INFO", + "GATEWAY_INTERFACE", + "LAST_MODIFIED", + "PATH_INFO", + "PATH_TRANSLATED", + "QUERY_STRING", + "QUERY_STRING_UNESCAPED", + "REMOTE_ADDR", + "REMOTE_HOST", + "REMOTE_IDENT", + "REMOTE_PORT", + "REMOTE_USER", + "REDIRECT_QUERY_STRING", + "REDIRECT_STATUS", + "REDIRECT_URL", + "REQUEST_METHOD", + "REQUEST_URI", + "SCRIPT_FILENAME", + "SCRIPT_NAME", + "SCRIPT_URI", + "SCRIPT_URL", + "SERVER_ADMIN", + "SERVER_NAME", + "SERVER_ADDR", + "SERVER_PORT", + "SERVER_PROTOCOL", + "SERVER_SOFTWARE", + "UNIQUE_ID", + "USER_NAME", + "TZ", + NULL +}; + +// statics + +static StringPtr adate_name(new String("adate")); +static StringPtr mdate_name(new String("mdate")); +static StringPtr cdate_name(new String("cdate")); + +// class + +class MFile : public Methoded { +public: // VStateless_class + + ValuePtr create_new_value() { return ValuePtr(new VFile()); } + +public: // Methoded + bool used_directly() { return true; } + +public: + MFile(); -// global var +}; -VStateless_class *file_base_class; +// global variable + +MethodedPtr file_class_ptr(new MFile); // methods -static void _save(Request& r, const String& method_name, Array *params) { - //\Pool& pool=r.pool(); - Value *vfile_name=static_cast(params->get(0)); - // forcing - // ^save{this body type} - r.fail_if_junction_(false, *vfile_name, method_name, "file name must be junction"); +static void _save(Request& r, StringPtr /*method_name*/, MethodParams& params) { + Pool& pool=r.pool(); + ValuePtr vmode_name=params. as_no_junction(0, "mode must not be code"); + ValuePtr vfile_name=params.as_no_junction(1, "file name must not be code"); + + // save + GET_SELF(r, VFile).save(r.absolute(vfile_name->as_string(&pool)), + *vmode_name->as_string(&pool)==TEXT_MODE_NAME); +} + +static void _delete(Request& r, StringPtr /*method_name*/, MethodParams& params) { + Pool& pool=r.pool(); + ValuePtr vfile_name=params.as_no_junction(0, "file name must not be code"); + + // unlink + file_delete(r.absolute(vfile_name->as_string(&pool))); +} + +static void _move(Request& r, StringPtr /*method_name*/, MethodParams& params) { + Pool& pool=r.pool(); + ValuePtr vfrom_file_name=params.as_no_junction(0, "from file name must not be code"); + ValuePtr vto_file_name=params.as_no_junction(1, "to file name must not be code"); + + // move + file_move( + r.absolute(vfrom_file_name->as_string(&pool)), + r.absolute(vto_file_name->as_string(&pool))); +} + +static void _load_pass_param( + HashStringValue::key_type key, + HashStringValue::value_type value, + HashStringValue *dest) { + dest->put(key, value); +} +static void _load(Request& r, StringPtr method_name, MethodParams& params) { + Pool& pool=r.pool(); + ValuePtr vmode_name=params. as_no_junction(0, "mode must not be code"); + StringPtr lfile_name=r.absolute(params.as_no_junction(1, "file name must not be code")->as_string(&pool)); + ValuePtr third_param=params.count()>2?params.as_no_junction(2, "filename or options must not be code") + :ValuePtr(0); + HashStringValue* third_param_hash=third_param?third_param->get_hash(method_name):0; + int alt_filename_param_index=2; + if(third_param_hash) + alt_filename_param_index++; + + char *data; size_t size; + File_read_result file=file_read(pool, r.charsets.source(), lfile_name, data, size, + *vmode_name->as_string(&pool)==TEXT_MODE_NAME, + third_param_hash + ); + + char *user_file_name=params.count()>alt_filename_param_index? + params.as_string(alt_filename_param_index, "filename must be string")->cstr(pool) + :lfile_name->cstr(pool, String::UL_FILE_SPEC); + + ValuePtr vcontent_type(0); + if(file.headers) + vcontent_type=file.headers->get(content_type_name); + if(!vcontent_type) + vcontent_type=ValuePtr(new VString(r.mime_type_of(user_file_name))); + + VFile& self=GET_SELF(r, VFile); + self.set(pool, true/*tainted*/, data, size, user_file_name, vcontent_type); + if(file.headers) + file.headers->for_each(_load_pass_param, &self.fields()); +} + +static void _stat(Request& r, StringPtr method_name, MethodParams& params) { + Pool& pool=r.pool(); + ValuePtr vfile_name=params.as_no_junction(0, "file name must not be code"); - { - Temp_lang temp_lang(r, String::Untaint_lang::FILE); - static_cast(r.self)->save( - r.absolute(r.process(*vfile_name).as_string().cstr())); + StringPtr lfile_name=vfile_name->as_string(&pool); + + size_t size; + time_t atime, mtime, ctime; + file_stat(r.absolute(lfile_name), + size, + atime, mtime, ctime); + + VFile& self=GET_SELF(r, VFile); + self.set(pool, true/*tainted*/, 0/*no bytes*/, size); + HashStringValue& ff=self.fields(); + ff.put(adate_name, ValuePtr(new VDate(atime))); + ff.put(mdate_name, ValuePtr(new VDate(mtime))); + ff.put(cdate_name, ValuePtr(new VDate(ctime))); + ff.put(content_type_name, ValuePtr(new VString(r.mime_type_of(lfile_name->cstr(String::UL_FILE_SPEC))))); +} + +static bool is_safe_env_key(CharPtr key) { + if(strncasecmp(key, "HTTP_", 5)==0) + return true; + if(strncasecmp(key, "CGI_", 4)==0) + return true; + for(int i=0; suexec_safe_env_lst[i]; i++) { + if(strcasecmp(key, suexec_safe_env_lst[i])==0) + return true; + } + return false; +} +#ifndef DOXYGEN +struct Append_env_pair_info { + Pool* pool; + HashStringString* env; + ValuePtr vstdin; +}; +#endif +static void append_env_pair( + HashStringValue::key_type akey, + HashStringValue::value_type avalue, + Append_env_pair_info *info) { + if(*akey==STDIN_EXEC_PARAM_NAME) { + info->vstdin=avalue; + } else { + if(!is_safe_env_key(akey->cstr())) + throw Exception("parser.runtime", + akey, + "not safe environment variable"); + info->env->put(akey, avalue->as_string(info->pool)); } } +#ifndef DOXYGEN +struct Pass_cgi_header_attribute_info { + Pool* pool; + Charset* charset; + HashStringValue* fields; + ValuePtr content_type; +}; +#endif +static void pass_cgi_header_attribute( + ArrayString::element_type astring, + Pass_cgi_header_attribute_info* info) { + int colon_pos=astring->pos(":", 1); + if(colon_pos>0) { + StringPtr key(astring->mid(0, colon_pos)->change_case( + *info->pool, *info->charset, String::CC_UPPER)); + ValuePtr value(new VString(astring->mid(colon_pos+1, astring->size()))); + info->fields->put(key, value); + if(*key=="CONTENT-TYPE") + info->content_type=value; + } +} +/// @todo fix `` in perl - they produced flipping consoles and no output to perl +static void _exec_cgi(Request& r, StringPtr method_name, MethodParams& params, + bool cgi) { + Pool& pool=r.pool(); + + ValuePtr vfile_name=params.as_no_junction(0, "file name must not be code"); + + StringPtr script_name=r.absolute(vfile_name->as_string(&pool)); + + HashStringString env; + #define ECSTR(name, value_cstr) \ + if(value_cstr) \ + env.put( \ + StringPtr(new String(#name)), \ + StringPtr(new String(value_cstr))); \ + // passing SAPI::environment + if(const char* const *pairs=SAPI::environment(r.sapi_info)) { + while(const char* pair=*pairs++) + if(const char* eq_at=strchr(pair, '=')) + env.put( + StringPtr(new String(pair, eq_at-pair)), + StringPtr(new String(eq_at+1))); + } + + // const + ECSTR(GATEWAY_INTERFACE, "CGI/1.1"); + // from Request.info + ECSTR(DOCUMENT_ROOT, r.request_info.document_root); + ECSTR(PATH_TRANSLATED, r.request_info.path_translated); + ECSTR(REQUEST_METHOD, r.request_info.method); + ECSTR(QUERY_STRING, r.request_info.query_string); + ECSTR(REQUEST_URI, r.request_info.uri); + ECSTR(CONTENT_TYPE, r.request_info.content_type); + char content_length_cstr[MAX_NUMBER]; + snprintf(content_length_cstr, MAX_NUMBER, "%u", r.request_info.content_length); + //String content_length(content_length_cstr); + ECSTR(CONTENT_LENGTH, content_length_cstr); + // SCRIPT_* + env.put(StringPtr(new String("SCRIPT_NAME")), script_name); + //env.put(*new(pool) String(pool, "SCRIPT_FILENAME"), ??&script_name); + + bool stdin_specified=false; + // environment & stdin from param + String in; + if(params.count()>1) { + ValuePtr venv=params.as_no_junction(1, "env must not be code"); + if(HashStringValue* user_env=venv->get_hash(method_name)) { + Append_env_pair_info info; + info.pool=&pool; + info.env=&env; + user_env->for_each(append_env_pair, &info); + if(info.vstdin) { + stdin_specified=true; + if(StringPtr sstdin=info.vstdin->get_string(&pool)) { + in.append(*sstdin, String::UL_CLEAN, true); + } else + if(VFile *vfile=static_cast(info.vstdin->as("file", false))) + in.APPEND_TAINTED((const char* )vfile->value_ptr(), vfile->value_size(), + "$.stdin[assigned]", 0); + else + throw Exception("parser.runtime", + method_name, + STDIN_EXEC_PARAM_NAME " parameter must be string or file"); + } + } + } + + // argv from params + ArrayString argv; + if(params.count()>2) { + for(int i=2; ipos("\r\n\r\n", 4); + int unix_pos=execution.out->pos("\n\n", 2); + + bool unix_header_break; + switch((dos_pos >= 0?10:00) + (unix_pos >= 0?01:00)) { + case 10: // dos + unix_header_break=false; + break; + case 01: // unix + unix_header_break=true; + break; + case 11: // dos & unix + unix_header_break=unix_possize(), execution.out->cstr(), + (uint)execution.err->size(), execution.err->cstr()); + break; //never reached + } + + int header_break_pos; + if(unix_header_break) { + header_break_pos=unix_pos; + eol_marker="\n"; eol_marker_size=1; + } else { + header_break_pos=dos_pos; + eol_marker="\r\n"; eol_marker_size=2; + } + + header=execution.out->mid(0, header_break_pos); + body=execution.out->mid(header_break_pos+eol_marker_size*2, execution.out->size()); + } + // body + self.set(pool, false/*not tainted*/, body->cstr(), body->size()); + + // $fields << header + if(header && eol_marker) { + ArrayString rows; + header->split(rows, 0, eol_marker, eol_marker_size); + Pass_cgi_header_attribute_info info; + info.pool=&pool; + info.charset=&r.charsets.source(); + info.fields=&self.fields(); + rows.for_each(pass_cgi_header_attribute, &info); + if(info.content_type) + self.fields().put(content_type_name, info.content_type); + } + + // $status + self.fields().put(file_status_name, ValuePtr(new VInt(execution.status))); + + // $stderr + if(execution.err->size()) + self.fields().put( + StringPtr(new String("stderr")), + ValuePtr(new VString(execution.err))); +} +static void _exec(Request& r, StringPtr method_name, MethodParams& params) { + _exec_cgi(r, method_name, params, false); +} +static void _cgi(Request& r, StringPtr method_name, MethodParams& params) { + _exec_cgi(r, method_name, params, true); +} + +static void _list(Request& r, StringPtr method_name, MethodParams& params) { + Pool& pool=r.pool(); + + ValuePtr relative_path=params.as_no_junction(0, "path must not be code"); + + StringPtr regexp; + pcre *regexp_code; + const int ovecsize=(1/*match*/)*3; + int ovector[ovecsize]; + if(params.count()>1) { + regexp=params.as_no_junction(1, "regexp must not be code")->as_string(&pool); + + const char* pattern=regexp->cstr(); + const char* errptr; + int erroffset; + regexp_code=pcre_compile(pattern, PCRE_EXTRA | PCRE_DOTALL, + &errptr, &erroffset, + r.charsets.source().pcre_tables); + + if(!regexp_code) + throw Exception(0, + regexp->mid(erroffset, regexp->size()), + "regular expression syntax error - %s", errptr); + } else + regexp_code=0; + + + CharPtr absolute_path_cstr=r.absolute(relative_path->as_string(&pool))-> + cstr(String::UL_FILE_SPEC); + + Table::columns_type columns(new ArrayString); + *columns+=StringPtr(new String("name")); + TablePtr table(new Table(method_name, columns)); + + LOAD_DIR(absolute_path_cstr, + const char* file_name_cstr=ffblk.ff_name; + size_t file_name_size=strlen(file_name_cstr); + bool suits=true; + if(regexp_code) { + int exec_result=pcre_exec(regexp_code, 0, + ffblk.ff_name, file_name_size, 0, + 0, ovector, ovecsize); + + if(exec_result==PCRE_ERROR_NOMATCH) + suits=false; + else if(exec_result<0) { + (*pcre_free)(regexp_code); + throw Exception(0, + regexp, + "regular expression execute (%d)", + exec_result); + } + } + + if(suits) { + StringPtr file_name(new String); + file_name->APPEND_TAINTED(pool.copy(file_name_cstr, file_name_size), file_name_size, + method_name->origin().file, method_name->origin().line); + + Table::element_type row(new ArrayString); + *row+=file_name; + *table+=row; + } + ); + + if(regexp_code) + pcre_free(regexp_code); + + // write out result + r.write_no_lang(ValuePtr(new VTable(table))); +} + +#ifndef DOXYGEN +struct Lock_execute_body_info { + Request* r; + ValuePtr body_code; +}; +#endif +static void lock_execute_body(int , void *ainfo) { + Lock_execute_body_info& info=*static_cast(ainfo); + // execute body + info.r->write_assign_lang(info.r->process(info.body_code)); +}; +static void _lock(Request& r, StringPtr method_name, MethodParams& params) { + Lock_execute_body_info info; + info.r=&r; + StringPtr file_spec=r.absolute(params.as_string(0, "file name must be string")); + info.body_code=params.as_junction(1, "body must be code"); + + file_write_action_under_lock(file_spec, "lock", lock_execute_body, &info); +} + +static int lastposafter(const String& s, int after, const char* substr, size_t substr_size, bool beforelast=false) { + size_t size; + if(beforelast) + size=s.size(); + int at; + while((at=s.pos(substr, substr_size, after))>=0) { + size_t newafter=at+substr_size/*skip substr*/; + if(beforelast && newafter==size) + break; + after=newafter; + } + + return after; +} + +static void _find(Request& r, StringPtr method_name, MethodParams& params) { + Pool& pool=r.pool(); + StringPtr file_name=params.as_no_junction(0, "file name must not be code")->as_string(&pool); + StringPtr file_spec; + if(file_name->first_char()=='/') + file_spec=file_name; + else + file_spec=r.relative(r.request_info.uri, file_name); + + // easy way + if(file_readable(r.absolute(file_spec))) { + r.write_assign_lang(*file_spec); + return; + } + + // monkey way + int after_base_slash=lastposafter(*file_spec, 0, "/", 1); + StringPtr dirname=file_spec->mid(0, after_base_slash); + StringPtr basename=file_spec->mid(after_base_slash, file_spec->size()); + + int after_monkey_slash; + while((after_monkey_slash=lastposafter(*dirname, 0, "/", 1, true))>0) { + StringPtr test_name(new String); + *test_name<<*(dirname=dirname->mid(0, after_monkey_slash)); + *test_name< /a + // /a/b/ > /a + int afterslash=lastposafter(*file_spec, 0, "/", 1, true); + if(afterslash>0) + r.write_assign_lang(*file_spec->mid(0, afterslash==1?1:afterslash-1)); + else + r.write_assign_lang(String(".", 1)); +} + +static void _basename(Request& r, StringPtr method_name, MethodParams& params) { + StringPtr file_spec=params.as_string(0, "file name must be string"); + // /a/some.tar.gz > some.tar.gz + int afterslash=lastposafter(*file_spec, 0, "/", 1); + r.write_assign_lang(*file_spec->mid(afterslash, file_spec->size())); +} + +static void _justname(Request& r, StringPtr method_name, MethodParams& params) { + StringPtr file_spec=params.as_string(0, "file name must be string"); + // /a/some.tar.gz > some.tar + int afterslash=lastposafter(*file_spec, 0, "/", 1); + int afterdot=lastposafter(*file_spec, afterslash, ".", 1); + r.write_assign_lang(*file_spec->mid(afterslash, afterdot!=afterslash?afterdot-1:file_spec->size())); +} +static void _justext(Request& r, StringPtr method_name, MethodParams& params) { + StringPtr file_spec=params.as_string(0, "file name must be string"); + // /a/some.tar.gz > gz + int afterdot=lastposafter(*file_spec, 0, ".", 1); + if(afterdot>0) + r.write_assign_lang(*file_spec->mid(afterdot, file_spec->size())); +} + +static void _fullpath(Request& r, StringPtr method_name, MethodParams& params) { + StringPtr file_spec=params.as_string(0, "file name must be string"); + StringPtr result(new String); + if(file_spec->first_char()=='/') + result=file_spec; + else { + // /some/page.html: ^file:fullpath[a.gif] => /some/a.gif + StringPtr full_disk_path=r.absolute(file_spec); + size_t document_root_length=strlen(r.request_info.document_root); + + if(document_root_length>0) { + char last_char=r.request_info.document_root[document_root_length-1]; + if(last_char == '/' || last_char == '\\') + --document_root_length; + } + result=full_disk_path->mid(document_root_length, full_disk_path->size()); + } + r.write_assign_lang(*result); +} + + +// constructor + +MFile::MFile(): Methoded("file") { + // ^save[mode;file-name] + add_native_method("save", Method::CT_DYNAMIC, _save, 2, 2); + + // ^delete[file-name] + add_native_method("delete", Method::CT_STATIC, _delete, 1, 1); + + // ^move[from-file-name;to-file-name] + add_native_method("move", Method::CT_STATIC, _move, 2, 2); + + // ^load[mode;disk-name] + // ^load[mode;disk-name;user-name] + add_native_method("load", Method::CT_DYNAMIC, _load, 2, 3); + + // ^stat[disk-name] + add_native_method("stat", Method::CT_DYNAMIC, _stat, 1, 1); + + // ^cgi[file-name] + // ^cgi[file-name;env hash] + // ^cgi[file-name;env hash;1cmd;2line;3ar;4g;5s] + add_native_method("cgi", Method::CT_DYNAMIC, _cgi, 1, 2+10); + + // ^exec[file-name] + // ^exec[file-name;env hash] + // ^exec[file-name;env hash;1cmd;2line;3ar;4g;5s] + add_native_method("exec", Method::CT_DYNAMIC, _exec, 1, 2+10); + + // ^file:list[path] + // ^file:list[path][regexp] + add_native_method("list", Method::CT_STATIC, _list, 1, 2); + + // ^file:lock[path]{code} + add_native_method("lock", Method::CT_STATIC, _lock, 2, 2); -// initialize + // ^find[file-name] + // ^find[file-name]{when-not-found} + add_native_method("find", Method::CT_STATIC, _find, 1, 2); -void initialize_file_base_class(Pool& pool, VStateless_class& vclass) { - // ^save[file-name] - vclass.add_native_method("save", _save, 1, 1); + // ^file:dirname[/a/some.tar.gz]=/a + // ^file:dirname[/a/b/]=/a + add_native_method("dirname", Method::CT_STATIC, _dirname, 1, 1); + // ^file:basename[/a/some.tar.gz]=some.tar.gz + add_native_method("basename", Method::CT_STATIC, _basename, 1, 1); + // ^file:justname[/a/some.tar.gz]=some.tar + add_native_method("justname", Method::CT_STATIC, _justname, 1, 1); + // ^file:justext[/a/some.tar.gz]=gz + add_native_method("justext", Method::CT_STATIC, _justext, 1, 1); + // /some/page.html: ^file:fullpath[a.gif] => /some/a.gif + add_native_method("fullpath", Method::CT_STATIC, _fullpath, 1, 1); }