--- parser3/src/classes/file.C 2002/06/11 14:14:15 1.83 +++ parser3/src/classes/file.C 2003/04/04 14:42:38 1.110 @@ -1,12 +1,14 @@ /** @file Parser: @b file parser class. - Copyright (c) 2001, 2002 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001, 2003 ArtLebedev Group (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) - $Id: file.C,v 1.83 2002/06/11 14:14:15 paf Exp $ + portions by Victor Fedoseev" [January 23, 2003] */ +static const char* IDENT_FILE_C="$Date: 2003/04/04 14:42:38 $"; + #include "pa_config_includes.h" #include "pcre.h" @@ -21,10 +23,13 @@ #include "pa_dir.h" #include "pa_vtable.h" #include "pa_charset.h" +#include "pa_charsets.h" // defines #define TEXT_MODE_NAME "text" +#define STDIN_EXEC_PARAM_NAME "stdin" +#define CHARSET_EXEC_PARAM_NAME "charset" // consts @@ -87,10 +92,6 @@ public: }; -// consts - -const int FIND_MONKEY_MAX_HOPS=10; - // methods static void _save(Request& r, const String&, MethodParams *params) { @@ -98,7 +99,7 @@ static void _save(Request& r, const Stri Value& vfile_name=params->as_no_junction(1, "file name must not be code"); // save - static_cast(r.self)->save(r.absolute(vfile_name.as_string()), + static_cast(r.get_self())->save(r.absolute(vfile_name.as_string()), vmode_name.as_string()==TEXT_MODE_NAME); } @@ -119,54 +120,42 @@ static void _move(Request& r, const Stri r.absolute(vto_file_name.as_string())); } -static void _find(Request& r, const String& method_name, MethodParams *params) { - Pool& pool=r.pool(); - Value& vfile_name=params->as_no_junction(0, "file name must not be code"); - - const String &lfile_name=vfile_name.as_string(); - - // passed file name simply exists in current dir - if(file_readable(r.absolute(lfile_name))) { - r.write_no_lang(lfile_name); - return; - } - - // scan .. dirs for result - for(int i=0; isize()==2) { - Value& not_found_code=params->as_junction(1, "not-found param must be code"); - r.write_pass_lang(r.process(not_found_code)); - } +static void _load_pass_param(const Hash::Key& key, Hash::Val *value, void *info) { + Hash& dest=*static_cast(info); + dest.put(key, value); } - static void _load(Request& r, const String& method_name, MethodParams *params) { Pool& pool=r.pool(); Value& vmode_name=params-> as_no_junction(0, "mode must not be code"); - Value& vfile_name=params->as_no_junction(1, "file name must not be code"); - - const String& lfile_name=vfile_name.as_string(); + const String& lfile_name=r.absolute(params->as_no_junction(1, "file name must not be code").as_string()); + Value *third_param=params->size()>2?¶ms->as_no_junction(2, "filename or options must not be code"):0; + Hash *third_param_hash=third_param?third_param->get_hash(&method_name):0; + int alt_filename_param_index=2; + if(third_param_hash) + alt_filename_param_index++; void *data; size_t size; - file_read(pool, r.absolute(lfile_name), data, size, - vmode_name.as_string()==TEXT_MODE_NAME); + Hash *fields=0; + file_read(pool, lfile_name, data, size, + vmode_name.as_string()==TEXT_MODE_NAME, + third_param_hash, + &fields + ); - char *user_file_name=params->size()>2? - params->as_string(2, "filename must be string").cstr(String::UL_FILE_SPEC) + char *user_file_name=params->size()>alt_filename_param_index? + params->as_string(alt_filename_param_index, "filename must be string").cstr(String::UL_FILE_SPEC) :lfile_name.cstr(String::UL_FILE_SPEC); + + Value *vcontent_type=0; + if(fields) + vcontent_type=static_cast(fields->get(*content_type_name)); + if(!vcontent_type) + vcontent_type=new(pool) VString(r.mime_type_of(user_file_name)); - static_cast(r.self)->set(true/*tainted*/, data, size, - user_file_name, new(pool) VString(r.mime_type_of(user_file_name))); + VFile& self=*static_cast(r.get_self()); + self.set(true/*tainted*/, data, size, user_file_name, vcontent_type); + if(fields) + fields->for_each(_load_pass_param, &self.fields()); } static void _stat(Request& r, const String& method_name, MethodParams *params) { @@ -181,40 +170,69 @@ static void _stat(Request& r, const Stri size, atime, mtime, ctime); - VFile& vfile=*static_cast(r.self); + VFile& vfile=*static_cast(r.get_self()); vfile.set(true/*tainted*/, 0/*no bytes*/, size); Hash& ff=vfile.fields(); ff.put(*new(pool) String(pool, "adate"), new(pool) VDate(pool, atime)); ff.put(*new(pool) String(pool, "mdate"), new(pool) VDate(pool, mtime)); ff.put(*new(pool) String(pool, "cdate"), new(pool) VDate(pool, ctime)); + ff.put(*content_type_name, new(pool) VString(r.mime_type_of(lfile_name.cstr(String::UL_FILE_SPEC)))); } static bool is_safe_env_key(const char *key) { - if(strncmp(key, "HTTP_", 5)==0) + if(strncasecmp(key, "HTTP_", 5)==0) return true; - if(strncmp(key, "CGI_", 4)==0) + if(strncasecmp(key, "CGI_", 4)==0) return true; for(int i=0; suexec_safe_env_lst[i]; i++) { - if(strncmp(key, suexec_safe_env_lst[i], strlen(suexec_safe_env_lst[i]))==0) + if(strcasecmp(key, suexec_safe_env_lst[i])==0) return true; } return false; } -static void append_env_pair(const Hash::Key& key, Hash::Val *value, void *info) { - Hash& hash=*static_cast(info); - if(is_safe_env_key(key.cstr())) - hash.put(key, &static_cast(value)->as_string()); +#ifndef DOXYGEN +struct Append_env_pair_info { + Hash* hash; + Value* vstdin; + Value* vcharset; +}; +#endif +static void append_env_pair(const Hash::Key& key, Hash::Val *avalue, void *info) { + Append_env_pair_info& pi=*static_cast(info); + Value& value=*static_cast(avalue); + + if(key==STDIN_EXEC_PARAM_NAME) { + pi.vstdin=&value; + } else if(key==CHARSET_EXEC_PARAM_NAME) { + pi.vcharset=&value; + } else { + if(!is_safe_env_key(key.cstr())) + throw Exception("parser.runtime", + &key, + "not safe environment variable"); + pi.hash->put(key, &value.as_string()); + } } -static void pass_cgi_header_attribute(Array::Item *value, void *info) { +#ifndef DOXYGEN +struct Pass_cgi_header_attribute_info { + Hash *hash; + Value *content_type; +}; +#endif +static void pass_cgi_header_attribute(Array::Item *value, void *ainfo) { String& string=*static_cast(value); - Hash& hash=*static_cast(info); + Pool& pool=string.pool(); + Pass_cgi_header_attribute_info& info=*static_cast(ainfo); int colon_pos=string.pos(":", 1); - if(colon_pos>0) - hash.put(string.mid(0, colon_pos), - new(string.pool()) VString(string.mid(colon_pos+1, string.size()))); + if(colon_pos>0) { + const String& key=string.mid(0, colon_pos).change_case(pool, String::CC_UPPER); + Value *value=new(pool) VString(string.mid(colon_pos+1, string.size())); + info.hash->put(key, value); + if(key=="CONTENT-TYPE") + info.content_type=value; + } } -/** @todo fix `` in perl - they produced flipping consoles and no output to perl -*/ +/// @todo fix `` in perl - they produced flipping consoles and no output to perl static void _exec_cgi(Request& r, const String& method_name, MethodParams *params, bool cgi) { Pool& pool=r.pool(); @@ -258,12 +276,34 @@ static void _exec_cgi(Request& r, const env.put(*new(pool) String(pool, "SCRIPT_NAME"), &script_name); //env.put(*new(pool) String(pool, "SCRIPT_FILENAME"), ??&script_name); + // environment & stdin from param + String raw_in(pool); + Charset *charset=0; // default script works raw_in 'source' charset = no transcoding needed if(params->size()>1) { Value& venv=params->as_no_junction(1, "env must not be code"); - if(Hash *user_env=venv.get_hash(&method_name)) - user_env->for_each(append_env_pair, &env); + if(Hash *user_env=venv.get_hash(&method_name)) { + Append_env_pair_info info={&env}; + user_env->for_each(append_env_pair, &info); + // $.stdin + if(info.vstdin) { + if(const String *sstdin=info.vstdin->get_string()) { + raw_in.append(*sstdin, String::UL_CLEAN, true); + } else + if(VFile *vfile=static_cast(info.vstdin->as("file", false))) + raw_in.APPEND_TAINTED((const char *)vfile->value_ptr(), vfile->value_size(), + "$.stdin[assigned]", 0); + else + throw Exception("parser.runtime", + &method_name, + STDIN_EXEC_PARAM_NAME " parameter must be string or file"); + } + // $.charset + if(info.vcharset) + charset=&charsets->get_charset(info.vcharset->as_string()); + } } + // argv from params Array *argv=0; if(params->size()>2) { argv=new(pool) Array(pool, params->size()-2); @@ -271,61 +311,81 @@ static void _exec_cgi(Request& r, const *argv+=¶ms->as_string(i, "parameter must be string"); } - String in(pool); - in.APPEND(r.post_data, r.post_size, String::UL_CLEAN, "passing post data", 0); - String out(pool); - //out.APPEND_CONST("content-type:text/plain\nheader:test-header\n\ntest-body"); - //out<(r.self); + // exec! + String raw_out(pool); + String& raw_err=*new(pool) String(pool); + int status=0; + + const String *body=0; + const String *header=0; + const char *eol_marker=0; size_t eol_marker_size; int header_break_pos; - const String *body=&out; // ^file:exec + String *real_out=&raw_out; + String *real_err=&raw_err; if(cgi) { // ^file:cgi - // construct with 'out' body and header - int delim_size; - const char *eol_marker="\r\n"; size_t eol_marker_size=2; - int pos=out.pos("\r\n\r\n", delim_size=4); - if(pos<0) { - eol_marker="\n"; eol_marker_size=1; - pos=out.pos("\n\n", delim_size=2); + status = pa_exec(false/*forced_allow*/, script_name, &env, argv, *real_in, raw_out, raw_err, &header_break_pos, &eol_marker, &eol_marker_size); + // transcode if necessary + if(charset) { + real_out=&Charset::transcode(pool, *charset, pool.get_source_charset(), raw_out); + real_err=&Charset::transcode(pool, *charset, pool.get_source_charset(), raw_err); } - if(pos<0) { - delim_size=0; // calm down, compiler + if(header_break_pos == -1) throw Exception(0, &method_name, - "output does not contain CGI header; exit status=%d; stdoutsize=%u; stdout: \"%s\"; stderrsize=%u; stderr: \"%s\"", + "output does not contain CGI header; " + "exit status=%d; stdoutsize=%u; stdout: \"%s\"; stderrsize=%u; stderr: \"%s\"", status, - (uint)out.size(), out.cstr(), - (uint)err.size(), err.cstr()); - } + (uint)real_out->size(), real_out->cstr(), + (uint)real_err->size(), real_err->cstr()); - const String& header=out.mid(0, pos); - body=&out.mid(pos+delim_size, out.size()); - - // header to $fields - { - Array rows(pool); - header.split(rows, 0, eol_marker, eol_marker_size, String::UL_CLEAN); - rows.for_each(pass_cgi_header_attribute, &self.fields()); + header=&real_out->mid(0, header_break_pos); + body=&real_out->mid(header_break_pos+eol_marker_size*2, real_out->size()); + }else{ // ^file:exec + status = pa_exec(false/*forced_allow*/, script_name, &env, argv, *real_in, raw_out, raw_err); + // transcode if necessary + if(charset) { + real_out=&Charset::transcode(pool, *charset, pool.get_source_charset(), raw_out); + real_err=&Charset::transcode(pool, *charset, pool.get_source_charset(), raw_err); } + body=real_out; } + + + + VFile& self=*static_cast(r.get_self()); + // body self.set(false/*not tainted*/, body->cstr(), body->size()); + // $fields << header + if(header && eol_marker) { + Array rows(pool); + header->split(rows, 0, eol_marker, eol_marker_size); + Pass_cgi_header_attribute_info info={&self.fields()}; + rows.for_each(pass_cgi_header_attribute, &info); + if(info.content_type) + self.fields().put(*content_type_name, info.content_type); + } + // $status self.fields().put( - *new(pool) String(pool, "status"), + *file_status_name, new(pool) VInt(pool, status)); // $stderr - if(err.size()) { + if(real_err->size()) { self.fields().put( *new(pool) String(pool, "stderr"), - new(pool) VString(err)); - - SAPI::log(pool, "file:%s: %s", cgi?"cgi":"exec", err.cstr()); + new(pool) VString(*real_err)); } } static void _exec(Request& r, const String& method_name, MethodParams *params) { @@ -392,7 +452,7 @@ static void _list(Request& r, const Stri char *file_name_cstr=(char *)pool.malloc(file_name_size); memcpy(file_name_cstr, ffblk.ff_name, file_name_size); String &file_name=*new(pool) String(pool); - file_name.APPEND(file_name_cstr, file_name_size, String::UL_FILE_SPEC, + file_name.APPEND_TAINTED(file_name_cstr, file_name_size, method_name.origin().file, method_name.origin().line); Array& row=*new(pool) Array(pool); @@ -429,6 +489,114 @@ static void _lock(Request& r, const Stri file_write_action_under_lock(file_spec, "lock", lock_execute_body, &info); } +static int lastposafter(const String& s, int after, const char *substr, size_t substr_size, bool beforelast=false) { + size_t size; + if(beforelast) + size=s.size(); + int at; + while((at=s.pos(substr, substr_size, after))>=0) { + size_t newafter=at+substr_size/*skip substr*/; + if(beforelast && newafter==size) + break; + after=newafter; + } + + return after; +} + +static void _find(Request& r, const String& method_name, MethodParams *params) { + Pool& pool=r.pool(); + const String &file_name=params->as_no_junction(0, "file name must not be code").as_string(); + const String *file_spec; + if(file_name.first_char()=='/') + file_spec=&file_name; + else + file_spec=&r.relative(r.info.uri, file_name); + + // easy way + if(file_readable(r.absolute(*file_spec))) { + r.write_assign_lang(*file_spec); + return; + } + + // monkey way + int after_base_slash=lastposafter(*file_spec, 0, "/", 1); + const String *dirname=&file_spec->mid(0, after_base_slash); + const String& basename=file_spec->mid(after_base_slash, file_spec->size()); + + int after_monkey_slash; + while((after_monkey_slash=lastposafter(*dirname, 0, "/", 1, true))>0) { + String local_test_name(pool); + local_test_name<<*(dirname=&dirname->mid(0, after_monkey_slash)); + local_test_name<size()==2) { + Value& not_found_code=params->as_junction(1, "not-found param must be code"); + r.write_pass_lang(r.process(not_found_code)); + } +} + +static void _dirname(Request& r, const String& method_name, MethodParams *params) { + Pool& pool=r.pool(); + const String& file_spec=params->as_string(0, "file name must be string"); + // /a/some.tar.gz > /a + // /a/b/ > /a + int afterslash=lastposafter(file_spec, 0, "/", 1, true); + if(afterslash>0) + r.write_assign_lang(file_spec.mid(0, afterslash==1?1:afterslash-1)); + else + r.write_assign_lang(*new(pool) String(pool, ".", 1)); +} + +static void _basename(Request& r, const String& method_name, MethodParams *params) { + const String& file_spec=params->as_string(0, "file name must be string"); + // /a/some.tar.gz > some.tar.gz + int afterslash=lastposafter(file_spec, 0, "/", 1); + r.write_assign_lang(file_spec.mid(afterslash, file_spec.size())); +} + +static void _justname(Request& r, const String& method_name, MethodParams *params) { + const String& file_spec=params->as_string(0, "file name must be string"); + // /a/some.tar.gz > some.tar + int afterslash=lastposafter(file_spec, 0, "/", 1); + int afterdot=lastposafter(file_spec, afterslash, ".", 1); + r.write_assign_lang(file_spec.mid(afterslash, afterdot!=afterslash?afterdot-1:file_spec.size())); +} +static void _justext(Request& r, const String& method_name, MethodParams *params) { + const String& file_spec=params->as_string(0, "file name must be string"); + // /a/some.tar.gz > gz + int afterdot=lastposafter(file_spec, 0, ".", 1); + if(afterdot>0) + r.write_assign_lang(file_spec.mid(afterdot, file_spec.size())); +} + +static void _fullpath(Request& r, const String& method_name, MethodParams *params) { + const String& file_spec=params->as_string(0, "file name must be string"); + const String *result; + if(file_spec.first_char()=='/') + result=&file_spec; + else { + // /some/page.html: ^file:fullpath[a.gif] => /some/a.gif + const String& full_disk_path=r.absolute(file_spec); + size_t document_root_length=strlen(r.info.document_root); + + if(document_root_length>0) { + char last_char=r.info.document_root[document_root_length-1]; + if(last_char == '/' || last_char == '\\') + --document_root_length; + } + result=&full_disk_path.mid(document_root_length, full_disk_path.size()); + } + r.write_assign_lang(*result); +} + + // constructor MFile::MFile(Pool& apool) : Methoded(apool, "file") { @@ -441,10 +609,6 @@ MFile::MFile(Pool& apool) : Methoded(apo // ^move[from-file-name;to-file-name] add_native_method("move", Method::CT_STATIC, _move, 2, 2); - // ^find[file-name] - // ^find[file-name]{when-not-found} - add_native_method("find", Method::CT_STATIC, _find, 1, 2); - // ^load[mode;disk-name] // ^load[mode;disk-name;user-name] add_native_method("load", Method::CT_DYNAMIC, _load, 2, 3); @@ -469,6 +633,21 @@ MFile::MFile(Pool& apool) : Methoded(apo // ^file:lock[path]{code} add_native_method("lock", Method::CT_STATIC, _lock, 2, 2); + // ^find[file-name] + // ^find[file-name]{when-not-found} + add_native_method("find", Method::CT_STATIC, _find, 1, 2); + + // ^file:dirname[/a/some.tar.gz]=/a + // ^file:dirname[/a/b/]=/a + add_native_method("dirname", Method::CT_STATIC, _dirname, 1, 1); + // ^file:basename[/a/some.tar.gz]=some.tar.gz + add_native_method("basename", Method::CT_STATIC, _basename, 1, 1); + // ^file:justname[/a/some.tar.gz]=some.tar + add_native_method("justname", Method::CT_STATIC, _justname, 1, 1); + // ^file:justext[/a/some.tar.gz]=gz + add_native_method("justext", Method::CT_STATIC, _justext, 1, 1); + // /some/page.html: ^file:fullpath[a.gif] => /some/a.gif + add_native_method("fullpath", Method::CT_STATIC, _fullpath, 1, 1); } // global variable