--- parser3/src/targets/cgi/parser3.C 2001/03/23 08:47:49 1.35 +++ parser3/src/targets/cgi/parser3.C 2004/07/30 10:55:22 1.232 @@ -1,88 +1,210 @@ /** @file Parser: scripting and CGI main. - Copyright (c) 2001 ArtLebedev Group (http://www.artlebedev.com) + Copyright(c) 2001-2004 ArtLebedev Group (http://www.artlebedev.com) + Author: Alexandr Petrosian (http://paf.design.ru) +*/ - Author: Alexander Petrosyan (http://design.ru/paf) +static const char * const IDENT_PARSER3_C="$Date: 2004/07/30 10:55:22 $"; - $Id: parser3.C,v 1.35 2001/03/23 08:47:49 paf Exp $ -*/ +#include "pa_config_includes.h" -#ifdef HAVE_CONFIG_H -# include "pa_config.h" +#if _MSC_VER +# include #endif +#include "pa_sapi.h" +#include "classes.h" +#include "pa_common.h" +#include "pa_request.h" +#include "pa_socks.h" +#include "pa_version.h" #ifdef WIN32 # include -# include +# include "getopt.h" #else -# include +# include #endif -//\ifwin32 -#include -//#include -//\endifwin32 - -#include -#include -#include -#include +// defines -#include "pa_common.h" -#include "pa_globals.h" -#include "pa_request.h" +#if _MSC_VER && !defined(_DEBUG) +# define PA_SUPPRESS_SYSTEM_EXCEPTION +#endif -Pool pool; // global pool -bool cgi; ///< we were started as CGI? +//#define DEBUG_MAILRECEIVE "mailreceive.eml" + +// consts + +#define REDIRECT_PREFIX "REDIRECT_" +#define PARSER_CONFIG_ENV_NAME "CGI_PARSER_CONFIG" +#define PARSER_LOG_ENV_NAME "CGI_PARSER_LOG" + +/// IIS refuses to read bigger chunks +const size_t READ_POST_CHUNK_SIZE=0x400*0x400; // 1M + +static const char* argv0; +static const char* config_filespec_cstr=0; +static bool fail_on_config_read_problem=true; + +static bool cgi; ///< we were started as CGI? +static bool mail_received=false; ///< we were started with -m option? [asked to parse incoming message to $mail:received] + +// for signal handlers +Request *request=0; +Request_info *request_info=0; +bool execution_canceled=false; + +// SAPI + +class SAPI_Info{} SAPI_info; + +static void log(const char* fmt, va_list args) { + bool opened=false; + FILE *f=0; + + const char* log_by_env=getenv(PARSER_LOG_ENV_NAME); + if(!log_by_env) + log_by_env=getenv(REDIRECT_PREFIX PARSER_LOG_ENV_NAME); + if(log_by_env) { + f=fopen(log_by_env, "at"); + opened=f!=0; + } + + if(!opened && config_filespec_cstr) { + char beside_config_path[MAX_STRING]; + strncpy(beside_config_path, config_filespec_cstr, MAX_STRING-1); beside_config_path[MAX_STRING-1]=0; + if(!( + rsplit(beside_config_path, '/') || + rsplit(beside_config_path, '\\'))) { // strip filename + // no path, just filename + beside_config_path[0]='.'; beside_config_path[1]=0; + } + + char file_spec[MAX_STRING]; + snprintf(file_spec, MAX_STRING, + "%s/parser3.log", beside_config_path); + f=fopen(file_spec, "at"); + opened=f!=0; + } + // fallback to stderr + if(!opened) + f=stderr; + + // use no memory [so that we could log out-of-memory error] + setbuf(f, 0); // stderr stream is unbuffered by default, but still... + + // prefix + time_t t=time(0); + if(const char* stamp=ctime(&t)) { // never saw that + if(size_t len=strlen(stamp)) // saw once stamp being ="" + fprintf(f, "[%.*s] ", len-1, stamp); + } + // message -#ifdef WIN32 -# if _MSC_VER -// intercept global system errors -static LONG WINAPI TopLevelExceptionFilter ( - struct _EXCEPTION_POINTERS *ExceptionInfo - ) { char buf[MAX_STRING]; - if(ExceptionInfo && ExceptionInfo->ExceptionRecord) { - struct _EXCEPTION_RECORD *rr=ExceptionInfo->ExceptionRecord; - snprintf(buf, MAX_STRING, "Exception %#X at %p", - er->ExceptionCode, - er->ExceptionAddress); - } else - strcpy(buf, "Exception "); - - PTHROW(0, 0, - 0, - buf); + size_t size=vsnprintf(buf, MAX_STRING, fmt, args); + remove_crlf(buf, buf+size); - return EXCEPTION_EXECUTE_HANDLER; // never reached + fwrite(buf, size, 1, f); + // newline + fprintf(f, "\n"); + + if(opened) + fclose(f); + else + fflush(f); } -# endif +// appends to parser3.log located beside my binary if openable, to stderr otherwize +void SAPI::log(SAPI_Info&, const char* fmt, ...) { + va_list args; + va_start(args,fmt); + ::log(fmt, args); + va_end(args); +} + +static void die_or_abort(const char* fmt, va_list args, bool write_core) { + // log + + // logging is more important than user + // she can cancel download, we'd get SIGPIPE, + // nothing would be logged then + ::log(fmt, args); + + // inform user + + char body[MAX_STRING]; + int content_length=vsnprintf(body, MAX_STRING, fmt, args); + + // prepare header + // let's be honest, that's bad we couldn't produce valid output + SAPI::add_header_attribute(SAPI_info, "status", "500"); + SAPI::add_header_attribute(SAPI_info, "content-type", "text/plain"); + char content_length_cstr[MAX_NUMBER]; + snprintf(content_length_cstr, sizeof(content_length_cstr), "%u", content_length); + SAPI::add_header_attribute(SAPI_info, "content-length", content_length_cstr); + + // send header + SAPI::send_header(SAPI_info); + + // body + SAPI::send_body(SAPI_info, body, content_length); + + // exit & try to produce core dump[unix] or invoke debugger[Win32 Debug version] + if(write_core) { +#if defined(WIN32) && !defined(_DEBUG) + // IIS with abort failes to show STDOUT, it just barks "abnormal program termination" + exit(1); +#else +#if _MSC_VER + _asm int 3; #endif + abort(); +#endif + } + else + exit(1); +} -//\if -static void fix_slashes(char *s) { - if(s) - for(; *s; s++) - if(*s=='\\') - *s='/'; +void SAPI::die(const char* fmt, ...) { + va_list args; + va_start(args, fmt); + die_or_abort(fmt, args, false/*write core?*/); + va_end(args); } -//\endif -// service funcs +void SAPI::abort(const char* fmt, ...) { + va_list args; + va_start(args, fmt); + die_or_abort(fmt, args, true/*write core?*/); + va_end(args); +} -static const char *get_env(Pool& pool, const char *name) { - return getenv(name); +char* SAPI::get_env(SAPI_Info& , const char* name) { + if(char *local=getenv(name)) + return pa_strdup(local); + else + return 0; } -static uint read_post(char *buf, uint max_bytes) { - int read_size=0; +const char* const *SAPI::environment(SAPI_Info&) { +#ifdef _MSC_VER + extern char **_environ; + return _environ; +#else + extern char **environ; + return environ; +#endif +} + +size_t SAPI::read_post(SAPI_Info& , char *buf, size_t max_bytes) { + size_t read_size=0; do { - int chunk_size=read - (fileno(stdin), buf+read_size, min(0x400*0x400, max_bytes-read_size)); - if(chunk_size<0) + ssize_t chunk_size=read(fileno(stdin), + buf+read_size, min(READ_POST_CHUNK_SIZE, max_bytes-read_size)); + if(chunk_size<=0) break; read_size+=chunk_size; } while(read_sizeuri, + request_info->method, + request_info->content_length); + else + SAPI::log(SAPI_info, "%s received before or after processing request", + signal_name); +} -// main +#ifdef SIGUSR1 +static void SIGUSR1_handler(int /*sig*/){ + log_signal("SIGUSR1"); +} +#endif -int main(int argc, char *argv[]) { - umask(2); +#ifdef SIGPIPE +static void SIGPIPE_handler(int /*sig*/){ + log_signal("SIGPIPE"); + execution_canceled=true; + if(request) + request->set_interrupted(true); +} +#endif #ifdef WIN32 - setmode(fileno(stdin), _O_BINARY); - setmode(fileno(stdout), _O_BINARY); - setmode(fileno(stderr), _O_BINARY); +const char* maybe_reconstruct_IIS_status_in_qs(const char* original) +{ + // 404;http://servername/page[?param=value...] + // ';' should be urlencoded by HTTP standard, so we shouldn't get it from browser + // and can consider that as an indication that this is IIS way to report errors + + if(original + && isdigit((unsigned char)original[0]) + && isdigit((unsigned char)original[1]) + && isdigit((unsigned char)original[2]) + && original[3]==';') + { + size_t original_len=strlen(original); + char* reconstructed=new(PointerFreeGC) char[original_len + +12/*IIS-STATUS=&*/ + +14/*IIS-DOCUMENT=&*/ + +1]; + char* cur=reconstructed; + memcpy(cur, "IIS-STATUS=", 11); cur+=11; + memcpy(cur, original, 3); cur+=3; + *cur++='&'; + + const char* qmark_at=strchr(original, '?'); + memcpy(cur, "IIS-DOCUMENT=", 13); cur+=13; + { + size_t value_len=(qmark_at? qmark_at-original: original_len)-4; + memcpy(cur, original+4, value_len); cur+=value_len; + } + + if(qmark_at) { + *cur++='&'; + strcpy(cur, qmark_at+1/*skip ? itself*/); + } else + *cur=0; + + return reconstructed; + } + + return original; +} +#endif + +/** +main workhorse + + @todo + IIS: remove trailing default-document[index.html] from $request.uri. + to do that we need to consult metabase, + wich is tested but seems slow. +*/ +static void real_parser_handler(const char* filespec_to_process, + const char* request_method, bool header_only) +{ + // init socks + pa_socks_init(); + + // init global variables + pa_globals_init(); + + if(!filespec_to_process || !*filespec_to_process) + SAPI::die("Parser/%s", PARSER_VERSION); + + // Request info + Request_info request_info; memset(&request_info, 0, sizeof(request_info)); + char document_root_buf[MAX_STRING]; + if(cgi) { + if(const char* env_document_root=getenv("DOCUMENT_ROOT")) + request_info.document_root=env_document_root; + else if(const char* path_info=getenv("PATH_INFO")) { + // IIS + size_t len=min(sizeof(document_root_buf)-1, strlen(filespec_to_process)-strlen(path_info)); + memcpy(document_root_buf, filespec_to_process, len); document_root_buf[len]=0; + request_info.document_root=document_root_buf; + } else + throw Exception("parser.runtime", + 0, + "CGI: no PATH_INFO defined(in reinventing DOCUMENT_ROOT)"); + } else { + full_file_spec("", document_root_buf, sizeof(document_root_buf)); + request_info.document_root=document_root_buf; + } + request_info.path_translated=filespec_to_process; + request_info.method=request_method ? request_method : "GET"; + const char* query_string= +#ifdef WIN32 + maybe_reconstruct_IIS_status_in_qs #endif + (getenv("QUERY_STRING")); + request_info.query_string=query_string; + if(cgi) { + // few absolute obligatory + const char* path_info=getenv("PATH_INFO"); + if(!path_info) + SAPI::die("CGI: illegal call (missing PATH_INFO)"); + const char* script_name=getenv("SCRIPT_NAME"); + if(!script_name) + SAPI::die("CGI: illegal call (missing SCRIPT_NAME)"); + + const char* env_request_uri=getenv("REQUEST_URI"); + if(env_request_uri) + request_info.uri=env_request_uri; + else + if(query_string) { + char* reconstructed_uri=new(PointerFreeGC) char[ + strlen(path_info)+1/*'?'*/+ + strlen(query_string)+1/*0*/]; + strcpy(reconstructed_uri, path_info); + strcat(reconstructed_uri, "?"); + strcat(reconstructed_uri, query_string); + request_info.uri=reconstructed_uri; + } else + request_info.uri=path_info; + + if(env_request_uri) { // apache & others stuck to standards + /* + http://parser3/env.html?123 =OK + $request:uri=/env.html?123 + REQUEST_URI='/env.html?123' + SCRIPT_NAME='/cgi-bin/parser3' + PATH_INFO='/env.html' + + http://parser3/cgi-bin/parser3/env.html?123 =ERROR + $request:uri=/cgi-bin/parser3/env.html?123 + REQUEST_URI='/cgi-bin/parser3/env.html?123' + SCRIPT_NAME='/cgi-bin/parser3' + PATH_INFO='/env.html' + */ + size_t script_name_len=strlen(script_name); + size_t uri_len=strlen(env_request_uri); + if(strncmp(env_request_uri, script_name, script_name_len)==0 && + script_name_len != uri_len) // under IIS they are the same + SAPI::die("CGI: illegal call (1)"); + } else { // seen on IIS5 + /* + http://nestle/env.html?123 =OK + $request:uri=/env.html?123 + REQUEST_URI='' + SCRIPT_NAME='/env.html' + PATH_INFO='/env.html' + + http://nestle/cgi-bin/parser3.exe/env.html =ERROR + $request:uri=/env.html + REQUEST_URI='' + SCRIPT_NAME='/cgi-bin/parser3.exe' + PATH_INFO='/env.html' + */ + if(strcmp(script_name, path_info)!=0) + SAPI::die("CGI: illegal call (2)"); + } + } else + request_info.uri=""; + + request_info.content_type=getenv("CONTENT_TYPE"); + const char* content_length=getenv("CONTENT_LENGTH"); + request_info.content_length=(content_length?atoi(content_length):0); + request_info.cookie=getenv("HTTP_COOKIE"); + request_info.mail_received=mail_received; + + // get request_info ptr for signal handlers + ::request_info=&request_info; + if(execution_canceled) + SAPI::die("Execution canceled"); + + // prepare to process request + Request request(SAPI_info, request_info, + cgi ? String::Language(String::L_HTML|String::L_OPTIMIZE_BIT) : String::L_AS_IS, + true /* status_allowed */); + + // get request ptr for signal handlers + ::request=&request; + + char config_filespec_buf[MAX_STRING]; + if(!config_filespec_cstr) { + const char* config_by_env=getenv(PARSER_CONFIG_ENV_NAME); + if(!config_by_env) + config_by_env=getenv(REDIRECT_PREFIX PARSER_CONFIG_ENV_NAME); + if(config_by_env) + config_filespec_cstr=config_by_env; + else { + // beside by binary + char beside_binary_path[MAX_STRING]; + strncpy(beside_binary_path, argv0, MAX_STRING-1); beside_binary_path[MAX_STRING-1]=0; // filespec of my binary + if(!( + rsplit(beside_binary_path, '/') || + rsplit(beside_binary_path, '\\'))) { // strip filename + // no path, just filename + // @todo full path, not ./! + beside_binary_path[0]='.'; beside_binary_path[1]=0; + } + snprintf(config_filespec_buf, MAX_STRING, + "%s/%s", + beside_binary_path, AUTO_FILE_NAME); + config_filespec_cstr=config_filespec_buf; + fail_on_config_read_problem=entry_exists(config_filespec_cstr); + } + } + + // process the request + request.core( + config_filespec_cstr, fail_on_config_read_problem, + header_only); + + // no request [prevent signal handlers from accessing invalid memory] + ::request=0; + + // finalize global variables + pa_globals_done(); + + // + pa_socks_done(); +} + +#ifdef PA_SUPPRESS_SYSTEM_EXCEPTION +static const Exception +call_real_parser_handler__do_PEH_return_it( + const char* filespec_to_process, + const char* request_method, bool header_only) +{ + try { + real_parser_handler( + filespec_to_process, + request_method, header_only); + } catch(const Exception& e) { + return e; + } + + return Exception(); +} +static void call_real_parser_handler__supress_system_exception( + const char* filespec_to_process, + const char* request_method, bool header_only) +{ + Exception parser_exception; + LPEXCEPTION_POINTERS system_exception=0; + + __try { + parser_exception=call_real_parser_handler__do_PEH_return_it( + filespec_to_process, + request_method, header_only); + } __except ( + (system_exception=GetExceptionInformation()), + EXCEPTION_EXECUTE_HANDLER) + { + + if(system_exception) + if(_EXCEPTION_RECORD *er=system_exception->ExceptionRecord) + throw Exception("system", + 0, + "0x%08X at 0x%08X", er->ExceptionCode, er->ExceptionAddress); + else + throw Exception("system", + 0, + ""); + else + throw Exception("system", + 0, + ""); + } + + if(parser_exception) + throw Exception(parser_exception); +} +#endif + +static void usage(const char* program) { + printf( + "Parser/%s Copyright(c) 2001-2004 ArtLebedev Group (http://www.artlebedev.com)\n" + "Author: Alexandr Petrosian (http://paf.design.ru)\n" + "\n" + "Usage: %s [options] file\n" + "Options are:\n" +#ifdef WITH_MAILRECEIVE + " -m Parse mail, put received letter to $mail:received\n" +#endif + " -f config_file Use this config file (/path/to/auto.p)\n" + " -h Display usage information (this message)\n" + , PARSER_VERSION, + program); + exit(EINVAL); +} + +int main(int argc, char *argv[]) { + //_asm int 3; + GC_java_finalization=0; + +#ifndef PA_DEBUG_DISABLE_GC + // Dont collect unless explicitly requested + // this is quicker (~30% ), but less memory-efficient(~8%) + // so deciding for speed + GC_dont_gc=1; +#endif +/* + + Array test; + test+=3; + test+=4; +// int a=test.count(); + int i=0; + scanf("%d", &i); + int b=test.get(i); +// int b=test.get(10); + printf("%d", b);//test.count());*/ + +#ifdef SIGUSR1 + if(signal(SIGUSR1, SIGUSR1_handler)==SIG_ERR) + SAPI::die("Can not set handler for SIGUSR1"); +#endif +#ifdef SIGPIPE + if(signal(SIGPIPE, SIGPIPE_handler)==SIG_ERR) + SAPI::die("Can not set handler for SIGPIPE"); +#endif + + +#ifdef DEBUG_MAILRECEIVE + if(FILE *fake_in=fopen(DEBUG_MAILRECEIVE, "rt")) { + dup2(fake_in->_file, 0/*STDIN_FILENO*/); + } +#endif + +#ifdef _DEBUG + //_crtBreakAlloc=46; +#endif + argv0=argv[0]; + + umask(2); // were we started as CGI? cgi= @@ -137,113 +614,109 @@ int main(int argc, char *argv[]) { getenv("GATEWAY_INTERFACE") || getenv("REQUEST_METHOD"); - if(!cgi) { - if(argc<2) { - char *binary=argv[0]; - printf("Usage: %s \n", binary?binary:"parser3"); - exit(1); + char *raw_filespec_to_process; + if(cgi) { + raw_filespec_to_process=getenv("PATH_TRANSLATED"); + if(raw_filespec_to_process && !*raw_filespec_to_process) + raw_filespec_to_process=0; + } else { + optind = 1; + opterr = 0; + int c; + while((c = getopt(argc, argv, "hf:" +#ifdef WITH_MAILRECEIVE + "m" +#endif + )) > 0) { + switch (c) { + case 'h': + usage(argv[0]); + break; + case 'f': + config_filespec_cstr=optarg; + break; +#ifdef WITH_MAILRECEIVE + case 'm': + mail_received=true; + break; +#endif + default: + fprintf(stderr, "%s: invalid option '%c'\n", argv[0], optopt); + usage(argv[0]); + break; + } } + if (optind != argc - 1) { + fprintf(stderr, "%s: file not specified\n", argv[0]); + usage(argv[0]); + } + + raw_filespec_to_process=argv[optind++]; } - char *filespec_to_process=cgi?getenv("PATH_TRANSLATED"):argv[1]; -//\#ifdef WIN32 - fix_slashes(filespec_to_process); -//\#endif - - const char *request_method=getenv("REQUEST_METHOD"); - bool header_only=request_method && strcasecmp(request_method, "HEAD")==0; - PTRY { // global try - // must be first in PTRY{}PCATCH #ifdef WIN32 -# if _MSC_VER - SetUnhandledExceptionFilter(&TopLevelExceptionFilter); - //TODO: initSocks(); -# endif + setmode(fileno(stdin), _O_BINARY); + setmode(fileno(stdout), _O_BINARY); + setmode(fileno(stderr), _O_BINARY); #endif - // init global variables - globals_init(pool); +#if defined(_MSC_VER) && defined(_DEBUG) + // Get current flag + int tmpFlag = _CrtSetDbgFlag( _CRTDBG_REPORT_FLAG ); + + // Turn on leak-checking bit + tmpFlag |= _CRTDBG_LEAK_CHECK_DF; + + // Set flag to the new value + _CrtSetDbgFlag( tmpFlag ); +// _CrtSetBreakAlloc(61); - if(!filespec_to_process) - PTHROW(0, 0, - 0, - "no file to process"); + _CrtSetReportMode( _CRT_WARN, _CRTDBG_MODE_FILE ); + _CrtSetReportFile( _CRT_WARN, _CRTDBG_FILE_STDERR ); +#endif - // Request info - Request::Info request_info; - const char *document_root=getenv("DOCUMENT_ROOT"); - if(!document_root) { - static char fake_document_root[MAX_STRING]; - strncpy(fake_document_root, filespec_to_process, MAX_STRING); - rsplit(fake_document_root, '/'); rsplit(fake_document_root, '\\');// strip filename - document_root=fake_document_root; - } - request_info.document_root=document_root; - request_info.path_translated=filespec_to_process; - request_info.method=request_method; - request_info.query_string=getenv("QUERY_STRING"); - request_info.uri=getenv("REQUEST_URI"); - request_info.content_type=getenv("CONTENT_TYPE"); - const char *content_length=getenv("CONTENT_LENGTH"); - request_info.content_length=(content_length?atoi(content_length):0); - request_info.cookie=getenv("HTTP_COOKIE"); - - // prepare to process request - Pool request_pool; - Request request(request_pool, - request_info, - cgi ? String::UL_HTML_TYPO : String::UL_NO - ); - - // some root-controlled location - char *root_auto_path; -#ifdef WIN32 - // c:\windows - root_auto_path=(char *)pool.malloc(MAX_STRING); - GetWindowsDirectory(root_auto_path, MAX_STRING); + char filespec_to_process[MAX_STRING]; + full_file_spec(raw_filespec_to_process, filespec_to_process, sizeof(filespec_to_process)); + + const char* request_method=getenv("REQUEST_METHOD"); + bool header_only=request_method && strcasecmp(request_method, "HEAD")==0; + + try { // global try +#ifdef PA_SUPPRESS_SYSTEM_EXCEPTION + call_real_parser_handler__supress_system_exception( #else - // ~nobody - root_auto_path=getenv("HOME"); -#endif - - // beside by binary - char *site_auto_path=(char *)pool.malloc(MAX_STRING); - strncpy(site_auto_path, argv[0], MAX_STRING); // filespec of my binary - rsplit(site_auto_path, '/'); rsplit(site_auto_path, '\\');// strip filename - - // process the request - request.core( - root_auto_path, false, - site_auto_path, false, - header_only); - - // must be last in PTRY{}PCATCH -#ifdef WIN32 -# if _MSC_VER - SetUnhandledExceptionFilter(0); -# endif + real_parser_handler( #endif - // successful finish - return 0; - } PCATCH(e) { // global problem - const char *body=e.comment(); - int content_length=strlen(body); + filespec_to_process, + request_method, header_only); + } catch(const Exception& e) { // global problem + // don't allocate anything on pool here: + // possible pool' exception not catch-ed now + // and there could be out-of-memory exception + char buf[MAX_STRING]; + snprintf(buf, MAX_STRING, "Unhandled exception %s", + e.comment()); + // log it + SAPI::log(SAPI_info, "%s", buf); + + // + int content_length=strlen(buf); // prepare header - add_header_attribute("content-type", "text/plain"); + SAPI::add_header_attribute(SAPI_info, "content-type", "text/plain"); char content_length_cstr[MAX_NUMBER]; - snprintf(content_length_cstr, MAX_NUMBER, "%lu", content_length); - add_header_attribute("content-length", content_length_cstr); + snprintf(content_length_cstr, MAX_NUMBER, "%u", content_length); + SAPI::add_header_attribute(SAPI_info, "content-length", content_length_cstr); // send header - send_header(pool); + SAPI::send_header(SAPI_info); - // body + // send body if(!header_only) - send_body(body, content_length); + SAPI::send_body(SAPI_info, buf, content_length); // unsuccessful finish - return 1; } - PEND_CATCH + + return 0; }