--- parser3/src/main/pa_common.C 2009/01/12 07:15:45 1.239 +++ parser3/src/main/pa_common.C 2010/05/25 09:30:30 1.259 @@ -1,7 +1,7 @@ /** @file Parser: commonly functions. - Copyright(c) 2001-2005 ArtLebedev Group (http://www.artlebedev.com) + Copyright(c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) * BASE64 part @@ -26,7 +26,7 @@ * */ -static const char * const IDENT_COMMON_C="$Date: 2009/01/12 07:15:45 $"; +static const char * const IDENT_COMMON_C="$Date: 2010/05/25 09:30:30 $"; #include "pa_common.h" #include "pa_exception.h" @@ -36,6 +36,7 @@ static const char * const IDENT_COMMON_C #include "pa_http.h" #include "pa_request_charsets.h" #include "pcre.h" +#include "pa_request.h" // some maybe-undefined constants @@ -66,6 +67,31 @@ const String file_status_name(FILE_STATU // functions +bool capitalized(const char* s){ + bool upper=true; + for(const char* c=s; *c; c++){ + if(*c != (upper ? toupper((unsigned char)*c) : tolower((unsigned char)*c))) + return false; + upper=strchr("-_ ", *c) != 0; + } + return true; +} + +const char* capitalize(const char* s){ + if(!s || capitalized(s)) + return s; + + char* result=pa_strdup(s); + if(result){ + bool upper=true; + for(char* c=result; *c; c++){ + *c=upper ? (char)toupper((unsigned char)*c) : (char)tolower((unsigned char)*c); + upper=strchr("-_ ", *c) != 0; + } + } + return (const char*)result; +} + void fix_line_breaks(char *str, size_t& length) { //_asm int 3; const char* const eob=str+length; @@ -102,6 +128,16 @@ char* file_read_text(Request_charsets& c return file.success?file.str:0; } +char* file_load_text(Request& r, + const String& file_spec, + bool fail_on_read_problem, + HashStringValue* params, + bool transcode_result) { + File_read_result file= + file_load(r, file_spec, true, params, fail_on_read_problem, 0, 0, 0, transcode_result); + return file.success?file.str:0; +} + /// these options were handled but not checked elsewhere, now check them int pa_get_valid_file_options_count(HashStringValue& options) { int result=0; @@ -139,7 +175,7 @@ static void file_read_action( lseek(f, info.offset, SEEK_SET); *info.data=info.buf ? info.buf - : new(PointerFreeGC) char[to_read_size+(as_text?1:0)]; + : (char *)pa_malloc_atomic(to_read_size+1); *info.data_size=(size_t)read(f, *info.data, to_read_size); if(ssize_t(*info.data_size)<0 || *info.data_size>to_read_size) @@ -149,53 +185,42 @@ static void file_read_action( *info.data_size, to_read_size); } else { // empty file // for both, text and binary: for text we need that terminator, for binary we need nonzero pointer to be able to save such files - *info.data=new(PointerFreeGC) char[1]; + *info.data=(char *)pa_malloc_atomic(1); *(char*)(*info.data)=0; *info.data_size=0; return; } } + File_read_result file_read(Request_charsets& charsets, const String& file_spec, bool as_text, HashStringValue *params, bool fail_on_read_problem, char* buf, size_t offset, size_t count, bool transcode_text_result) { File_read_result result={false, 0, 0, 0}; - if(file_spec.starts_with("http://")) { - if(offset || count) + if(params){ + int valid_options=pa_get_valid_file_options_count(*params); + if(valid_options!=params->count()) throw Exception(PARSER_RUNTIME, 0, - "offset and load options are not supported for HTTP:// file load"); + INVALID_OPTION_PASSED); + } - // fail on read problem - File_read_http_result http=pa_internal_file_read_http(charsets, file_spec, as_text, params, transcode_text_result); - result.success=true; - result.str=http.str; - result.length=http.length; - result.headers=http.headers; - } else { - if(params){ - int valid_options=pa_get_valid_file_options_count(*params); - if(valid_options!=params->count()) - throw Exception(PARSER_RUNTIME, - 0, - "invalid option passed"); - } + File_read_action_info info={&result.str, &result.length, buf, offset, count}; - File_read_action_info info={&result.str, &result.length, - buf, offset, count}; - result.success=file_read_action_under_lock(file_spec, - "read", file_read_action, &info, - as_text, fail_on_read_problem); + result.success=file_read_action_under_lock(file_spec, + "read", file_read_action, &info, + as_text, fail_on_read_problem); - if(as_text && result.success){ + if(as_text){ + if(result.success){ if(result.length>=3 && strncmp(result.str, "\xEF\xBB\xBF", 3)==0){ - // skip UTF-8 signature: EF BB BF (BOM code) + // skip UTF-8 signature (BOM code) result.str+=3; result.length-=3; } if(result.length && transcode_text_result && params){ // must be checked because transcode returns CONST string in case length==0, which contradicts hacking few lines below - if( Value* vcharset_name=params->get(PA_CHARSET_NAME) ){ + if(Value* vcharset_name=params->get(PA_CHARSET_NAME)){ Charset asked_charset=::charsets.get(vcharset_name->as_string(). change_case(charsets.source(), String::CC_UPPER)); @@ -207,16 +232,41 @@ File_read_result file_read(Request_chars } } } + if(result.length) + fix_line_breaks(result.str, result.length); } - - if(as_text && result.length) - fix_line_breaks(result.str, result.length); return result; } +File_read_result file_load(Request& r, const String& file_spec, + bool as_text, HashStringValue *params, + bool fail_on_read_problem, + char* buf, size_t offset, size_t count, bool transcode_text_result) { + + File_read_result result={false, 0, 0, 0}; + if(file_spec.starts_with("http://")) { + if(offset || count) + throw Exception(PARSER_RUNTIME, + 0, + "offset and load options are not supported for HTTP:// file load"); + + // fail on read problem + File_read_http_result http=pa_internal_file_read_http(r, file_spec, as_text, params, transcode_text_result); + result.success=true; + result.str=http.str; + result.length=http.length; + result.headers=http.headers; + } else + result= + file_read(r.charsets, file_spec, as_text, params, fail_on_read_problem, buf, offset, count, transcode_text_result); + + return result; +} + + #ifdef PA_SAFE_MODE -void check_safe_mode(struct stat finfo, const String& file_spec, const char* fname) { +void check_safe_mode(struct stat finfo, const String& file_spec, const char* fname) { if(finfo.st_uid/*foreign?*/!=geteuid() && finfo.st_gid/*foreign?*/!=getegid()) throw Exception(PARSER_RUNTIME, @@ -228,15 +278,20 @@ void check_safe_mode(struct stat finfo, "fuid(%d)!=euid(%d) or fgid(%d)!=egid(%d)", fname, finfo.st_uid, geteuid(), - finfo.st_gid, getegid()); -} -#endif + finfo.st_gid, getegid()); +} +#else +void check_safe_mode(struct stat, const String&, const char*) { +} +#endif + + bool file_read_action_under_lock(const String& file_spec, const char* action_name, File_read_action action, void *context, bool as_text, bool fail_on_read_problem) { - const char* fname=file_spec.cstr(String::L_FILE_SPEC); + const char* fname=file_spec.taint_cstr(String::L_FILE_SPEC); int f; // first open, next stat: @@ -257,15 +312,13 @@ bool file_read_action_under_lock(const S strerror(errno), errno, fname); struct stat finfo; - if(stat(fname, &finfo)!=0) + if(fstat(f, &finfo)!=0) throw Exception("file.missing", // hardly possible: we just opened it OK &file_spec, "stat failed: %s (%d), actual filename '%s'", strerror(errno), errno, fname); -#ifdef PA_SAFE_MODE check_safe_mode(finfo, file_spec, fname); -#endif action(finfo, f, file_spec, fname, as_text, context); } catch(...) { @@ -279,7 +332,8 @@ bool file_read_action_under_lock(const S return true; } else { if(fail_on_read_problem) - throw Exception(errno==EACCES?"file.access":errno==ENOENT?"file.missing":0, + throw Exception(errno==EACCES?"file.access" + :(errno==ENOENT || errno==ENOTDIR || errno==ENODEV)?"file.missing":0, &file_spec, "%s failed: %s (%d), actual filename '%s'", action_name, strerror(errno), errno, fname); @@ -291,7 +345,7 @@ void create_dir_for_file(const String& f size_t pos_after=1; size_t pos_before; while((pos_before=file_spec.pos('/', pos_after))!=STRING_NOT_FOUND) { - mkdir(file_spec.mid(0, pos_before).cstr(String::L_FILE_SPEC), 0775); + mkdir(file_spec.mid(0, pos_before).taint_cstr(String::L_FILE_SPEC), 0775); pos_after=pos_before+1; } } @@ -305,7 +359,7 @@ bool file_write_action_under_lock( bool do_append, bool do_block, bool fail_on_lock_problem) { - const char* fname=file_spec.cstr(String::L_FILE_SPEC); + const char* fname=file_spec.taint_cstr(String::L_FILE_SPEC); int f; if(access(fname, W_OK)!=0) // no create_dir_for_file(file_spec); @@ -326,7 +380,12 @@ bool file_write_action_under_lock( } try { - action(f, context); +#if (defined(HAVE_FCHMOD) && defined(PA_SAFE_MODE)) + struct stat finfo; + if(fstat(f, &finfo)==0 && finfo.st_mode & 0111) + fchmod(f, finfo.st_mode & 0666/*clear executable bits*/); // backward: ignore errors if any +#endif + action(f, context); } catch(...) { #ifdef HAVE_FTRUNCATE if(!do_append) @@ -352,7 +411,8 @@ bool file_write_action_under_lock( #ifndef DOXYGEN struct File_write_action_info { - const char* str; size_t length; + const char* str; + size_t length; }; #endif static void file_write_action(int f, void *context) { @@ -366,10 +426,21 @@ static void file_write_action(int f, voi } } void file_write( - const String& file_spec, - const char* data, size_t size, + Request_charsets& charsets, + const String& file_spec, + const char* data, + size_t size, bool as_text, - bool do_append) { + bool do_append, + Charset* asked_charset) { + + if(as_text && asked_charset){ + String::C body=String::C(data, size); + body=Charset::transcode(body, charsets.source(), *asked_charset); + data=body.str; + size=body.length; + }; + File_write_action_info info={data, size}; file_write_action_under_lock( @@ -387,11 +458,11 @@ static void rmdir(const String& file_spe if((pos_before=file_spec.pos('/', pos_after))!=STRING_NOT_FOUND) rmdir(file_spec, pos_before+1); - rmdir(file_spec.mid(0, pos_after-1/* / */).cstr(String::L_FILE_SPEC)); + rmdir(file_spec.mid(0, pos_after-1/* / */).taint_cstr(String::L_FILE_SPEC)); } bool file_delete(const String& file_spec, bool fail_on_problem) { - const char* fname=file_spec.cstr(String::L_FILE_SPEC); + const char* fname=file_spec.taint_cstr(String::L_FILE_SPEC); if(unlink(fname)!=0) if(fail_on_problem) throw Exception(errno==EACCES?"file.access":errno==ENOENT?"file.missing":0, @@ -406,8 +477,8 @@ bool file_delete(const String& file_spec } void file_move(const String& old_spec, const String& new_spec) { - const char* old_spec_cstr=old_spec.cstr(String::L_FILE_SPEC); - const char* new_spec_cstr=new_spec.cstr(String::L_FILE_SPEC); + const char* old_spec_cstr=old_spec.taint_cstr(String::L_FILE_SPEC); + const char* new_spec_cstr=new_spec.taint_cstr(String::L_FILE_SPEC); create_dir_for_file(new_spec); @@ -430,12 +501,12 @@ bool entry_exists(const char* fname, str } bool entry_exists(const String& file_spec) { - const char* fname=file_spec.cstr(String::L_FILE_SPEC); + const char* fname=file_spec.taint_cstr(String::L_FILE_SPEC); return entry_exists(fname, 0); } static bool entry_readable(const String& file_spec, bool need_dir) { - char* fname=file_spec.cstrm(String::L_FILE_SPEC); + char* fname=file_spec.taint_cstrm(String::L_FILE_SPEC); if(need_dir) { size_t size=strlen(fname); while(size) { @@ -470,7 +541,7 @@ const String* file_exist(const String& p } bool file_executable(const String& file_spec) { - return access(file_spec.cstr(String::L_FILE_SPEC), X_OK)==0; + return access(file_spec.taint_cstr(String::L_FILE_SPEC), X_OK)==0; } bool file_stat(const String& file_spec, @@ -479,7 +550,7 @@ bool file_stat(const String& file_spec, time_t& rmtime, time_t& rctime, bool fail_on_read_problem) { - const char* fname=file_spec.cstr(String::L_FILE_SPEC); + const char* fname=file_spec.taint_cstr(String::L_FILE_SPEC); struct stat finfo; if(stat(fname, &finfo)!=0) if(fail_on_read_problem) @@ -659,7 +730,7 @@ enum EscapeState { }; // @todo prescan for reduce required size (unescaped sting in 1 byte charset requires less memory usually) -char* unescape_chars(const char* cp, int len, Charset* charset, bool ignore_plus){ +char* unescape_chars(const char* cp, int len, Charset* charset, bool js){ char* s=new(PointerFreeGC) char[len+1]; // must be enough (%uXXXX==6 bytes, max utf-8 char length==6 bytes) char* dst=s; EscapeState escapeState=EscapeRest; @@ -668,12 +739,12 @@ char* unescape_chars(const char* cp, int short int jsCnt=0; while(srcPos> 6)]; *outptr++ = base64_alphabet [c3 & 0x3f]; /* this is a bit ugly ... */ - if ((++already) >= 19) { + if ((++already) >= BASE64_GROUPS_IN_LINE) { *outptr++ = '\n'; already = 0; } @@ -1182,16 +1222,18 @@ g_mime_utils_base64_decode_step (const u char* pa_base64_encode(const char *in, size_t in_size){ - /* wont go to more than 2x size (overly conservative) */ - char* result=new(PointerFreeGC) char[in_size * 2 + 6]; + size_t new_size = ((in_size / 3 + 1) * 4); + new_size += new_size / (BASE64_GROUPS_IN_LINE * 4)/*new lines*/ + 1/*zero terminator*/; + char* result = new(PointerFreeGC) char[new_size]; int state=0; int save=0; #ifndef NDEBUG size_t filled= #endif - g_mime_utils_base64_encode_close ((const unsigned char*)in, in_size, - (unsigned char*)result, &state, &save); - assert(filled <= in_size * 2 + 6); + g_mime_utils_base64_encode_close ((const unsigned char*)in, in_size, (unsigned char*)result, &state, &save); + + //throw Exception(PARSER_RUNTIME, 0, "%d %d %d", in_size, new_size, filled); + assert(filled <= new_size); return result; } @@ -1258,7 +1300,7 @@ int file_block_read(const int f, unsigne const unsigned long pa_crc32(const char *in, size_t in_size){ unsigned long crc32=0xFFFFFFFF; - InitCrc32Table(); + InitCrc32Table(); for(size_t i = 0; i