--- parser3/src/main/pa_common.C 2001/11/09 11:47:55 1.87 +++ parser3/src/main/pa_common.C 2013/07/04 10:27:49 1.271 @@ -1,20 +1,44 @@ /** @file Parser: commonly functions. - Copyright(c) 2001 ArtLebedev Group(http://www.artlebedev.com) - Author: Alexander Petrosyan (http://paf.design.ru) + Copyright (c) 2000-2012 Art. Lebedev Studio (http://www.artlebedev.com) + Author: Alexandr Petrosian (http://paf.design.ru) - $Id: pa_common.C,v 1.87 2001/11/09 11:47:55 paf Exp $ -*/ + * BASE64 part + * Authors: Michael Zucchi + * Jeffrey Stedfast + * + * Copyright 2000-2004 Ximian, Inc. (www.ximian.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA. + * + */ #include "pa_common.h" -#include "pa_types.h" #include "pa_exception.h" -#include "pa_pool.h" -#include "pa_globals.h" -#include "pa_value.h" #include "pa_hash.h" -#include "pa_string.h" +#include "pa_globals.h" +#include "pa_charsets.h" +#include "pa_http.h" +#include "pa_request_charsets.h" +#include "pcre.h" +#include "pa_request.h" + +volatile const char * IDENT_PA_COMMON_C="$Id: pa_common.C,v 1.271 2013/07/04 10:27:49 moko Exp $" IDENT_PA_COMMON_H IDENT_PA_HASH_H IDENT_PA_ARRAY_H IDENT_PA_STACK_H; + +// some maybe-undefined constants #ifndef _O_TEXT # define _O_TEXT 0 @@ -22,242 +46,531 @@ #ifndef _O_BINARY # define _O_BINARY 0 #endif -#ifndef O_TRUNC -# define O_TRUNC 0 + +#ifdef HAVE_FTRUNCATE +# define PA_O_TRUNC 0 +#else +# ifdef _O_TRUNC +# define PA_O_TRUNC _O_TRUNC +# else +# error you must have either ftruncate function or _O_TRUNC bit declared +# endif #endif -#if _MSC_VER +// defines for globals -int __vsnprintf(char *b, size_t s, const char *f, va_list l) { - int r=_vsnprintf(b, --s, f, l); - b[s]=0; - return r; -} -int __snprintf(char *b, size_t s, const char *f, ...) { - va_list l; - va_start(l, f); - int r=__vsnprintf(b, s, f, l); - va_end(l); - return r; -} +#define FILE_STATUS_NAME "status" -#endif +// globals + +const String file_status_name(FILE_STATUS_NAME); + +// functions -static char *strnchr(char *buf, size_t size, char c) { - for(; size-->0; buf++) { - if(*buf==c) - return buf; +bool capitalized(const char* s){ + bool upper=true; + for(const char* c=s; *c; c++){ + if(*c != (upper ? toupper((unsigned char)*c) : tolower((unsigned char)*c))) + return false; + upper=strchr("-_ ", *c) != 0; } + return true; +} - return 0; +const char* capitalize(const char* s){ + if(!s || capitalized(s)) + return s; + + char* result=pa_strdup(s); + if(result){ + bool upper=true; + for(char* c=result; *c; c++){ + *c=upper ? (char)toupper((unsigned char)*c) : (char)tolower((unsigned char)*c); + upper=strchr("-_ ", *c) != 0; + } + } + return (const char*)result; } -void fix_line_breaks(char *buf, size_t& size) { +void fix_line_breaks(char *str, size_t& length) { //_asm int 3; - const char * const eob=buf+size; - char *dest=buf; + const char* const eob=str+length; + char* dest=str; // fix DOS: \r\n -> \n // fix Macintosh: \r -> \n - char *bol=buf; - while(char *eol=strnchr(bol, eob -bol, '\r')) { + char* bol=str; + while(char* eol=(char*)memchr(bol, '\r', eob -bol)) { size_t len=eol-bol; if(dest!=bol) - memcpy(dest, bol, len); + memmove(dest, bol, len); dest+=len; - *dest++='\n'; + *dest++='\n'; - if(&eol[1](context); + size_t to_read_size=info.count; + if(!to_read_size) + to_read_size=(size_t)finfo.st_size; + assert( !(info.buf && as_text) ); + if(to_read_size) { + if(info.offset) + lseek(f, info.offset, SEEK_SET); + *info.data=info.buf ? info.buf : (char *)pa_malloc_atomic(to_read_size+1); + ssize_t result=read(f, *info.data, to_read_size); + if(result<0) + throw Exception("file.read", &file_spec, "read failed: %s (%d)", strerror(errno), errno); + *info.data_size=result; + } else { // empty file + // for both, text and binary: for text we need that terminator, for binary we need nonzero pointer to be able to save such files + *info.data=(char *)pa_malloc_atomic(1); + *(char*)(*info.data)=0; + *info.data_size=0; + return; + } +} + +File_read_result file_read(Request_charsets& charsets, const String& file_spec, + bool as_text, HashStringValue *params, + bool fail_on_read_problem, + char* buf, size_t offset, size_t count, bool transcode_text_result) { + File_read_result result={false, 0, 0, 0}; + if(params){ + int valid_options=pa_get_valid_file_options_count(*params); + if(valid_options!=params->count()) + throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); + } + + File_read_action_info info={&result.str, &result.length, buf, offset, count}; + + result.success=file_read_action_under_lock(file_spec, + "read", file_read_action, &info, + as_text, fail_on_read_problem); + + if(as_text){ + if(result.success){ + Charset* asked_charset=0; + if(result.length>=3 && strncmp(result.str, "\xEF\xBB\xBF", 3)==0){ + // skip UTF-8 signature (BOM code) + result.str+=3; + result.length-=3; + asked_charset=&UTF8_charset; + } + + if(params) + if(Value* vcharset_name=params->get(PA_CHARSET_NAME)) + asked_charset=&::charsets.get(vcharset_name->as_string().change_case(charsets.source(), String::CC_UPPER)); + + if(result.length && transcode_text_result && asked_charset){ // length must be checked because transcode returns CONST string in case length==0, which contradicts hacking few lines below + String::C body=String::C(result.str, result.length); + body=Charset::transcode(body, *asked_charset, charsets.source()); + + result.str=const_cast(body.str); // hacking a little + result.length=body.length; + } + } + if(result.length) + fix_line_breaks(result.str, result.length); + } + + return result; +} + +File_read_result file_load(Request& r, const String& file_spec, + bool as_text, HashStringValue *params, + bool fail_on_read_problem, + char* buf, size_t offset, size_t count, bool transcode_text_result) { + + File_read_result result={false, 0, 0, 0}; + if(file_spec.starts_with("http://")) { + if(offset || count) + throw Exception(PARSER_RUNTIME, + 0, + "offset and load options are not supported for HTTP:// file load"); + + // fail on read problem + File_read_http_result http=pa_internal_file_read_http(r, file_spec, as_text, params, transcode_text_result); + result.success=true; + result.str=http.str; + result.length=http.length; + result.headers=http.headers; + } else + result= + file_read(r.charsets, file_spec, as_text, params, fail_on_read_problem, buf, offset, count, transcode_text_result); + + return result; } -char *file_read_text(Pool& pool, const String& file_spec, bool fail_on_read_problem) { - void *result; size_t size; - return file_read(pool, file_spec, result, size, true, fail_on_read_problem)?(char *)result:0; -} -bool file_read(Pool& pool, const String& file_spec, - void*& data, size_t& data_size, bool as_text, - bool fail_on_read_problem, - size_t offset, size_t limit) { - const char *fname=file_spec.cstr(String::UL_FILE_SPEC); -//printf("file_read(%s)\n", fname); + +#ifdef PA_SAFE_MODE +void check_safe_mode(struct stat finfo, const String& file_spec, const char* fname) { + if(finfo.st_uid/*foreign?*/!=geteuid() + && finfo.st_gid/*foreign?*/!=getegid()) + throw Exception(PARSER_RUNTIME, + &file_spec, + "parser is in safe mode: " + "reading files of foreign group and user disabled " + "[recompile parser with --disable-safe-mode configure option], " + "actual filename '%s', " + "fuid(%d)!=euid(%d) or fgid(%d)!=egid(%d)", + fname, + finfo.st_uid, geteuid(), + finfo.st_gid, getegid()); +} +#else +void check_safe_mode(struct stat, const String&, const char*) { +} +#endif + + + +bool file_read_action_under_lock(const String& file_spec, + const char* action_name, File_read_action action, void *context, + bool as_text, + bool fail_on_read_problem) { + const char* fname=file_spec.taint_cstr(String::L_FILE_SPEC); int f; - struct stat finfo; // first open, next stat: // directory update of NTFS hard links performed on open. // ex: // a.html:^test[] and b.html hardlink to a.html // user inserts ! before ^test in a.html - // directory entry of b.html in NTFS not updated at once, + // directory entry of b.html in NTFS not updated at once, // they delay update till open, so we would receive "!^test[" string // if would do stat, next open. - if( - (f=open(fname, O_RDONLY|(as_text?_O_BINARY/*_O_TEXT*/:_O_BINARY)))>=0 && - stat(fname, &finfo)==0) { - /*if(exclusive) - flock(f, LOCK_EX);*/ - size_t max_size=limit?min(offset+limit, finfo.st_size)-offset:finfo.st_size; - int read_size; - if(!max_size) { // eof - if(as_text) { - data=pool.malloc(1); - *(char*)data=0; - } else - data=0; - read_size=0; - } else { - data=pool.malloc(max_size+(as_text?1:0), 3); - if(offset) - lseek(f, offset, SEEK_SET); - read_size=read(f, data, max_size); - } - /*if(exclusive) - flock(f, LOCK_UN);*/ - close(f); - if(!max_size) // eof - return true; - - if(read_size<0 || read_size>max_size) - throw Exception(0, 0, - &file_spec, - "read failed: actually read %d bytes count not in [0..%lu] valid range", - read_size, (unsigned long)max_size); //never + // later: it seems, even this does not help sometimes + if((f=open(fname, O_RDONLY|(as_text?_O_TEXT:_O_BINARY)))>=0) { + try { + if(pa_lock_shared_blocking(f)!=0) + throw Exception("file.lock", + &file_spec, + "shared lock failed: %s (%d), actual filename '%s'", + strerror(errno), errno, fname); + + struct stat finfo; + if(fstat(f, &finfo)!=0) + throw Exception("file.missing", // hardly possible: we just opened it OK + &file_spec, + "stat failed: %s (%d), actual filename '%s'", + strerror(errno), errno, fname); + + check_safe_mode(finfo, file_spec, fname); + + action(finfo, f, file_spec, fname, as_text, context); + } catch(...) { + pa_unlock(f);close(f); + if(fail_on_read_problem) + rethrow; + return false; + } - data_size=read_size; - if(as_text) { - fix_line_breaks((char *)data, data_size); - // note: after fixing - ((char*&)data)[data_size]=0; - } + pa_unlock(f);close(f); return true; - } - if(fail_on_read_problem) - throw Exception(0, 0, - &file_spec, - "read failed: %s (%d), actual filename '%s'", - strerror(errno), errno, fname); - return false; + } else { + if(fail_on_read_problem) + throw Exception(errno==EACCES?"file.access" + :(errno==ENOENT || errno==ENOTDIR || errno==ENODEV)?"file.missing":0, + &file_spec, + "%s failed: %s (%d), actual filename '%s'", + action_name, strerror(errno), errno, fname); + return false; + } } -static void create_dir_for_file(const String& file_spec) { +void create_dir_for_file(const String& file_spec) { size_t pos_after=1; - int pos_before; - while((pos_before=file_spec.pos("/", 1, pos_after))>=0) { - mkdir(file_spec.mid(0, pos_before).cstr(String::UL_FILE_SPEC), 0775); + size_t pos_before; + while((pos_before=file_spec.pos('/', pos_after))!=STRING_NOT_FOUND) { + mkdir(file_spec.mid(0, pos_before).taint_cstr(String::L_FILE_SPEC), 0775); pos_after=pos_before+1; } } -void file_write(Pool& pool, +bool file_write_action_under_lock( const String& file_spec, - const void *data, size_t size, - bool as_text, - bool do_append/*, - bool exclusive*/) { - const char *fname=file_spec.cstr(String::UL_FILE_SPEC); + const char* action_name, + File_write_action action, + void *context, + bool as_text, + bool do_append, + bool do_block, + bool fail_on_lock_problem) { + const char* fname=file_spec.taint_cstr(String::L_FILE_SPEC); int f; if(access(fname, W_OK)!=0) // no - create_dir_for_file(file_spec); + create_dir_for_file(file_spec); if((f=open(fname, O_CREAT|O_RDWR |(as_text?_O_TEXT:_O_BINARY) - |(do_append?O_APPEND:O_TRUNC), 0666))>=0) { - /*if(exclusive) - flock(f, LOCK_EX);*/ + |(do_append?O_APPEND:PA_O_TRUNC), 0664))>=0) { + if((do_block?pa_lock_exclusive_blocking(f):pa_lock_exclusive_nonblocking(f))!=0) { + Exception e("file.lock", + &file_spec, + "shared lock failed: %s (%d), actual filename '%s'", + strerror(errno), errno, fname); + close(f); + if(fail_on_lock_problem) + throw e; + return false; + } + + try { +#if (defined(HAVE_FCHMOD) && defined(PA_SAFE_MODE)) + struct stat finfo; + if(fstat(f, &finfo)==0 && finfo.st_mode & 0111) + fchmod(f, finfo.st_mode & 0666/*clear executable bits*/); // backward: ignore errors if any +#endif + action(f, context); + } catch(...) { +#ifdef HAVE_FTRUNCATE + if(!do_append) + ftruncate(f, lseek(f, 0, SEEK_CUR)); // one can not use O_TRUNC, read lower +#endif + pa_unlock(f);close(f); + rethrow; + } - if(size) write(f, data, size); -#if O_TRUNC==0 - ftruncate(f, size); -#endif - /*if(exclusive) - flock(f, LOCK_UN);*/ - close(f); +#ifdef HAVE_FTRUNCATE + if(!do_append) + ftruncate(f, lseek(f, 0, SEEK_CUR)); // O_TRUNC truncates even exclusevely write-locked file [thanks to Igor Milyakov for discovering] +#endif + pa_unlock(f);close(f); + return true; } else - throw Exception(0, 0, + throw Exception(errno==EACCES?"file.access":0, &file_spec, - "write failed: %s (%d), actual filename '%s'", - strerror(errno), errno, fname); + "%s failed: %s (%d), actual filename '%s'", + action_name, strerror(errno), errno, fname); + // here should be nothing, see rethrow above +} + +#ifndef DOXYGEN +struct File_write_action_info { + const char* str; + size_t length; +}; +#endif + +static void file_write_action(int f, void *context) { + File_write_action_info& info=*static_cast(context); + if(info.length) { + ssize_t written=write(f, info.str, info.length); + if(written<0) + throw Exception("file.write", 0, "write failed: %s (%d)", strerror(errno), errno); + if(written!=info.length) + throw Exception("file.write", 0, "write failed: %u of %u bytes written", written, info.length); + } +} + +void file_write( + Request_charsets& charsets, + const String& file_spec, + const char* data, + size_t size, + bool as_text, + bool do_append, + Charset* asked_charset) { + + if(as_text && asked_charset){ + String::C body=String::C(data, size); + body=Charset::transcode(body, charsets.source(), *asked_charset); + data=body.str; + size=body.length; + }; + + File_write_action_info info={data, size}; + + file_write_action_under_lock( + file_spec, + "write", + file_write_action, + &info, + as_text, + do_append); +} + +static size_t get_dir(char* fname, size_t helper_length){ + bool dir=false; + size_t pos=0; + for(pos=helper_length; pos; pos--){ + char c=fname[pos-1]; + if(c=='/' || c=='\\'){ + fname[pos-1]=0; + dir=true; + } else if(dir) break; + } + return pos; +} + +static bool entry_readable(char* fname, bool need_dir) { + if(need_dir){ + size_t size=strlen(fname); + while(size) { + char c=fname[size-1]; + if(c=='/' || c=='\\') + fname[--size]=0; + else + break; + } + } + + struct stat finfo; + if(access(fname, R_OK)==0 && entry_exists(fname, &finfo)) { + bool is_dir=(finfo.st_mode&S_IFDIR) != 0; + return is_dir==need_dir; + } + return false; +} + +static bool entry_readable(const String& file_spec, bool need_dir) { + return entry_readable(file_spec.taint_cstrm(String::L_FILE_SPEC), need_dir); } // throws nothing! [this is required in file_move & file_delete] -static void rmdir(const String& file_spec, size_t pos_after) { - int pos_before; - if((pos_before=file_spec.pos("/", 1, pos_after))>=0) - rmdir(file_spec, pos_before+1); - - rmdir(file_spec.mid(0, pos_after-1/* / */).cstr(String::UL_FILE_SPEC)); +static void rmdir(const String& file_spec, size_t pos_after=0) { + char* dir_spec=file_spec.taint_cstrm(String::L_FILE_SPEC); + size_t length=strlen(dir_spec); + while( (length=get_dir(dir_spec, length)) && (length > pos_after) ){ +#ifdef WIN32 + if(!entry_readable(dir_spec, true)) + break; + DWORD attrs=GetFileAttributes(dir_spec); + if( + (attrs==INVALID_FILE_ATTRIBUTES) + || !(attrs & FILE_ATTRIBUTE_DIRECTORY) + || (attrs & FILE_ATTRIBUTE_REPARSE_POINT) + ) + break; +#endif + if( rmdir(dir_spec) ) + break; + }; } -void file_delete(Pool& pool, const String& file_spec) { - const char *fname=file_spec.cstr(String::UL_FILE_SPEC); + +bool file_delete(const String& file_spec, bool fail_on_problem, bool keep_empty_dirs) { + const char* fname=file_spec.taint_cstr(String::L_FILE_SPEC); if(unlink(fname)!=0) - throw Exception(0, 0, - &file_spec, - "unlink failed: %s (%d), actual filename '%s'", - strerror(errno), errno, fname); + if(fail_on_problem) + throw Exception(errno==EACCES?"file.access":errno==ENOENT?"file.missing":0, + &file_spec, + "unlink failed: %s (%d), actual filename '%s'", + strerror(errno), errno, fname); + else + return false; - rmdir(file_spec, 1); + if(!keep_empty_dirs) + rmdir(file_spec, 1); + + return true; } -void file_move(Pool& pool, const String& old_spec, const String& new_spec) { - const char *old_spec_cstr=old_spec.cstr(String::UL_FILE_SPEC); - const char *new_spec_cstr=new_spec.cstr(String::UL_FILE_SPEC); + +void file_move(const String& old_spec, const String& new_spec, bool keep_empty_dirs) { + const char* old_spec_cstr=old_spec.taint_cstr(String::L_FILE_SPEC); + const char* new_spec_cstr=new_spec.taint_cstr(String::L_FILE_SPEC); - create_dir_for_file(new_spec); + create_dir_for_file(new_spec); if(rename(old_spec_cstr, new_spec_cstr)!=0) - throw Exception(0, 0, + throw Exception(errno==EACCES?"file.access":errno==ENOENT?"file.missing":0, &old_spec, "rename failed: %s (%d), actual filename '%s' to '%s'", strerror(errno), errno, old_spec_cstr, new_spec_cstr); - rmdir(old_spec, 1); + if(!keep_empty_dirs) + rmdir(old_spec, 1); } -static bool entry_readable(const String& file_spec, bool need_dir) { - const char *fname=file_spec.cstr(String::UL_FILE_SPEC); - struct stat finfo; - if(access(fname, R_OK)==0 && stat(fname, &finfo)==0) { - bool is_dir=finfo.st_mode&S_IFDIR != 0; - return is_dir==need_dir; - } - return false; +bool entry_exists(const char* fname, struct stat *afinfo) { + struct stat lfinfo; + bool result=stat(fname, &lfinfo)==0; + if(afinfo) + *afinfo=lfinfo; + return result; } -bool file_readable(const String& file_spec) { - return entry_readable(file_spec, false); + +bool entry_exists(const String& file_spec) { + const char* fname=file_spec.taint_cstr(String::L_FILE_SPEC); + return entry_exists(fname, 0); } -bool dir_readable(const String& file_spec) { - return entry_readable(file_spec, true); + +bool file_exist(const String& file_spec) { + return entry_readable(file_spec, false); +} + +bool dir_exists(const String& file_spec) { + return entry_readable(file_spec, true); } -String *file_readable(const String& path, const String& name) { - String *result=new(path.pool()) String(path); - *result << "/"; - *result << name; - return file_readable(*result)?result:0; + +const String* file_exist(const String& path, const String& name) { + String& result=*new String(path); + if(path.last_char() != '/') + result << "/"; + result << name; + return file_exist(result)?&result:0; } + bool file_executable(const String& file_spec) { - return access(file_spec.cstr(String::UL_FILE_SPEC), X_OK)==0; + return access(file_spec.taint_cstr(String::L_FILE_SPEC), X_OK)==0; } bool file_stat(const String& file_spec, - size_t& rsize, - time_t& ratime, - time_t& rmtime, - time_t& rctime, - bool fail_on_read_problem) { - Pool& pool=file_spec.pool(); - const char *fname=file_spec.cstr(String::UL_FILE_SPEC); - struct stat finfo; + size_t& rsize, + time_t& ratime, + time_t& rmtime, + time_t& rctime, + bool fail_on_read_problem) { + const char* fname=file_spec.taint_cstr(String::L_FILE_SPEC); + struct stat finfo; if(stat(fname, &finfo)!=0) if(fail_on_read_problem) - throw Exception(0, 0, + throw Exception("file.missing", &file_spec, "getting file size failed: %s (%d), real filename '%s'", strerror(errno), errno, fname); @@ -270,219 +583,799 @@ bool file_stat(const String& file_spec, return true; } -char *getrow(char **row_ref, char delim) { - char *result=*row_ref; - if(result) { - *row_ref=strchr(result, delim); +char* getrow(char* *row_ref, char delim) { + char* result=*row_ref; + if(result) { + *row_ref=strchr(result, delim); if(*row_ref) *((*row_ref)++)=0; else if(!*result) return 0; - } - return result; + } + return result; } -char *lsplit(char *string, char delim) { - if(string) { - char *v=strchr(string, delim); +char* lsplit(char* string, char delim) { + if(string) { + char* v=strchr(string, delim); if(v) { *v=0; return v+1; } - } - return 0; + } + return 0; } -char *lsplit(char **string_ref, char delim) { - char *result=*string_ref; - char *next=lsplit(*string_ref, delim); - *string_ref=next; - return result; +char* lsplit(char* *string_ref, char delim) { + char* result=*string_ref; + char* next=lsplit(*string_ref, delim); + *string_ref=next; + return result; } -char *rsplit(char *string, char delim) { - if(string) { - char *v=strrchr(string, delim); +char* rsplit(char* string, char delim) { + if(string) { + char* v=strrchr(string, delim); if(v) { *v=0; return v+1; } - } - return NULL; + } + return NULL; } -/// @todo less stupid type detection -char *format(Pool& pool, double value, char *fmt) { - char *result=(char *)pool.malloc(MAX_NUMBER, 4); - if(fmt) - if(strpbrk(fmt, "diouxX")) - if(strpbrk(fmt, "ouxX")) - snprintf(result, MAX_NUMBER, fmt, (uint)value ); - else - snprintf(result, MAX_NUMBER, fmt, (int)value ); - else - snprintf(result, MAX_NUMBER, fmt, value); - else - snprintf(result, MAX_NUMBER, "%d", (int)value); - + +// format: %[flags][width][.precision]type http://msdn.microsoft.com/ru-ru/library/56e442dc(en-us,VS.80).aspx +// flags: '-', '+', ' ', '#', '0' http://msdn.microsoft.com/ru-ru/library/8aky45ct(en-us,VS.80).aspx +// width, precision: non negative decimal number +enum FormatType { + FormatInvalid, + FormatInt, + FormatUInt, + FormatDouble +}; +FormatType format_type(char* fmt){ + enum FormatState { + Percent, + Flags, + Width, + Precision, + Done + } state=Percent; + + FormatType result=FormatInvalid; + + char* pos=fmt; + while(char c=*(pos++)){ + switch(state){ + case Percent: + if(c=='%'){ + state=Flags; + } else { + return FormatInvalid; // 1st char must be '%' only + } + break; + case Flags: + if(strchr("-+ #0", c)!=0){ + break; + } + // go to the next step + case Width: + if(c=='.'){ + state=Precision; + break; + } + // go to the next step + case Precision: + if(c>='0' && c<='9'){ + if(state == Flags) state=Width; // no more flags + break; + } else if(c=='d' || c=='i'){ + result=FormatInt; + } else if(strchr("feEgG", c)!=0){ + result=FormatDouble; + } else if(strchr("uoxX", c)!=0){ + result=FormatUInt; + } else { + return FormatInvalid; // invalid char + } + state=Done; + break; + case Done: + return FormatInvalid; // no chars allowed after 'type' + } + } return result; } + +const char* format(double value, char* fmt) { + char local_buf[MAX_NUMBER]; + int size=-1; + + if(fmt && strlen(fmt)){ + switch(format_type(fmt)){ + case FormatDouble: + size=snprintf(local_buf, sizeof(local_buf), fmt, value); + break; + case FormatInt: + size=snprintf(local_buf, sizeof(local_buf), fmt, (int)value); + break; + case FormatUInt: + size=snprintf(local_buf, sizeof(local_buf), fmt, (uint)value); + break; + case FormatInvalid: + throw Exception(PARSER_RUNTIME, + 0, + "Incorrect format string '%s' was specified.", fmt); + } + } else + size=snprintf(local_buf, sizeof(local_buf), "%d", (int)value); + + if(size < 0 || size >= MAX_NUMBER-1){ // on win32 we manually reduce max size while printing + throw Exception(PARSER_RUNTIME, + 0, + "Error occure white executing snprintf with format string '%s'.", fmt); + } + + return pa_strdup(local_buf, (size_t)size); +} + size_t stdout_write(const void *buf, size_t size) { #ifdef WIN32 + size_t to_write = size; do{ - int chunk_written=fwrite(buf, 1, min(8*0x400, size), stdout); + int chunk_written=fwrite(buf, 1, min((size_t)8*0x400, size), stdout); if(chunk_written<=0) break; size-=chunk_written; buf=((const char*)buf)+chunk_written; - } while(size>0); + } while(size>0); - return size; + return to_write-size; #else - return fwrite(buf, 1, size, stdout); + return fwrite(buf, 1, size, stdout); #endif } -char *unescape_chars(Pool& pool, const char *cp, int len) { - char *s=(char *)pool.malloc(len + 1, 5); - enum EscapeState { - EscapeRest, - EscapeFirst, - EscapeSecond - } escapeState=EscapeRest; - int escapedValue=0; +enum EscapeState { + EscapeRest, + EscapeFirst, + EscapeSecond, + EscapeUnicode +}; + +// @todo prescan for reduce required size (unescaped sting in 1 byte charset requires less memory usually) +char* unescape_chars(const char* cp, int len, Charset* charset, bool js){ + char* s=new(PointerFreeGC) char[len+1]; // must be enough (%uXXXX==6 bytes, max utf-8 char length==6 bytes) + char* dst=s; + EscapeState escapeState=EscapeRest; + uint escapedValue=0; int srcPos=0; - int dstPos=0; - while(srcPos < len) { - int ch=cp[srcPos]; - switch(escapeState) { - case EscapeRest: - if(ch=='%') { - escapeState=EscapeFirst; - } else if(ch=='+') { - s[dstPos++]=' '; - } else { - s[dstPos++]=ch; + short int jsCnt=0; + while(srcPosstore_Char((XMLByte*&)dst, (XMLCh)escapedValue, '?'); + escapeState=EscapeRest; + } + } else { + // not full unicode value + escapeState=EscapeRest; + } + break; } - break; - case EscapeFirst: - escapedValue=hex_value[ch] << 4; - escapeState=EscapeSecond; - break; - case EscapeSecond: - escapedValue +=hex_value[ch]; - s[dstPos++]=escapedValue; - escapeState=EscapeRest; - break; } + srcPos++; } - s[dstPos]=0; + + *dst=0; // zero-termination return s; } -/// used by attributed_meaning_to_string / append_attribute_subattribute -struct Attributed_meaning_info { - String *header; // header line being constructed - String::Untaint_lang lang; // language in which to append to that line -}; -static void append_attribute_subattribute(const Hash::Key& akey, Hash::Val *avalue, - void *info) { - if(akey==VALUE_NAME) - return; - - Attributed_meaning_info& ami=*static_cast(info); - - // ...; charset=windows1251 - *ami.header << "; "; - ami.header->append(akey, ami.lang); - *ami.header << "="; - ami.header->append(static_cast(avalue)->as_string(), ami.lang); -} -const String& attributed_meaning_to_string(Value& meaning, - String::Untaint_lang lang) { - String &result=*new(meaning.pool()) String(meaning.pool()); - if(Hash *hash=meaning.get_hash(0)) { - // $value(value) $subattribute(subattribute value) - if(Value *value=static_cast(hash->get(*value_name))) - result.append(value->as_string(), lang, true); - - Attributed_meaning_info attributed_meaning_info={ - &result, - lang - }; - hash->for_each(append_attribute_subattribute, &attributed_meaning_info); - } else // result value - result.append(meaning.as_string(), lang, true); - +char *search_stop(char*& current, char cstop_at) { + // sanity check + if(!current) + return 0; + + // skip leading WS + while(*current==' ' || *current=='\t') + current++; + if(!*current) + return current=0; + + char *result=current; + if(char *pstop_at=strchr(current, cstop_at)) { + *pstop_at=0; + current=pstop_at+1; + } else + current=0; return result; } #ifdef WIN32 -void back_slashes_to_slashes(char *s) { +void back_slashes_to_slashes(char* s) { if(s) for(; *s; s++) if(*s=='\\') - *s='/'; + *s='/'; } /* -void slashes_to_back_slashes(char *s) { +void slashes_to_back_slashes(char* s) { if(s) for(; *s; s++) if(*s=='/') - *s='\\'; + *s='\\'; } */ #endif -bool StrEqNc(const char *s1, const char *s2, bool strict) { +bool StrStartFromNC(const char* str, const char* substr, bool equal){ while(true) { - if(!(*s1)) { - if(!(*s2)) + if(!(*substr)){ + if(!(*str)) return true; else - return !strict; - } else if(!(*s2)) - return !strict; - if(isalpha(*s1)) { - if(tolower(*s1) !=tolower(*s2)) + return !equal; + } + if(!(*str)) + return false; + if(isalpha((unsigned char)*str)) { + if(tolower((unsigned char)*str)!=tolower((unsigned char)*substr)) return false; - } else if((*s1) !=(*s2)) + } else if((*str) != (*substr)) return false; - s1++; - s2++; + str++; + substr++; + } +} + +size_t strpos(const char *str, const char *substr) { + const char *p = strstr(str, substr); + return (p==0)?STRING_NOT_FOUND:p-str; +} + +// content-type: xxx; charset=WE-NEED-THIS +// content-type: xxx; charset="WE-NEED-THIS" +// content-type: xxx; charset="WE-NEED-THIS"; +Charset* detect_charset(const char* content_type){ + if(content_type){ + char* CONTENT_TYPE=pa_strdup(content_type); + + for(char *p=CONTENT_TYPE; *p; p++) + *p=(char)toupper((unsigned char)*p); + + if(const char* begin=strstr(CONTENT_TYPE, "CHARSET=")){ + begin+=8; // skip "CHARSET=" + char* end=0; + if(*begin && (*begin=='"' || *begin =='\'')){ + char quote=*begin; + begin++; + end=(char*)strchr(begin, quote); + } + if(!end) + end=(char*)strchr(begin, ';'); + + if(end) + *end=0; // terminator + + return *begin?&charsets.get(begin):0; + } } + return 0; } + static bool isLeap(int year) { - return !( - (year % 4) || ((year % 400) && !(year % 100)) - ); + return !( + (year % 4) || ((year % 400) && !(year % 100)) + ); } int getMonthDays(int year, int month) { - int monthDays[]={ - 31, - isLeap(year) ? 29 : 28, - 31, - 30, - 31, - 30, - 31, - 31, - 30, - 31, - 30, - 31 - }; - return monthDays[month]; -} - -void remove_crlf(char *start, char *end) { - for(char *p=start; ps) + r=s; +#endif + b[r]=0; + return r; +} + +int __snprintf(char* b, size_t s, const char* f, ...) { + va_list l; + va_start(l, f); + int r=__vsnprintf(b, s, f, l); + va_end(l); + return r; +} + +/* mime64 functions are from libgmime[http://spruce.sourceforge.net/gmime/] lib */ +/* + * Authors: Michael Zucchi + * Jeffrey Stedfast + * + * Copyright 2000 Helix Code, Inc. (www.helixcode.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA. + * + */ +static const char *base64_alphabet = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +/** + * g_mime_utils_base64_encode_step: + * @in: input stream + * @inlen: length of the input + * @out: output string + * @state: holds the number of bits that are stored in @save + * @save: leftover bits that have not yet been encoded + * + * Base64 encodes a chunk of data. Performs an 'encode step', only + * encodes blocks of 3 characters to the output at a time, saves + * left-over state in state and save (initialise to 0 on first + * invocation). + * + * Returns the number of bytes encoded. + **/ + +#define BASE64_GROUPS_IN_LINE 19 + +static size_t +g_mime_utils_base64_encode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save) +{ + register const unsigned char *inptr; + register unsigned char *outptr; + + if (inlen <= 0) + return 0; + + inptr = in; + outptr = out; + + if (inlen + ((unsigned char *)save)[0] > 2) { + const unsigned char *inend = in + inlen - 2; + register int c1 = 0, c2 = 0, c3 = 0; + register int already; + + already = *state; + + switch (((char *)save)[0]) { + case 1: c1 = ((unsigned char *)save)[1]; goto skip1; + case 2: c1 = ((unsigned char *)save)[1]; + c2 = ((unsigned char *)save)[2]; goto skip2; + } + + /* yes, we jump into the loop, no i'm not going to change it, its beautiful! */ + while (inptr < inend) { + c1 = *inptr++; + skip1: + c2 = *inptr++; + skip2: + c3 = *inptr++; + *outptr++ = base64_alphabet [c1 >> 2]; + *outptr++ = base64_alphabet [(c2 >> 4) | ((c1 & 0x3) << 4)]; + *outptr++ = base64_alphabet [((c2 & 0x0f) << 2) | (c3 >> 6)]; + *outptr++ = base64_alphabet [c3 & 0x3f]; + /* this is a bit ugly ... */ + if ((++already) >= BASE64_GROUPS_IN_LINE) { + *outptr++ = '\n'; + already = 0; + } + } + + ((unsigned char *)save)[0] = 0; + inlen = 2 - (inptr - inend); + *state = already; + } + + //d(printf ("state = %d, inlen = %d\n", (int)((char *)save)[0], inlen)); + + if (inlen > 0) { + register char *saveout; + + /* points to the slot for the next char to save */ + saveout = & (((char *)save)[1]) + ((char *)save)[0]; + + /* inlen can only be 0 1 or 2 */ + switch (inlen) { + case 2: *saveout++ = *inptr++; + case 1: *saveout++ = *inptr++; } + *(char *)save = *(char *)save+(char)inlen; + } + + /*d(printf ("mode = %d\nc1 = %c\nc2 = %c\n", + (int)((char *)save)[0], + (int)((char *)save)[1], + (int)((char *)save)[2]));*/ + + return (outptr - out); +} + +/** + * g_mime_utils_base64_encode_close: + * @in: input stream + * @inlen: length of the input + * @out: output string + * @state: holds the number of bits that are stored in @save + * @save: leftover bits that have not yet been encoded + * + * Base64 encodes the input stream to the output stream. Call this + * when finished encoding data with g_mime_utils_base64_encode_step to + * flush off the last little bit. + * + * Returns the number of bytes encoded. + **/ +static size_t +g_mime_utils_base64_encode_close (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save) +{ + unsigned char *outptr = out; + int c1, c2; + + if (inlen > 0) + outptr += g_mime_utils_base64_encode_step (in, inlen, outptr, state, save); + + c1 = ((unsigned char *)save)[1]; + c2 = ((unsigned char *)save)[2]; + + switch (((unsigned char *)save)[0]) { + case 2: + outptr[2] = base64_alphabet [(c2 & 0x0f) << 2]; + goto skip; + case 1: + outptr[2] = '='; + skip: + outptr[0] = base64_alphabet [c1 >> 2]; + outptr[1] = base64_alphabet [c2 >> 4 | ((c1 & 0x3) << 4)]; + outptr[3] = '='; + outptr += 4; + break; + } + + *outptr++ = 0; + + *save = 0; + *state = 0; + + return (outptr - out); } + +static unsigned char gmime_base64_rank[256] = { + 255,255,255,255,255,255,255,255,255,254,254,255,255,254,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 254,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255, 0,255,255, + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255, + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +}; + +/** + * g_mime_utils_base64_decode_step: + * @in: input stream + * @inlen: max length of data to decode + * @out: output stream + * @state: holds the number of bits that are stored in @save + * @save: leftover bits that have not yet been decoded + * @strict: only base64 and whitespace chars are allowed + * + * Decodes a chunk of base64 encoded data. + * + * Returns the number of bytes decoded (which have been dumped in @out). + **/ +size_t +g_mime_utils_base64_decode_step(const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save, bool strict=false) +{ + const unsigned char *inptr; + unsigned char *outptr; + const unsigned char *inend; + int saved; + unsigned char c; + int i; + + inend = in + inlen; + outptr = out; + + /* convert 4 base64 bytes to 3 normal bytes */ + saved = *save; + i = *state; + inptr = in; + while (inptr < inend) { + c = gmime_base64_rank[*inptr++]; + switch(c) { + case 0xff: // non-base64 and non-whitespace chars. not allowed in strict mode + if(strict) + throw Exception(BASE64_FORMAT, 0, "Invalid base64 char on position %d is detected", inptr-in-1); + case 0xfe: // whitespace chars 0x09, 0x0A, 0x0D, 0x20 are allowed in any mode + break; + default: + saved = (saved << 6) | c; + i++; + if (i == 4) { + *outptr++ = (unsigned char)(saved >> 16); + *outptr++ = (unsigned char)(saved >> 8); + *outptr++ = (unsigned char)(saved); + i = 0; + } + } + } + + *save = saved; + *state = i; + + /* quick scan back for '=' on the end somewhere */ + /* fortunately we can drop 1 output char for each trailing = (upto 2) */ + i = 2; + while (inptr > in && i) { + inptr--; + if (gmime_base64_rank[*inptr] <= 0xfe) { + if (*inptr == '=' && outptr > out) + outptr--; + i--; + } + } + + /* if i != 0 then there is a truncation error! */ + return (outptr - out); +} + + +char* pa_base64_encode(const char *in, size_t in_size){ + size_t new_size = ((in_size / 3 + 1) * 4); + new_size += new_size / (BASE64_GROUPS_IN_LINE * 4)/*new lines*/ + 1/*zero terminator*/; + char* result = new(PointerFreeGC) char[new_size]; + int state=0; + int save=0; +#ifndef NDEBUG + size_t filled= +#endif + g_mime_utils_base64_encode_close ((const unsigned char*)in, in_size, (unsigned char*)result, &state, &save); + + //throw Exception(PARSER_RUNTIME, 0, "%d %d %d", in_size, new_size, filled); + assert(filled <= new_size); + + return result; +} + + +char* pa_base64_encode(const String& file_spec){ + unsigned char* base64=0; + File_base64_action_info info={&base64}; + + file_read_action_under_lock(file_spec, + "pa_base64_encode", file_base64_file_action, &info); + + return (char*)base64; +} + + +static void file_base64_file_action( + struct stat& finfo, + int f, + const String&, const char* /*fname*/, bool, + void *context) { + + if(finfo.st_size) { + File_base64_action_info& info=*static_cast(context); + *info.base64=new(PointerFreeGC) unsigned char[finfo.st_size * 2 + 6]; + unsigned char* base64 = *info.base64; + int state=0; + int save=0; + int nCount; + do { + unsigned char buffer[FILE_BUFFER_SIZE]; + nCount = file_block_read(f, buffer, sizeof(buffer)); + if( nCount ){ + size_t filled=g_mime_utils_base64_encode_step ((const unsigned char*)buffer, nCount, base64, &state, &save); + base64+=filled; + } + } while(nCount > 0); + g_mime_utils_base64_encode_close (0, 0, base64, &state, &save); + } +} + +void pa_base64_decode(const char *in, size_t in_size, char*& result, size_t& result_size, bool strict) { + // every 4 base64 bytes are converted into 3 normal bytes + // not full set (tail) of 4-bytes set is ignored + size_t new_size=in_size/4*3; + result=new(PointerFreeGC) char[new_size+1/*terminator*/]; + + int state=0; + int save=0; + result_size= + g_mime_utils_base64_decode_step ((const unsigned char*)in, in_size, + (unsigned char*)result, &state, &save, strict); + assert(result_size <= new_size); + result[result_size]=0; // for text files + + if(strict && state!=0) + throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars"); +} + + +int file_block_read(const int f, unsigned char* buffer, const size_t size){ + int nCount = read(f, buffer, size); + if (nCount < 0) + throw Exception("file.read", + 0, + "read failed: %s (%d)", strerror(errno), errno); + return nCount; +} + +const unsigned long pa_crc32(const char *in, size_t in_size){ + unsigned long crc32=0xFFFFFFFF; + + InitCrc32Table(); + for(size_t i = 0; i(context); + if(finfo.st_size) { + InitCrc32Table(); + int nCount=0; + do { + unsigned char buffer[FILE_BUFFER_SIZE]; + nCount = file_block_read(f, buffer, sizeof(buffer)); + for(int i = 0; i < nCount; i++) CalcCrc32(buffer[i], crc32); + } while(nCount > 0); + } +} +