--- parser3/src/main/pa_common.C 2004/09/09 13:57:27 1.194 +++ parser3/src/main/pa_common.C 2024/09/11 21:07:36 1.331 @@ -1,50 +1,35 @@ /** @file Parser: commonly functions. - Copyright(c) 2001-2004 ArtLebedev Group (http://www.artlebedev.com) - Author: Alexandr Petrosian (http://paf.design.ru) + Copyright (c) 2000-2023 Art. Lebedev Studio (http://www.artlebedev.com) + Authors: Konstantin Morshnev , Alexandr Petrosian */ -static const char * const IDENT_COMMON_C="$Date: 2004/09/09 13:57:27 $"; - #include "pa_common.h" #include "pa_exception.h" #include "pa_hash.h" #include "pa_globals.h" -#include "pa_request_charsets.h" #include "pa_charsets.h" +#include "pa_http.h" +#include "pa_request_charsets.h" +#include "pa_request.h" -#define PA_HTTP +#include "pa_idna.h" +#include "pa_convert_utf.h" -#ifdef PA_HTTP -#include "pa_vstring.h" -#include "pa_vint.h" -#include "pa_vhash.h" -#include "pa_vtable.h" - -#ifdef CYGWIN -#define _GNU_H_WINDOWS32_SOCKETS -// for PASCAL +#ifdef _MSC_VER #include -// SOCKET -typedef u_int SOCKET; -int PASCAL closesocket(SOCKET); -#else -# if defined(WIN32) -# include -# else -# define closesocket close -# endif +#include #endif +#ifdef _MSC_VER +#define pa_mkdir(path, mode) _mkdir(path) #else - -# if defined(WIN32) -# include -# endif - +#define pa_mkdir(path, mode) mkdir(path, mode) #endif +volatile const char * IDENT_PA_COMMON_C="$Id: pa_common.C,v 1.331 2024/09/11 21:07:36 moko Exp $" IDENT_PA_COMMON_H IDENT_PA_HASH_H IDENT_PA_ARRAY_H IDENT_PA_STACK_H; + // some maybe-undefined constants #ifndef _O_TEXT @@ -64,10 +49,6 @@ int PASCAL closesocket(SOCKET); # endif #endif -# ifndef INADDR_NONE -# define INADDR_NONE ((ulong) -1) -# endif - // defines for globals #define FILE_STATUS_NAME "status" @@ -76,698 +57,192 @@ int PASCAL closesocket(SOCKET); const String file_status_name(FILE_STATUS_NAME); -// defines +String sql_bind_name(SQL_BIND_NAME); +String sql_limit_name(PA_SQL_LIMIT_NAME); +String sql_offset_name(PA_SQL_OFFSET_NAME); +String sql_default_name(SQL_DEFAULT_NAME); +String sql_distinct_name(SQL_DISTINCT_NAME); +String sql_value_type_name(SQL_VALUE_TYPE_NAME); -#define HTTP_METHOD_NAME "method" -#define HTTP_FORM_NAME "form" -#define HTTP_BODY_NAME "body" -#define HTTP_TIMEOUT_NAME "timeout" -#define HTTP_HEADERS_NAME "headers" -#define HTTP_ANY_STATUS_NAME "any-status" -#define HTTP_CHARSET_NAME "charset" -#define HTTP_TABLES_NAME "tables" -#define HTTP_USER "user" -#define HTTP_PASSWORD "password" +// forwards -// defines - -#define DEFAULT_USER_AGENT "parser3" +const UTF16* pa_utf16_encode(const char* in, Charset& source_charset); // functions -void fix_line_breaks(char *str, size_t& length) { - //_asm int 3; - const char* const eob=str+length; - char* dest=str; - // fix DOS: \r\n -> \n - // fix Macintosh: \r -> \n - char* bol=str; - while(char* eol=(char*)memchr(bol, '\r', eob -bol)) { - size_t len=eol-bol; - if(dest!=bol) - memcpy(dest, bol, len); - dest+=len; - *dest++='\n'; - - if(&eol[1]sin_family=AF_INET; - addr->sin_port=htons(port); - if(host) { - ulong packed_ip=inet_addr(host); - if(packed_ip!=INADDR_NONE) - memcpy(&addr->sin_addr, &packed_ip, sizeof(packed_ip)); - else { - struct hostent *hostIP=gethostbyname(host); - if(hostIP) - memcpy(&addr->sin_addr, hostIP->h_addr, hostIP->h_length); - else - return false; - } - } else - addr->sin_addr.s_addr=INADDR_ANY; - return true; +int pa_stat(const char *pathname, struct stat *buffer){ + const UTF16* utf16name=pa_utf16_encode(pathname, pa_thread_request().charsets.source()); + return _wstat64((const wchar_t *)utf16name, buffer); } -static int http_read_response(char*& response, size_t& response_size, int sock, bool fail_on_status_ne_200){ - response=(char*)pa_malloc_atomic(1/*terminator*/); // setting memory block type - response[(response_size=0)]=0; - int result=0; - char* EOLat=0; - while(true) { - char buf[MAX_STRING*10]; - ssize_t received_size=recv(sock, buf, sizeof(buf), 0); - if(received_size<=0) - break; - response=(char*)pa_realloc(response, response_size+received_size+1/*terminator*/); - memcpy(response+response_size, buf, received_size); - response_size+=received_size; - response[response_size]=0; - - if(!result && (EOLat=strstr(response, "\n"))) { // checking status in first response - const String status_line(pa_strdup(response, EOLat-response)); - ArrayString astatus; - size_t pos_after=0; - status_line.split(astatus, pos_after, " "); - const String& status_code=*astatus.get(astatus.count()>1?1:0); - result=status_code.as_int(); - - if(fail_on_status_ne_200 && result!=200) - throw Exception("http.status", - &status_code, - "invalid HTTP response status"); - } - } - if(result) - return result; - else - throw Exception("http.response", - 0, - "bad response from host - no status found (size=%u)", response_size); +int pa_open(const char *pathname, int flags, int mode){ + const UTF16* utf16name=pa_utf16_encode(pathname, pa_thread_request().charsets.source()); + return _wopen((const wchar_t *)utf16name, flags, mode); } -/* ********************** request *************************** */ - -#if defined(SIGALRM) && defined(HAVE_SIGSETJMP) && defined(HAVE_SIGLONGJMP) -# define PA_USE_ALARM -#endif - -#ifdef PA_USE_ALARM -static sigjmp_buf timeout_env; -static void timeout_handler(int sig){ - siglongjmp(timeout_env, 1); +FILE *pa_fopen(const char *pathname, const char *mode){ + const UTF16* utf16name=pa_utf16_encode(pathname, pa_thread_request().charsets.source()); + const UTF16* utf16mode=pa_utf16_encode(mode, pa_thread_request().charsets.source()); + return _wfopen((const wchar_t *)utf16name, (const wchar_t *)utf16mode); } -#endif -static int http_request(char*& response, size_t& response_size, - const char* host, short port, - const char* request, - int -#ifdef PA_USE_ALARM - timeout #endif - , - bool fail_on_status_ne_200) { - if(!host) - throw Exception("http.host", - 0, - "zero hostname"); //never -#ifdef PA_USE_ALARM - signal(SIGALRM, timeout_handler); -#endif - int sock=-1; -#ifdef PA_USE_ALARM - if(sigsetjmp(timeout_env, 1)) { - // stupid gcc [2.95.4] generated bad code - // which failed to handle sigsetjmp+throw: crashed inside of pre-throw code. - // rewritten simplier [though duplicating closesocket code] - if(sock>=0) - closesocket(sock); - throw Exception("http.timeout", - origin_string, - "timeout occured while retrieving document"); - return 0; // never - } else { - alarm(timeout); -#endif - try { - int result; - struct sockaddr_in dest; - - if(!set_addr(&dest, host, port)) - throw Exception("http.host", - 0, - "can not resolve hostname \"%s\"", host); - - if((sock=socket(AF_INET, SOCK_STREAM, IPPROTO_TCP/*0*/))<0) - throw Exception("http.connect", - 0, - "can not make socket: %s (%d)", strerror(errno), errno); - if(connect(sock, (struct sockaddr *)&dest, sizeof(dest))) - throw Exception("http.connect", - 0, - "can not connect to host \"%s\": %s (%d)", host, strerror(errno), errno); - size_t request_size=strlen(request); - if(send(sock, request, request_size, 0)!=(ssize_t)request_size) - throw Exception("http.connect", - 0, - "error sending request: %s (%d)", strerror(errno), errno); - - result=http_read_response(response, response_size, sock, fail_on_status_ne_200); - closesocket(sock); -#ifdef PA_USE_ALARM - alarm(0); -#endif - return result; - } catch(...) { -#ifdef PA_USE_ALARM - alarm(0); -#endif - if(sock>=0) - closesocket(sock); - rethrow; - } -#ifdef PA_USE_ALARM - } -#endif +/// these options were handled but not checked elsewhere, now check them +int pa_get_valid_file_options_count(HashStringValue& options) { + int result=0; + if(options.get(PA_SQL_LIMIT_NAME)) + result++; + if(options.get(PA_SQL_OFFSET_NAME)) + result++; + if(options.get(PA_COLUMN_SEPARATOR_NAME)) + result++; + if(options.get(PA_COLUMN_ENCLOSER_NAME)) + result++; + if(options.get(PA_CHARSET_NAME)) + result++; + return result; } #ifndef DOXYGEN -struct Http_pass_header_info { - Request_charsets* charsets; - String* request; - bool user_agent_specified; -}; +struct File_read_action_info { + char **data; size_t *data_size; + char* buf; uint64_t offset; size_t limit; +}; #endif -static void http_pass_header(HashStringValue::key_type key, - HashStringValue::value_type value, - Http_pass_header_info *info) { - *info->request <charsets->source(), String::CC_UPPER)=="USER-AGENT") - info->user_agent_specified=true; -} - -static Charset* detect_charset(Charset& source_charset, const String& content_type_value) { - const String::Body CONTENT_TYPE_VALUE= - content_type_value.change_case(source_charset, String::CC_UPPER); - // content-type: xxx/xxx; source_charset=WE-NEED-THIS - // content-type: xxx/xxx; source_charset="WE-NEED-THIS" - // content-type: xxx/xxx; source_charset="WE-NEED-THIS"; - size_t before_charseteq_pos=CONTENT_TYPE_VALUE.pos("CHARSET="); - if(before_charseteq_pos!=STRING_NOT_FOUND) { - size_t charset_begin=before_charseteq_pos+8/*CHARSET="*/; - size_t open_quote_pos=CONTENT_TYPE_VALUE.pos('"', charset_begin); - bool quoted=open_quote_pos==charset_begin; - if(quoted) - charset_begin++; // skip opening '"' - size_t charset_end=CONTENT_TYPE_VALUE.length(); - if(quoted) { - size_t close_quote_pos=CONTENT_TYPE_VALUE.pos('"', charset_begin); - if(close_quote_pos!=STRING_NOT_FOUND) - charset_end=close_quote_pos; - } else { - size_t delim_pos=CONTENT_TYPE_VALUE.pos(';', charset_begin); - if(delim_pos!=STRING_NOT_FOUND) - charset_end=delim_pos; - } - const String::Body CHARSET_NAME_BODY= - CONTENT_TYPE_VALUE.mid(charset_begin, charset_end); - - return &charsets.get(CHARSET_NAME_BODY); +static void file_read_action(struct stat& finfo, int f, const String& file_spec, void *context) { + File_read_action_info& info = *static_cast(context); + size_t to_read_size = check_file_size(info.limit && info.limit < (size_t)finfo.st_size ? info.limit : (size_t)finfo.st_size, &file_spec); + if(to_read_size) { + if(info.offset) + pa_lseek(f, info.offset, SEEK_SET); // seek never fails as POSIX allows the file offset to be set beyond the EOF + *info.data = info.buf ? info.buf : (char *)pa_malloc_atomic(to_read_size+1); + ssize_t result = read(f, *info.data, to_read_size); + if(result<0) + throw Exception("file.read", &file_spec, "read failed: %s (%d)", strerror(errno), errno); + *info.data_size = result; + } else { // empty file + // for both, text and binary: for text we need that terminator, for binary we need nonzero pointer to be able to save such files + *info.data = (char *)pa_malloc_atomic(1); + *(char*)(*info.data) = 0; + *info.data_size = 0; + return; } - - return 0; } -static const String* basic_authorization_field(const char* user, const char* pass) { - if(!user&& !pass) - return 0; +File_read_result file_read_binary(const String& file_spec, bool fail_on_read_problem, char* buf, uint64_t offset, size_t limit) { + File_read_result result = {false, 0, 0, 0}; + File_read_action_info info = {&result.str, &result.length, buf, offset, limit}; - String combined; - if(user) - combined<key, *row->get(0), info->result); -} -static void form_value2string( - HashStringValue::key_type key, - HashStringValue::value_type value, - String* result) -{ - if(const String* svalue=value->get_string()) - form_string_value2string(key, *svalue, *result); - else if(Table* tvalue=value->get_table()) { - Form_table_value2string_info info(key, *result); - tvalue->for_each(form_table_value2string, &info); - } else - throw Exception(0, - new String(key, String::L_TAINTED), - "is %s, "HTTP_FORM_NAME" option value must either string or table", value->type()); -} -static const char* form2string(HashStringValue& form) { - String string; - form.for_each(form_value2string, &string); - return string.cstr(String::L_UNSPECIFIED); -} -#ifndef DOXYGEN -struct File_read_http_result { - char *str; size_t length; - HashStringValue* headers; -}; -#endif -static void find_headers_end(char* p, - char*& headers_end_at, - char*& raw_body) -{ - raw_body=p; - // \n\n - // \r\n\r\n - while((p=strchr(p, '\n'))) { - headers_end_at=++p; // \n>.< - if(*p=='\r') // \r\n>\r?<\n - p++; - if(*p=='\n') { // \r\n\r>\n?< - raw_body=p+1; - return; - } - } - headers_end_at=0; -} - -/// @todo build .cookies field. use ^file.tables.SET-COOKIES.menu{ for now -static File_read_http_result file_read_http(Request_charsets& charsets, - const String& file_spec, - bool as_text, - HashStringValue *options=0) { - File_read_http_result result; - char host[MAX_STRING]; - const char* uri; - short port; - const char* method="GET"; bool method_is_get; - HashStringValue* form=0; - const char* body_cstr=0; - int timeout=2; - bool fail_on_status_ne_200=true; - Value* vheaders=0; - Charset *asked_remote_charset=0; - const char* user_cstr=0; - const char* password_cstr=0; - - if(options) { - int valid_options=0; - if(Value* vmethod=options->get(HTTP_METHOD_NAME)) { - valid_options++; - method=vmethod->as_string().cstr(); - } - if(Value* vform=options->get(HTTP_FORM_NAME)) { - valid_options++; - form=vform->get_hash(); - } - if(Value* vbody=options->get(HTTP_BODY_NAME)) { - valid_options++; - body_cstr=vbody->as_string().cstr(String::L_UNSPECIFIED); - } - if(Value* vtimeout=options->get(HTTP_TIMEOUT_NAME)) { - valid_options++; - timeout=vtimeout->as_int(); - } - if((vheaders=options->get(HTTP_HEADERS_NAME))) { - valid_options++; - } - if(Value* vany_status=options->get(HTTP_ANY_STATUS_NAME)) { - valid_options++; - fail_on_status_ne_200=!vany_status->as_bool(); - } - if(Value* vcharset_name=options->get(HTTP_CHARSET_NAME)) { - valid_options++; - asked_remote_charset=&::charsets.get(vcharset_name->as_string(). - change_case(charsets.source(), String::CC_UPPER)); - } - if(Value* vuser=options->get(HTTP_USER)) { - valid_options++; - user_cstr=vuser->as_string().cstr(); - } - if(Value* vpassword=options->get(HTTP_PASSWORD)) { - valid_options++; - password_cstr=vpassword->as_string().cstr(); - } - - if(valid_options!=options->count()) - throw Exception("parser.runtime", - 0, - "invalid option passed"); - } - if(!asked_remote_charset) // defaulting to $request:charset - asked_remote_charset=&charsets.source(); - - method_is_get=strcmp(method, "GET")==0; - if(method_is_get && body_cstr) - throw Exception("parser.runtime", - 0, - "you can not use $."HTTP_BODY_NAME" option with method GET"); - - //preparing request - String& connect_string=*new String; - // not in ^sql{... L_SQL ...} spirit, but closer to ^file::load one - connect_string.append(file_spec, String::L_URI); // tainted pieces -> URI pieces - - String request_head_and_body; - { - // influence URLencoding of tainted pieces to String::L_URI lang - Temp_client_charset temp(charsets, *asked_remote_charset); - - const char* connect_string_cstr=connect_string.cstr(String::L_UNSPECIFIED); - - const char* current=connect_string_cstr; - if(strncmp(current, "http://", 7)!=0) - throw Exception(0, - &connect_string, - "does not start with http://"); //never - current+=7; - - strncpy(host, current, sizeof(host)-1); host[sizeof(host)-1]=0; - char* host_uri=lsplit(host, '/'); - uri=host_uri?current+(host_uri-1-host):"/"; - char* port_cstr=lsplit(host, ':'); - char* error_pos=0; - port=port_cstr?(short)strtol(port_cstr, &error_pos, 0):80; - - if(strchr(uri, '?') && form) - throw Exception("parser.runtime", - 0, - "use either uri with ?params or $."HTTP_FORM_NAME" option"); - - //making request head - String head; - head << method; - head << " " << uri; - if(form) - if(method_is_get) - head << "?" << form2string(*form); - head <<" HTTP/1.0" CRLF - "host: "<< host << CRLF; - if(form && !method_is_get) { - head << "content-type: application/x-www-form-urlencoded" CRLF; - body_cstr = form2string(*form); - } - - // http://www.ietf.org/rfc/rfc2617.txt - if(const String* authorization_field_value=basic_authorization_field(user_cstr, password_cstr)) - head<<"authorization: "<<*authorization_field_value<is_string()) { // allow empty - if(HashStringValue *headers=vheaders->get_hash()) { - Http_pass_header_info info={&charsets, &head, false}; - headers->for_each(http_pass_header, &info); - user_agent_specified=info.user_agent_specified; - } else - throw Exception("parser.runtime", - &connect_string, - "headers param must be hash"); - }; - if(!user_agent_specified) // defaulting - head << "user-agent: " DEFAULT_USER_AGENT CRLF; - - if(body_cstr) { - // recode those pieces which are not in String::L_URI lang - // [those violating HTTP standard, but widly used] - body_cstr=Charset::transcode( - String::C(body_cstr, strlen(body_cstr)), - charsets.source(), - *asked_remote_charset); - - head << "content-length: " << format(strlen(body_cstr), "%u") << CRLF; - } - - const char* head_cstr=head.cstr(String::L_UNSPECIFIED); - - // recode those pieces which are not in String::L_URI lang - // [those violating HTTP standard, but widly used] - head_cstr=Charset::transcode( - String::C(head_cstr, strlen(head_cstr)), - charsets.source(), - *asked_remote_charset); - - // head + end of header - request_head_and_body << head_cstr << CRLF; - // body - if(body_cstr) - request_head_and_body << body_cstr; - } - - //sending request - char* response; - size_t response_size; - int status_code=http_request(response, response_size, - host, port, request_head_and_body.cstr(), - timeout, fail_on_status_ne_200); - - //processing results - char* raw_body; size_t raw_body_size; - char* headers_end_at; - find_headers_end(response, - headers_end_at, - raw_body); - raw_body_size=response_size-(raw_body-response); - - result.headers=new HashStringValue; - VHash* vtables=new VHash; - result.headers->put(HTTP_TABLES_NAME, vtables); - Charset* real_remote_charset=0; // undetected, yet - - if(headers_end_at) { - *headers_end_at=0; - const String header_block(String::C(response, headers_end_at-response), true); - - ArrayString aheaders; - HashStringValue& tables=vtables->hash(); - - size_t pos_after=0; - header_block.split(aheaders, pos_after, "\n"); - - //processing headers - size_t aheaders_count=aheaders.count(); - for(size_t i=1; icount()) + throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); + } + + File_read_action_info info = {&result.str, &result.length, 0, offset, limit}; + + result.success = file_read_action_under_lock(file_spec, "read", file_read_action, &info, as_text, fail_on_read_problem); + + if(as_text){ + if(result.success){ + Charset* asked_charset = 0; + if(options) + if(Value* vcharset_name = options->get(PA_CHARSET_NAME)) + asked_charset = &pa_charsets.get(vcharset_name->as_string()); + + asked_charset = pa_charsets.checkBOM(result.str, result.length, asked_charset); + + if(result.length && transcode_text_result && asked_charset){ // length must be checked because transcode returns CONST string in case length==0, which contradicts hacking few lines below + String::C body = String::C(result.str, result.length); + body=Charset::transcode(body, *asked_charset, charsets.source()); - // tables - { - Value *valready=(Value *)tables.get(HEADER_NAME); - bool existed=valready!=0; - Table *table; - if(existed) { - // second+ appearence - table=valready->get_table(); - } else { - // first appearence - Table::columns_type columns =new ArrayString(1); - *columns+=new String("value"); - table=new Table(columns); - } - // this string becomes next row - ArrayString& row=*new ArrayString(1); - row+=&header_value; - *table+=&row; - // not existed before? add it - if(!existed) - tables.put(HEADER_NAME, new VTable(table)); + result.str = const_cast(body.str); // hacking a little + result.length = body.length; } - - result.headers->put(HEADER_NAME, new VString(header_value)); } + if(result.length) + fix_line_breaks(result.str, result.length); } - // output response - String::C real_body=String::C(raw_body, raw_body_size); - if(as_text && raw_body_size) { // must be checked because transcode returns CONST string in case length==0, which contradicts hacking few lines below - // defaulting to used-asked charset [it's never empty!] - if(!real_remote_charset) - real_remote_charset=asked_remote_charset; - real_body=Charset::transcode(real_body, *real_remote_charset, charsets.source()); - } - - result.str=const_cast(real_body.str); // hacking a little - result.length=real_body.length; - result.headers->put(file_status_name, new VInt(status_code)); return result; } -#endif - -#ifndef DOXYGEN -struct File_read_action_info { - char **data; size_t *data_size; - char* buf; size_t offset; size_t count; -}; -#endif -static void file_read_action( - struct stat& finfo, - int f, - const String& file_spec, const char* /*fname*/, bool as_text, - void *context) { - File_read_action_info& info=*static_cast(context); - size_t to_read_size=info.count; - if(!to_read_size) - to_read_size=(size_t)finfo.st_size; - assert( !(info.buf && as_text) ); - if(to_read_size) { - if(info.offset) - lseek(f, info.offset, SEEK_SET); - *info.data=info.buf - ? info.buf - : new(PointerFreeGC) char[to_read_size+(as_text?1:0)]; - *info.data_size=(size_t)read(f, *info.data, to_read_size); - - if(ssize_t(*info.data_size)<0 || *info.data_size>to_read_size) - throw Exception(0, - &file_spec, - "read failed: actually read %u bytes count not in [0..%u] valid range", - *info.data_size, to_read_size); - } else { // empty file - if(as_text) { - *info.data=new(PointerFreeGC) char[1]; - *(char*)(*info.data)=0; - } else - *info.data=0; - *info.data_size=0; - return; +File_read_result file_load(Request& r, const String& file_spec, + bool as_text, HashStringValue *options, + bool fail_on_read_problem, + bool transcode_text_result) { + + size_t offset = 0; + size_t limit = 0; + + if(options){ + if(Value *voffset = (Value *)options->get(sql_offset_name)) + offset = r.process(*voffset).as_int(); + if(Value *vlimit = (Value *)options->get(sql_limit_name)) + limit = r.process(*vlimit).as_int(); + // no check on options count here } -} -File_read_result file_read(Request_charsets& charsets, const String& file_spec, - bool as_text, HashStringValue *params, - bool fail_on_read_problem, - char* buf, size_t offset, size_t count) { - File_read_result result={false, 0, 0, 0}; -#ifdef PA_HTTP + if(file_spec.starts_with("http://")) { + if(offset || limit) + throw Exception(PARSER_RUNTIME, 0, "offset and load options are not supported for HTTP:// file load"); + // fail on read problem - File_read_http_result http=file_read_http(charsets, file_spec, as_text, params); - result.success=true; - result.str=http.str; - result.length=http.length; - result.headers=http.headers; - } else { -#endif - if(params && params->count()) - throw Exception("parser.runtime", - 0, - "invalid option passed"); - - File_read_action_info info={&result.str, &result.length, - buf, offset, count}; - result.success=file_read_action_under_lock(file_spec, - "read", file_read_action, &info, - as_text, fail_on_read_problem); -#ifdef PA_HTTP - } -#endif + File_read_http_result http = pa_internal_file_read_http(r, file_spec, as_text, options, transcode_text_result); - if(result.success && as_text) { - // UTF-8 signature: EF BB BF - if(result.length>=3) { - char *in=(char *)result.str; - if(strncmp(in, "\xEF\xBB\xBF", 3)==0) { - result.str=in+3; result.length-=3;// skip prefix - } - } + File_read_result result = {true, http.str, http.length, http.headers}; + return result; + } else + return file_read(r.charsets, file_spec, as_text, options, fail_on_read_problem, offset, limit, transcode_text_result); +} - fix_line_breaks((char *)(result.str), result.length); - } +char* file_read_text(Request_charsets& charsets, const String& file_spec, bool fail_on_read_problem) { + File_read_result file = file_read(charsets, file_spec, true, 0, fail_on_read_problem); + return file.success ? file.str : 0; +} - return result; +char* file_load_text(Request& r, const String& file_spec, bool fail_on_read_problem, HashStringValue* options, bool transcode_result) { + File_read_result file = file_load(r, file_spec, true, options, fail_on_read_problem, transcode_result); + return file.success ? file.str : 0; } #ifdef PA_SAFE_MODE -void check_safe_mode(struct stat finfo, const String& file_spec, const char* fname) { +void check_safe_mode(struct stat finfo, const String& file_spec, const char* fname) { if(finfo.st_uid/*foreign?*/!=geteuid() && finfo.st_gid/*foreign?*/!=getegid()) - throw Exception("parser.runtime", - &file_spec, - "parser is in safe mode: " - "reading files of foreign group and user disabled " - "[recompile parser with --disable-safe-mode configure option], " - "actual filename '%s', " - "fuid(%d)!=euid(%d) or fgid(%d)!=egid(%d)", - fname, - finfo.st_uid, geteuid(), - finfo.st_gid, getegid()); -} -#endif + throw Exception(PARSER_RUNTIME, + &file_spec, + "parser is in safe mode: reading files of foreign group and user disabled " + "[recompile parser with --disable-safe-mode configure option], " + "actual filename '%s', fuid(%d)!=euid(%d) or fgid(%d)!=egid(%d)", + fname, finfo.st_uid, geteuid(), finfo.st_gid, getegid() + ); +} +#else +void check_safe_mode(struct stat, const String&, const char*) { +} +#endif + + bool file_read_action_under_lock(const String& file_spec, const char* action_name, File_read_action action, void *context, bool as_text, bool fail_on_read_problem) { - const char* fname=file_spec.cstr(String::L_FILE_SPEC); + const char* fname=file_spec.taint_cstr(String::L_FILE_SPEC); int f; // first open, next stat: @@ -779,75 +254,67 @@ bool file_read_action_under_lock(const S // they delay update till open, so we would receive "!^test[" string // if would do stat, next open. // later: it seems, even this does not help sometimes - if((f=open(fname, O_RDONLY|(as_text?_O_TEXT:_O_BINARY)))>=0) { + if((f=pa_open(fname, O_RDONLY|(as_text?_O_TEXT:_O_BINARY)))>=0) { try { if(pa_lock_shared_blocking(f)!=0) - throw Exception("file.lock", - &file_spec, - "shared lock failed: %s (%d), actual filename '%s'", - strerror(errno), errno, fname); + throw Exception("file.lock", &file_spec, "shared lock failed: %s (%d), actual filename '%s'", strerror(errno), errno, fname); struct stat finfo; - if(stat(fname, &finfo)!=0) + if(pa_fstat(f, &finfo)!=0) throw Exception("file.missing", // hardly possible: we just opened it OK - &file_spec, - "stat failed: %s (%d), actual filename '%s'", - strerror(errno), errno, fname); + &file_spec, "stat failed: %s (%d), actual filename '%s'", strerror(errno), errno, fname); -#ifdef PA_SAFE_MODE check_safe_mode(finfo, file_spec, fname); -#endif - action(finfo, f, file_spec, fname, as_text, context); + action(finfo, f, file_spec, context); } catch(...) { pa_unlock(f);close(f); if(fail_on_read_problem) rethrow; - return false; + return false; } pa_unlock(f);close(f); return true; - } else { + } else { if(fail_on_read_problem) - throw Exception(errno==EACCES?"file.access":errno==ENOENT?"file.missing":0, - &file_spec, - "%s failed: %s (%d), actual filename '%s'", - action_name, strerror(errno), errno, fname); + throw Exception(errno==EACCES ? "file.access" : (errno==ENOENT || errno==ENOTDIR || errno==ENODEV) ? "file.missing" : 0, + &file_spec, "%s failed: %s (%d), actual filename '%s'", action_name, strerror(errno), errno, fname); return false; } } -static void create_dir_for_file(const String& file_spec) { - size_t pos_after=1; - size_t pos_before; - while((pos_before=file_spec.pos('/', pos_after))!=STRING_NOT_FOUND) { - mkdir(file_spec.mid(0, pos_before).cstr(String::L_FILE_SPEC), 0775); - pos_after=pos_before+1; +void create_dir_for_file(const String& file_spec) { + const char *str=file_spec.taint_cstr(String::L_FILE_SPEC); + if(str[0]){ + const char *pos=str+1; + while((pos=strchr(pos,'/')) && pos[1]) { // to avoid trailing /, see #1166 + pa_mkdir(pa_strdup(str,pos-str), 0775); + pos++; + } } } bool file_write_action_under_lock( const String& file_spec, - const char* action_name, File_write_action action, void *context, + const char* action_name, + File_write_action action, + void *context, bool as_text, bool do_append, bool do_block, bool fail_on_lock_problem) { - const char* fname=file_spec.cstr(String::L_FILE_SPEC); + const char* fname=file_spec.taint_cstr(String::L_FILE_SPEC); int f; if(access(fname, W_OK)!=0) // no create_dir_for_file(file_spec); - if((f=open(fname, + if((f=pa_open(fname, O_CREAT|O_RDWR |(as_text?_O_TEXT:_O_BINARY) |(do_append?O_APPEND:PA_O_TRUNC), 0664))>=0) { if((do_block?pa_lock_exclusive_blocking(f):pa_lock_exclusive_nonblocking(f))!=0) { - Exception e("file.lock", - &file_spec, - "shared lock failed: %s (%d), actual filename '%s'", - strerror(errno), errno, fname); + Exception e("file.lock", &file_spec, "shared lock failed: %s (%d), actual filename '%s'", strerror(errno), errno, fname); close(f); if(fail_on_lock_problem) throw e; @@ -855,7 +322,12 @@ bool file_write_action_under_lock( } try { - action(f, context); +#if (defined(HAVE_FCHMOD) && defined(PA_SAFE_MODE)) + struct stat finfo; + if(pa_fstat(f, &finfo)==0 && finfo.st_mode & 0111) + fchmod(f, finfo.st_mode & 0666/*clear executable bits*/); // backward: ignore errors if any +#endif + action(f, context); } catch(...) { #ifdef HAVE_FTRUNCATE if(!do_append) @@ -872,265 +344,561 @@ bool file_write_action_under_lock( pa_unlock(f);close(f); return true; } else - throw Exception(errno==EACCES?"file.access":0, - &file_spec, - "%s failed: %s (%d), actual filename '%s'", - action_name, strerror(errno), errno, fname); + throw Exception(errno==EACCES ? "file.access" : 0, &file_spec, "%s failed: %s (%d), actual filename '%s'", action_name, strerror(errno), errno, fname); // here should be nothing, see rethrow above } #ifndef DOXYGEN struct File_write_action_info { - const char* str; size_t length; + const char* str; + size_t length; }; #endif + static void file_write_action(int f, void *context) { File_write_action_info& info=*static_cast(context); if(info.length) { - int written=write(f, info.str, info.length); + ssize_t written=write(f, info.str, info.length); if(written<0) - throw Exception(0, - 0, - "write failed: %s (%d)", strerror(errno), errno); + throw Exception("file.write", 0, "write failed: %s (%d)", strerror(errno), errno); + if((size_t)written!=info.length) + throw Exception("file.write", 0, "write failed: %u of %u bytes written", written, info.length); } } + void file_write( - const String& file_spec, - const char* data, size_t size, + Request_charsets& charsets, + const String& file_spec, + const char* data, + size_t size, bool as_text, - bool do_append) { + bool do_append, + Charset* asked_charset) { + + if(as_text && asked_charset){ + String::C body=String::C(data, size); + body=Charset::transcode(body, charsets.source(), *asked_charset); + data=body.str; + size=body.length; + }; + File_write_action_info info={data, size}; + file_write_action_under_lock( file_spec, - "write", file_write_action, &info, + "write", + file_write_action, + &info, as_text, do_append); } -// throws nothing! [this is required in file_move & file_delete] -static void rmdir(const String& file_spec, size_t pos_after) { - size_t pos_before; - if((pos_before=file_spec.pos('/', pos_after))!=STRING_NOT_FOUND) - rmdir(file_spec, pos_before+1); - - rmdir(file_spec.mid(0, pos_after-1/* / */).cstr(String::L_FILE_SPEC)); -} -bool file_delete(const String& file_spec, bool fail_on_problem) { - const char* fname=file_spec.cstr(String::L_FILE_SPEC); - if(unlink(fname)!=0) - if(fail_on_problem) - throw Exception(errno==EACCES?"file.access":errno==ENOENT?"file.missing":0, - &file_spec, - "unlink failed: %s (%d), actual filename '%s'", - strerror(errno), errno, fname); - else - return false; - - rmdir(file_spec, 1); - return true; -} -void file_move(const String& old_spec, const String& new_spec) { - const char* old_spec_cstr=old_spec.cstr(String::L_FILE_SPEC); - const char* new_spec_cstr=new_spec.cstr(String::L_FILE_SPEC); - - create_dir_for_file(new_spec); - - if(rename(old_spec_cstr, new_spec_cstr)!=0) - throw Exception(errno==EACCES?"file.access":errno==ENOENT?"file.missing":0, - &old_spec, - "rename failed: %s (%d), actual filename '%s' to '%s'", - strerror(errno), errno, old_spec_cstr, new_spec_cstr); - - rmdir(old_spec, 1); +static size_t get_dir(char* fname, size_t helper_length){ + bool dir=false; + size_t pos=0; + for(pos=helper_length; pos; pos--){ + char c=fname[pos-1]; + if(c=='/' || c=='\\'){ + fname[pos-1]=0; + dir=true; + } else if(dir) break; + } + return pos; } - bool entry_exists(const char* fname, struct stat *afinfo) { struct stat lfinfo; - bool result=stat(fname, &lfinfo)==0; + bool result=pa_stat(fname, &lfinfo)==0; if(afinfo) *afinfo=lfinfo; return result; } bool entry_exists(const String& file_spec) { - const char* fname=file_spec.cstr(String::L_FILE_SPEC); - return entry_exists(fname, 0); + return entry_exists(file_spec.taint_cstr(String::L_FILE_SPEC), 0); } -static bool entry_readable(const String& file_spec, bool need_dir) { - char* fname=file_spec.cstrm(String::L_FILE_SPEC); - if(need_dir) { - size_t size=strlen(fname); +static bool entry_ifdir(char *fname, bool need_dir) { + if(need_dir){ + size_t size=strlen(fname); while(size) { - char c=fname[size-1]; + char c=fname[size-1]; if(c=='/' || c=='\\') fname[--size]=0; else break; } } + struct stat finfo; - if(access(fname, R_OK)==0 && entry_exists(fname, &finfo)) { + if(entry_exists(fname, &finfo)) { bool is_dir=(finfo.st_mode&S_IFDIR) != 0; return is_dir==need_dir; } return false; } -bool file_readable(const String& file_spec) { - return entry_readable(file_spec, false); + +static bool entry_ifdir(const String& file_spec, bool need_dir) { + return entry_ifdir(file_spec.taint_cstrm(String::L_FILE_SPEC), need_dir); } -bool dir_readable(const String& file_spec) { - return entry_readable(file_spec, true); + +// throws nothing! [this is required in file_move & file_delete] +static void rmdir(const String& file_spec, size_t pos_after) { + char* dir_spec=file_spec.taint_cstrm(String::L_FILE_SPEC); + size_t length=strlen(dir_spec); + while( (length=get_dir(dir_spec, length)) && (length > pos_after) ){ +#ifdef _MSC_VER + if(!entry_ifdir(dir_spec, true)) + break; + DWORD attrs=GetFileAttributes(dir_spec); + if( + (attrs==INVALID_FILE_ATTRIBUTES) + || !(attrs & FILE_ATTRIBUTE_DIRECTORY) + || (attrs & FILE_ATTRIBUTE_REPARSE_POINT) + ) + break; +#endif + if( rmdir(dir_spec) ) + break; + }; } -const String* file_readable(const String& path, const String& name) { + +bool file_delete(const String& file_spec, bool fail_on_problem, bool keep_empty_dirs) { + const char* fname=file_spec.taint_cstr(String::L_FILE_SPEC); + if(unlink(fname)!=0) { + if(fail_on_problem) + throw Exception(errno==EACCES?"file.access":errno==ENOENT?"file.missing":0, + &file_spec, "unlink failed: %s (%d), actual filename '%s'", strerror(errno), errno, fname); + else + return false; + } + + if(!keep_empty_dirs) + rmdir(file_spec, 1); + + return true; +} + +void file_move(const String& old_spec, const String& new_spec, bool keep_empty_dirs) { + const char* old_spec_cstr=old_spec.taint_cstr(String::L_FILE_SPEC); + const char* new_spec_cstr=new_spec.taint_cstr(String::L_FILE_SPEC); + + create_dir_for_file(new_spec); + + if(rename(old_spec_cstr, new_spec_cstr)!=0) + throw Exception(errno==EACCES ? "file.access" : errno==ENOENT ? "file.missing" : 0, + &old_spec, "rename failed: %s (%d), actual filename '%s' to '%s'", strerror(errno), errno, old_spec_cstr, new_spec_cstr); + + if(!keep_empty_dirs) + rmdir(old_spec, 1); +} + + +bool file_exist(const String& file_spec) { + return entry_ifdir(file_spec, false); +} + +bool dir_exists(const String& file_spec) { + return entry_ifdir(file_spec, true); +} + +const String* file_exist(const String& path, const String& name) { String& result=*new String(path); - result << "/"; + if(path.last_char() != '/') + result << "/"; result << name; - return file_readable(result)?&result:0; + return file_exist(result)?&result:0; } + bool file_executable(const String& file_spec) { - return access(file_spec.cstr(String::L_FILE_SPEC), X_OK)==0; + return access(file_spec.taint_cstr(String::L_FILE_SPEC), X_OK)==0; } -bool file_stat(const String& file_spec, - size_t& rsize, - time_t& ratime, - time_t& rmtime, - time_t& rctime, - bool fail_on_read_problem) { - const char* fname=file_spec.cstr(String::L_FILE_SPEC); +bool file_stat(const String& file_spec, uint64_t& rsize, time_t& ratime, time_t& rmtime, time_t& rctime, bool fail_on_read_problem) { + const char* fname=file_spec.taint_cstr(String::L_FILE_SPEC); struct stat finfo; - if(stat(fname, &finfo)!=0) + if(pa_stat(fname, &finfo)!=0) { if(fail_on_read_problem) - throw Exception("file.missing", - &file_spec, - "getting file size failed: %s (%d), real filename '%s'", - strerror(errno), errno, fname); + throw Exception("file.missing", &file_spec, "getting file size failed: %s (%d), real filename '%s'", strerror(errno), errno, fname); else return false; + } rsize=finfo.st_size; - ratime=finfo.st_atime; - rmtime=finfo.st_mtime; - rctime=finfo.st_ctime; + ratime=(time_t)finfo.st_atime; + rmtime=(time_t)finfo.st_mtime; + rctime=(time_t)finfo.st_ctime; + return true; +} + +size_t check_file_size(uint64_t size, const String* file_spec){ + if(size > (uint64_t)pa_file_size_limit) + throw Exception(PARSER_RUNTIME, file_spec, "content size of %.15g bytes exceeds the limit (%.15g bytes)", (double)size, (double)pa_file_size_limit); + return (size_t)size; +} + +/** + String related functions +*/ + +bool capitalized(const char* s){ + bool upper=true; + for(const char* c=s; *c; c++){ + if(*c != (upper ? toupper((unsigned char)*c) : tolower((unsigned char)*c))) + return false; + upper=strchr("-_ ", *c) != 0; + } return true; } +const char* capitalize(const char* s){ + if(!s || capitalized(s)) + return s; + + char* result=pa_strdup(s); + if(result){ + bool upper=true; + for(char* c=result; *c; c++){ + *c=upper ? (char)toupper((unsigned char)*c) : (char)tolower((unsigned char)*c); + upper=strchr("-_ ", *c) != 0; + } + } + return (const char*)result; +} + +char *str_lower(const char *s, size_t length){ + char *result=pa_strdup(s, length); + for(char* c=result; *c; c++) + *c=(char)tolower((unsigned char)*c); + return result; +} + +char *str_upper(const char *s, size_t length){ + char *result=pa_strdup(s, length); + for(char* c=result; *c; c++) + *c=(char)toupper((unsigned char)*c); + return result; +} + +void fix_line_breaks(char *str, size_t& length) { + //_asm int 3; + const char* const eob=str+length; + char* dest=str; + // fix DOS: \r\n -> \n + // fix Macintosh: \r -> \n + char* bol=str; + while(char* eol=(char*)memchr(bol, '\r', eob -bol)) { + size_t len=eol-bol; + if(dest!=bol) + memmove(dest, bol, len); + dest+=len; + *dest++='\n'; + + if(&eol[1]= path; c--) { + if(*c == '/' || *c == '\\') + return c+1; + } + return path; +} + +// format: %[flags][width][.precision]type http://msdn.microsoft.com/ru-ru/library/56e442dc(en-us,VS.80).aspx +// flags: '-', '+', ' ', '#', '0' http://msdn.microsoft.com/ru-ru/library/8aky45ct(en-us,VS.80).aspx +// width, precision: non negative decimal number +enum FormatType { + FormatInvalid, + FormatInt, + FormatUInt, + FormatDouble +}; +FormatType format_type(const char* fmt){ + enum FormatState { + Percent, + Flags, + Width, + Precision, + Done + } state=Percent; + + FormatType result=FormatInvalid; + + const char* pos=fmt; + while(char c=*(pos++)){ + switch(state){ + case Percent: + if(c=='%'){ + state=Flags; + } else { + return FormatInvalid; // 1st char must be '%' only + } + break; + case Flags: + if(strchr("-+ #0", c)!=0){ + break; + } + // go to the next step + case Width: + if(c=='.'){ + state=Precision; + break; + } + // go to the next step + case Precision: + if(c>='0' && c<='9'){ + if(state == Flags) state=Width; // no more flags + break; + } else if(c=='d' || c=='i'){ + result=FormatInt; + } else if(strchr("feEgG", c)!=0){ + result=FormatDouble; + } else if(strchr("uoxX", c)!=0){ + result=FormatUInt; + } else { + return FormatInvalid; // invalid char + } + state=Done; + break; + case Done: + return FormatInvalid; // no chars allowed after 'type' + } + } + return result; +} + + +const char* format(double value, const char* fmt) { + char local_buf[MAX_NUMBER]; + int size=-1; + + if(fmt && strlen(fmt)){ + switch(format_type(fmt)){ + case FormatDouble: + size=snprintf(local_buf, sizeof(local_buf), fmt, value); + break; + case FormatUInt: + if(value >= 0){ // on Apple M1 (uint) is 0 + size=snprintf(local_buf, sizeof(local_buf), fmt, clip2uint(value)); + break; + } + case FormatInt: + size=snprintf(local_buf, sizeof(local_buf), fmt, clip2int(value)); + break; + case FormatInvalid: + throw Exception(PARSER_RUNTIME, 0, "Incorrect format string '%s' was specified.", fmt); + } + } else + return pa_itoa(clip2int(value)); + + if(size < 0 || size >= MAX_NUMBER-1){ // on win32 we manually reduce max size while printing + throw Exception(PARSER_RUNTIME, 0, "Error occurred white executing snprintf with format string '%s'.", fmt); + } + + return pa_strdup(local_buf, (size_t)size); } size_t stdout_write(const void *buf, size_t size) { #ifdef WIN32 size_t to_write = size; do{ - int chunk_written=fwrite(buf, 1, min((size_t)8*0x400, size), stdout); + int chunk_written=fwrite(buf, 1, min((size_t)8*0x400, size), stdout); if(chunk_written<=0) break; size-=chunk_written; buf=((const char*)buf)+chunk_written; - } while(size>0); + } while(size>0); + fflush(stdout); return to_write-size; #else - return fwrite(buf, 1, size, stdout); + size_t result=fwrite(buf, 1, size, stdout); + fflush(stdout); + return result; #endif } -char* unescape_chars(const char* cp, int len) { - char* s=new(PointerFreeGC) char[len + 1]; - enum EscapeState { - EscapeRest, - EscapeFirst, - EscapeSecond - } escapeState=EscapeRest; - uchar escapedValue=0; +enum EscapeState { + EscapeRest, + EscapeFirst, + EscapeSecond, + EscapeUnicode +}; + +// @todo prescan for reduce required size (unescaped sting in 1 byte charset requires less memory usually) +char* unescape_chars(const char* cp, int len, Charset* charset, bool js){ + char* s=new(PointerFreeGC) char[len+1]; // must be enough (%uXXXX==6 bytes, max utf-8 char length==6 bytes) + char* dst=s; + EscapeState escapeState=EscapeRest; + uint escapedValue=0; int srcPos=0; - int dstPos=0; - while(srcPos < len) { - uchar ch=(uchar)cp[srcPos]; - switch(escapeState) { - case EscapeRest: - if(ch=='%') { - escapeState=EscapeFirst; - } else if(ch=='+') { - s[dstPos++]=' '; - } else { - s[dstPos++]=ch; + short int jsCnt=0; + while(srcPosstore_Char((XMLByte*&)dst, (XMLCh)escapedValue, '?'); + escapeState=EscapeRest; + } + } else { + // not full unicode value + escapeState=EscapeRest; + } + break; } - break; - case EscapeFirst: - escapedValue=(uchar)(hex_value[ch] << 4); - escapeState=EscapeSecond; - break; - case EscapeSecond: - escapedValue +=hex_value[ch]; - s[dstPos++]=escapedValue; - escapeState=EscapeRest; - break; } - srcPos++; + + srcPos++; } - s[dstPos]=0; + + *dst=0; // zero-termination return s; } +char *search_stop(char*& current, char cstop_at) { + // sanity check + if(!current) + return 0; + + // skip leading WS + while(*current==' ' || *current=='\t') + current++; + if(!*current) + return current=0; + + char *result=current; + if(char *pstop_at=strchr(current, cstop_at)) { + *pstop_at=0; + current=pstop_at+1; + } else + current=0; + return result; +} + #ifdef WIN32 void back_slashes_to_slashes(char* s) { if(s) @@ -1138,100 +906,289 @@ void back_slashes_to_slashes(char* s) { if(*s=='\\') *s='/'; } -/* -void slashes_to_back_slashes(char* s) { - if(s) - for(; *s; s++) - if(*s=='/') - *s='\\'; -} -*/ #endif -bool StrEqNc(const char* s1, const char* s2, bool strict) { - while(true) { - if(!(*s1)) { - if(!(*s2)) - return true; - else - return !strict; - } else if(!(*s2)) - return !strict; - if(isalpha((unsigned char)*s1)) { - if(tolower((unsigned char)*s1) !=tolower((unsigned char)*s2)) - return false; - } else if((*s1) !=(*s2)) - return false; - s1++; - s2++; +size_t strpos(const char *str, const char *substr) { + const char *p = strstr(str, substr); + return (p==0)?STRING_NOT_FOUND:p-str; +} + +size_t remove_crlf(char* start, char* end) { + char* from=start; + char* to=start; + bool skip=false; + while(from < end){ + switch(*from){ + case '\n': + case '\r': + case '\t': + case ' ': + if(!skip){ + *to=' '; + to++; + skip=true; + } + break; + default: + if(from != to) + *to=*from; + to++; + skip=false; + } + from++; + } + return to-start; +} + +const char* hex_digits="0123456789ABCDEF"; + +const char* hex_string(unsigned char* bytes, size_t size, bool upcase) { + char *bytes_hex=new(PointerFreeGC) char [size*2/*byte->hh*/+1/*for zero-teminator*/]; + unsigned char *src=bytes; + unsigned char *end=bytes+size; + char *dest=bytes_hex; + + const char *hex=upcase? hex_digits : "0123456789abcdef"; + + for(; src 0; j--) + { + if(dwCrc & 1) + dwCrc = (dwCrc >> 1) ^ dwPolynomial; + else + dwCrc >>= 1; + } + crc32Table[i] = dwCrc; } + } +} + +inline void CalcCrc32(const unsigned char byte, unsigned long &crc32) +{ + crc32 = ((crc32) >> 8) ^ crc32Table[(byte) ^ ((crc32) & 0x000000FF)]; +} + + +unsigned long pa_crc32(const char *in, size_t in_size){ + unsigned long crc32=0xFFFFFFFF; + + InitCrc32Table(); + for(size_t i = 0; i(context); + if(finfo.st_size) { + InitCrc32Table(); + int nCount=0; + do { + unsigned char buffer[FILE_BUFFER_SIZE]; + nCount = file_block_read(f, buffer, sizeof(buffer)); + for(int i = 0; i < nCount; i++) CalcCrc32(buffer[i], crc32); + } while(nCount > 0); + } +} + +unsigned long pa_crc32(const String& file_spec){ + unsigned long crc32=0xFFFFFFFF; + file_read_action_under_lock(file_spec, "crc32", file_crc32_file_action, &crc32); + return ~crc32; +} + +// content-type: xxx; charset=WE-NEED-THIS +// content-type: xxx; charset="WE-NEED-THIS" +// content-type: xxx; charset="WE-NEED-THIS"; +Charset* detect_charset(const char* content_type){ + if(content_type){ + char* CONTENT_TYPE=str_upper(content_type); + + if(const char* begin=strstr(CONTENT_TYPE, "CHARSET=")){ + begin+=8; // skip "CHARSET=" + char* end=0; + if(*begin && (*begin=='"' || *begin =='\'')){ + char quote=*begin; + begin++; + end=(char*)strchr(begin, quote); + } + if(!end) + end=(char*)strchr(begin, ';'); + + if(end) + *end=0; // terminator + + return *begin ? &pa_charsets.get_direct(begin) : 0; + } + } + return 0; +} + +const UTF16* pa_utf16_encode(const char* in, Charset& source_charset){ + if(!in) + return 0; + + String::C sIn(in,strlen(in)); + + UTF16* utf16=(UTF16*)pa_malloc_atomic(sIn.length*2+2); + UTF16* utf16_end=utf16; + + if(!source_charset.isUTF8()) + sIn=Charset::transcode(sIn, source_charset, pa_UTF8_charset); + + int status=pa_convertUTF8toUTF16((const UTF8**)&sIn.str, (const UTF8*)(sIn.str+sIn.length), &utf16_end, utf16+sIn.length, strictConversion); + if(status != conversionOK) + throw Exception("utf-16 encode", new String(in), "utf-16 conversion failed (%d)", status); + + *utf16_end=0; + + return utf16; +} + +const char* pa_utf16_decode(const UTF16* in, Charset& asked_charset){ + if(!in) + return 0; + + const UTF16* utf16_start=in; + const UTF16* utf16_end; + + for(utf16_end=in; *utf16_end; utf16_end++); + + char *result = (char *)pa_malloc_atomic((utf16_end-in)*6+1); + char *result_end = result; + + int status=pa_convertUTF16toUTF8(&utf16_start, utf16_end, (UTF8**)&result_end, (UTF8*)(result+(utf16_end-in)*6), strictConversion); + + if(status != conversionOK) + throw Exception("utf-16 decode", 0, "utf conversion failed (%d)", status); + + *result_end='\0'; + + if(asked_charset.isUTF8()) + return result; + + return Charset::transcode(result, pa_UTF8_charset, asked_charset).cstr(); +} + +static bool is_latin(const char *in){ + for(; *in; in++){ + if ((unsigned char)(*in) > 0x7F) + return false; + } + return true; +} + +#define MAX_IDNA_LENGTH 256 + +const char *pa_idna_encode(const char *in, Charset& source_charset){ + if(!in || is_latin(in)) + return in; + + uint32_t utf32[MAX_IDNA_LENGTH]; + uint32_t *utf32_end=utf32; + + String::C sIn(in,strlen(in)); + + if(!source_charset.isUTF8()) + sIn=Charset::transcode(sIn, source_charset, pa_UTF8_charset); + + int status=pa_convertUTF8toUTF32((const UTF8**)&sIn.str, (const UTF8*)(sIn.str+sIn.length), &utf32_end, utf32+MAX_IDNA_LENGTH-1, strictConversion); + if(status != conversionOK) + throw Exception("idna encode", new String(in), "utf conversion failed (%d)", status); + + *utf32_end=0; + + char *result = (char *)pa_malloc(MAX_IDNA_LENGTH); + status=pa_idna_to_ascii_4z(utf32, result, MAX_IDNA_LENGTH, 0); + if(status != IDNA_SUCCESS) + throw Exception("idna encode", new String(in), "encode failed: %s", pa_idna_strerror(status)); + + return result; } +const char *pa_idna_decode(const char *in, Charset &asked_charset){ + if(!in || !(*in)) + return in; + + uint32_t utf32[MAX_IDNA_LENGTH]; + const uint32_t *utf32_start=utf32; + uint32_t *utf32_end; + + int status=pa_idna_to_unicode_4z(in, utf32, MAX_IDNA_LENGTH, 0); + if(status != IDNA_SUCCESS) + throw Exception("idna decode", new String(in), "decode failed: %s", pa_idna_strerror(status)); + + for(utf32_end=utf32; *utf32_end; utf32_end++); + + char *result = (char *)pa_malloc(MAX_IDNA_LENGTH); + char *result_end = result; + + status=pa_convertUTF32toUTF8(&utf32_start, utf32_end, (UTF8**)&result_end, (UTF8*)(result+MAX_IDNA_LENGTH-1), strictConversion); + if(status != conversionOK) + throw Exception("idna decode", new String(in), "utf conversion failed (%d)", status); + + *result_end='\0'; + + if(!asked_charset.isUTF8()) + result = (char *)Charset::transcode(result, pa_UTF8_charset, asked_charset).cstr(); + + return result; +} /// must be last in this file #undef vsnprintf -int __vsnprintf(char* b, size_t s, const char* f, va_list l) { +int pa_vsnprintf(char* b, size_t s, const char* f, va_list l) { if(!s) return 0; int r; - // note: on win32& maybe somewhere else + // note: on win32 & maybe somewhere else // vsnprintf do not writes terminating 0 in 'buffer full' case, reducing + // http://stackoverflow.com/questions/2915672/snprintf-and-visual-studio-2010 --s; // clients do not check for negative 's', feature: ignore such prints if((ssize_t)s<0) return 0; -#if _MSC_VER - /* - win32: - mk:@MSITStore:C:\Program%20Files\Microsoft%20Visual%20Studio\MSDN\2001APR\1033\vccore.chm::/html/_crt__vsnprintf.2c_._vsnwprintf.htm - - if the number of bytes to write exceeds buffer, then count bytes are written and Ö1 is returned - */ +#ifdef _MSC_VER + // win32: if the number of bytes to write exceeds buffer, then count bytes are written and -1 is returned r=_vsnprintf(b, s, f, l); if(r<0) r=s; #else r=vsnprintf(b, s, f, l); /* - solaris: - man vsnprintf + solaris: man vsnprintf - The snprintf() function returns the number of characters + The snprintf() function returns the number of characters formatted, that is, the number of characters that would have been written to the buffer if it were large enough. If the value of n is 0 on a call to snprintf(), an unspecified @@ -1247,186 +1204,11 @@ int __vsnprintf(char* b, size_t s, const return r; } -int __snprintf(char* b, size_t s, const char* f, ...) { +int pa_snprintf(char* b, size_t s, const char* f, ...) { va_list l; - va_start(l, f); - int r=__vsnprintf(b, s, f, l); - va_end(l); + va_start(l, f); + int r=pa_vsnprintf(b, s, f, l); + va_end(l); return r; } -/* mime64 functions are from libgmime[http://spruce.sourceforge.net/gmime/] lib */ -/* - * Authors: Michael Zucchi - * Jeffrey Stedfast - * - * Copyright 2000 Helix Code, Inc. (www.helixcode.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA. - * - */ -static char *base64_alphabet = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - -/** - * g_mime_utils_base64_encode_step: - * @in: input stream - * @inlen: length of the input - * @out: output string - * @state: holds the number of bits that are stored in @save - * @save: leftover bits that have not yet been encoded - * - * Base64 encodes a chunk of data. Performs an 'encode step', only - * encodes blocks of 3 characters to the output at a time, saves - * left-over state in state and save (initialise to 0 on first - * invocation). - * - * Returns the number of bytes encoded. - **/ -static size_t -g_mime_utils_base64_encode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save) -{ - register const unsigned char *inptr; - register unsigned char *outptr; - - if (inlen <= 0) - return 0; - - inptr = in; - outptr = out; - - if (inlen + ((unsigned char *)save)[0] > 2) { - const unsigned char *inend = in + inlen - 2; - register int c1 = 0, c2 = 0, c3 = 0; - register int already; - - already = *state; - - switch (((char *)save)[0]) { - case 1: c1 = ((unsigned char *)save)[1]; goto skip1; - case 2: c1 = ((unsigned char *)save)[1]; - c2 = ((unsigned char *)save)[2]; goto skip2; - } - - /* yes, we jump into the loop, no i'm not going to change it, its beautiful! */ - while (inptr < inend) { - c1 = *inptr++; - skip1: - c2 = *inptr++; - skip2: - c3 = *inptr++; - *outptr++ = base64_alphabet [c1 >> 2]; - *outptr++ = base64_alphabet [(c2 >> 4) | ((c1 & 0x3) << 4)]; - *outptr++ = base64_alphabet [((c2 & 0x0f) << 2) | (c3 >> 6)]; - *outptr++ = base64_alphabet [c3 & 0x3f]; - /* this is a bit ugly ... */ - if ((++already) >= 19) { - *outptr++ = '\n'; - already = 0; - } - } - - ((unsigned char *)save)[0] = 0; - inlen = 2 - (inptr - inend); - *state = already; - } - - //d(printf ("state = %d, inlen = %d\n", (int)((char *)save)[0], inlen)); - - if (inlen > 0) { - register char *saveout; - - /* points to the slot for the next char to save */ - saveout = & (((char *)save)[1]) + ((char *)save)[0]; - - /* inlen can only be 0 1 or 2 */ - switch (inlen) { - case 2: *saveout++ = *inptr++; - case 1: *saveout++ = *inptr++; - } - ((char *)save)[0] += inlen; - } - - /*d(printf ("mode = %d\nc1 = %c\nc2 = %c\n", - (int)((char *)save)[0], - (int)((char *)save)[1], - (int)((char *)save)[2]));*/ - - return (outptr - out); -} - -/** - * g_mime_utils_base64_encode_close: - * @in: input stream - * @inlen: length of the input - * @out: output string - * @state: holds the number of bits that are stored in @save - * @save: leftover bits that have not yet been encoded - * - * Base64 encodes the input stream to the output stream. Call this - * when finished encoding data with g_mime_utils_base64_encode_step to - * flush off the last little bit. - * - * Returns the number of bytes encoded. - **/ -static size_t -g_mime_utils_base64_encode_close (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save) -{ - unsigned char *outptr = out; - int c1, c2; - - if (inlen > 0) - outptr += g_mime_utils_base64_encode_step (in, inlen, outptr, state, save); - - c1 = ((unsigned char *)save)[1]; - c2 = ((unsigned char *)save)[2]; - - switch (((unsigned char *)save)[0]) { - case 2: - outptr[2] = base64_alphabet [(c2 & 0x0f) << 2]; - goto skip; - case 1: - outptr[2] = '='; - skip: - outptr[0] = base64_alphabet [c1 >> 2]; - outptr[1] = base64_alphabet [c2 >> 4 | ((c1 & 0x3) << 4)]; - outptr[3] = '='; - outptr += 4; - break; - } - - *outptr++ = 0; - - *save = 0; - *state = 0; - - return (outptr - out); -} - -char* pa_base64(const char *in, size_t len) -{ - /* wont go to more than 2x size (overly conservative) */ - char* result=new(PointerFreeGC) char[len * 2 + 6]; - int state=0; - int save=0; -#ifndef NDEBUG - size_t filled= -#endif - g_mime_utils_base64_encode_close ((const unsigned char*)in, len, - (unsigned char*)result, &state, &save); - assert(filled <= len * 2 + 6); - - return result; -}