--- parser3/src/include/pa_common.h 2002/08/01 11:26:46 1.78 +++ parser3/src/include/pa_common.h 2013/07/16 14:55:45 1.151 @@ -1,35 +1,83 @@ /** @file Parser: commonly used functions. - Copyright (c) 2001, 2002 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ #ifndef PA_COMMON_H #define PA_COMMON_H -static const char* IDENT_COMMON_H="$Id: pa_common.h,v 1.78 2002/08/01 11:26:46 paf Exp $"; +#define IDENT_PA_COMMON_H "$Id: pa_common.h,v 1.151 2013/07/16 14:55:45 moko Exp $" -#include "pa_pool.h" #include "pa_string.h" +#include "pa_hash.h" + +class Request; + +// defines +#define HTTP_USER_AGENT "user-agent" + +#define HTTP_STATUS "status" +#define HTTP_STATUS_CAPITALIZED "Status" + +#define HTTP_CONTENT_LENGTH "content-length" +#define HTTP_CONTENT_LENGTH_CAPITALIZED "Content-Length" + +#define HTTP_CONTENT_TYPE "content-type" +#define HTTP_CONTENT_TYPE_UPPER "CONTENT-TYPE" +#define HTTP_CONTENT_TYPE_CAPITALIZED "Content-Type" +#define HTTP_CONTENT_TYPE_FORM_URLENCODED "application/x-www-form-urlencoded" +#define HTTP_CONTENT_TYPE_MULTIPART_FORMDATA "multipart/form-data" +#define HTTP_CONTENT_TYPE_MULTIPART_RELATED "multipart/related" +#define HTTP_CONTENT_TYPE_MULTIPART_MIXED "multipart/mixed" + +#define CONTENT_TRANSFER_ENCODING_NAME "content-transfer-encoding" +#define CONTENT_TRANSFER_ENCODING_CAPITALIZED "Content-Transfer-Encoding" + +#define CONTENT_DISPOSITION "content-disposition" +#define CONTENT_DISPOSITION_CAPITALIZED "Content-Disposition" +#define CONTENT_DISPOSITION_ATTACHMENT "attachment" +#define CONTENT_DISPOSITION_INLINE "inline" +#define CONTENT_DISPOSITION_FILENAME_NAME "filename" + +#define BASE64_STRICT_OPTION_NAME "strict" + +const String http_content_type(HTTP_CONTENT_TYPE); + +const String content_transfer_encoding_name(CONTENT_TRANSFER_ENCODING_NAME); + +const String content_disposition(CONTENT_DISPOSITION); +const String content_disposition_inline(CONTENT_DISPOSITION_INLINE); +const String content_disposition_attachment(CONTENT_DISPOSITION_ATTACHMENT); +const String content_disposition_filename_name(CONTENT_DISPOSITION_FILENAME_NAME); + + +#define HASH_ORDER + +#ifdef HASH_ORDER +#undef PA_HASH_CLASS +#include "pa_hash.h" +#endif class Value; +typedef HASH_STRING HashStringValue; // replace system s*nprintf with our versions #undef vsnprintf -int __vsnprintf(char *, size_t, const char *, va_list); +int __vsnprintf(char *, size_t, const char* , va_list); #define vsnprintf __vsnprintf #undef snprintf -int __snprintf(char *, size_t, const char *, ...); +int __snprintf(char *, size_t, const char* , ...); #define snprintf __snprintf #if _MSC_VER /* -inline int open( const char *filename, int oflag ) { return _open(filename, oflag); } +inline int open( const char* filename, int oflag ) { return _open(filename, oflag); } inline int close( int handle ) { return _close(handle); } inline int read( int handle, void *buffer, unsigned int count ) { return _read(handle,buffer,count); } inline int write( int handle, const void *buffer, unsigned int count ) { return _write(handle,buffer,count); } -inline int stat( const char *path, struct _stat *buffer ) { return _stat(path, buffer); } +inline int stat( const char* path, struct _stat *buffer ) { return _stat(path, buffer); } inline long lseek( int handle, long offset, int origin ) { return _lseek(handle, offset, origin); } */ @@ -55,57 +103,86 @@ inline long lseek( int handle, long offs #endif -#ifdef HAVE_TRUNC -# ifndef trunc -extern "C" double trunc(double); -# endif -#else -inline double trunc(double param) { return param > 0? floor(param) : ceil(param); } -#endif - -#ifdef HAVE_ROUND -# ifndef round -extern "C" double round(double); -# endif -#else -inline double round(double param) { return floor(param+0.5); } -#endif -#ifdef HAVE_SIGN -# ifndef sign -extern "C" double sign(double); -# endif -#else -inline double sign(double param) { return param > 0 ? 1 : ( param < 0 ? -1 : 0 ); } -#endif - -/// yields to OS for secs secs and usecs milliseconds -int pa_sleep(unsigned long secs, unsigned long usecs); +const char* capitalize(const char* s); /** under WIN32 "t" mode fixes DOS chars OK, can't say that about other systems/ line break styles */ void fix_line_breaks( - char *buf, - size_t& size ///< may change! used to speedup next actions - ); + char *str, + size_t& length///< may change! used to speedup next actions + ); + +int pa_lock_shared_blocking(int fd); +int pa_lock_exclusive_blocking(int fd); +int pa_lock_exclusive_nonblocking(int fd); +int pa_unlock(int fd); + +void create_dir_for_file(const String& file_spec); + +typedef void (*File_read_action)( + struct stat& finfo, + int f, + const String& file_spec, const char* fname, bool as_text, + void *context); /** - read specified text file using pool, + shared-lock specified file, + do actions under lock. if fail_on_read_problem is true[default] throws an exception + + @returns true if read OK */ -char *file_read_text(Pool& pool, - const String& file_spec, - bool fail_on_read_problem=true); +bool file_read_action_under_lock(const String& file_spec, + const char* action_name, File_read_action action, void *context, + bool as_text=false, + bool fail_on_read_problem=true); /** - read specified file using pool, + read specified text file using if fail_on_read_problem is true[default] throws an exception + + WARNING: charset is used for http header case conversion, it's not a charset of input file! */ -bool file_read(Pool& pool, const String& file_spec, - void*& data, size_t& size, - bool as_text, - bool fail_on_read_problem=true, - size_t offset=0, size_t limit=0); +char *file_read_text(Request_charsets& charsets, + const String& file_spec, + bool fail_on_read_problem=true, + HashStringValue* options=0, + bool transcode_result=true); + +char *file_load_text(Request& r, + const String& file_spec, + bool fail_on_read_problem=true, + HashStringValue* options=0, + bool transcode_result=true); + +struct File_read_result { + bool success; + char* str; size_t length; + HashStringValue* headers; +}; + +/** + read specified file using + if fail_on_read_problem is true[default] throws an exception + + WARNING: charset is used for http header case conversion, it's not a charset of input file! +*/ +File_read_result file_read(Request_charsets& charsets, + const String& file_spec, + bool as_text, + HashStringValue* options=0, + bool fail_on_read_problem=true, + char* buf=0, size_t offset=0, size_t size=0, bool transcode_text_result=true); + +File_read_result file_load(Request& r, + const String& file_spec, + bool as_text, + HashStringValue* options=0, + bool fail_on_read_problem=true, + char* buf=0, size_t offset=0, size_t size=0, bool transcode_text_result=true); + +typedef void (*File_write_action)(int f, void *context); /** lock specified file exclusively, @@ -117,7 +194,9 @@ bool file_read(Pool& pool, const String& */ bool file_write_action_under_lock( const String& file_spec, - const char *action_name, void (*action)(int, void *), void *context=0, + const char* action_name, + File_write_action action, + void *context, bool as_text=false, bool do_append=false, bool do_block=true, @@ -128,33 +207,38 @@ bool file_write_action_under_lock( throws an exception in case of problems */ void file_write( + Request_charsets& charsets, const String& file_spec, - const void *data, size_t size, + const char* data, + size_t size, bool as_text, - bool do_append=false); + bool do_append=false, + Charset* asked_charset=0); /** delete specified file throws an exception in case of problems */ -bool file_delete(const String& file_spec, bool fail_on_read_problem=true); +bool file_delete(const String& file_spec, bool fail_on_problem=true, bool keep_empty_dirs=false); /** move specified file throws an exception in case of problems */ -void file_move(const String& old_spec, const String& new_spec); +void file_move(const String& old_spec, const String& new_spec, bool keep_empty_dirs=false); -bool file_readable(const String& file_spec); -bool dir_readable(const String& file_spec); -String *file_readable(const String& path, const String& name); +bool entry_exists(const char* fname, struct stat *afinfo=0); +bool entry_exists(const String& file_spec); +bool file_exist(const String& file_spec); +bool dir_exists(const String& file_spec); +const String* file_exist(const String& path, const String& name); bool file_executable(const String& file_spec); bool file_stat(const String& file_spec, - size_t& rsize, - time_t& ratime, - time_t& rmtime, - time_t& rctime, - bool fail_on_read_problem=true); + size_t& rsize, + time_t& ratime, + time_t& rmtime, + time_t& rctime, + bool fail_on_read_problem=true); /** scans for @a delim[default \n] in @a *row_ref, @@ -166,26 +250,13 @@ char *getrow(char **row_ref,char delim=' char *lsplit(char *string, char delim); char *lsplit(char **string_ref,char delim); char *rsplit(char *string, char delim); -char *format(Pool& pool, double value, char *fmt); - -#ifndef max -inline int max(int a, int b) { return a>b?a:b; } -inline int min(int a, int b){ return ab?a:b; } -inline size_t min(size_t a, size_t b){ return a - content-type: text/html - $content-type[$value[text/html] charset[windows-1251]] -> - content-type: text/html; charset=windows-1251 -*/ -const String& attributed_meaning_to_string(Value& meaning, String::Untaint_lang lang); +char *search_stop(char*& current, char cstop_at); #ifdef WIN32 void back_slashes_to_slashes(char *s); @@ -197,11 +268,150 @@ void back_slashes_to_slashes(char *s); qsort(names,cnt,sizeof_names,func_addr) #endif -bool StrEqNc(const char *s1, const char *s2, bool strict=true); +bool StrStartFromNC(const char* str, const char* substr, bool equal=false); +size_t strpos(const char *str, const char *substr); + +Charset* detect_charset(const char* content_type); #define SECS_PER_DAY (60*60*24) int getMonthDays(int year, int month); -void remove_crlf(char *start, char *end); +int remove_crlf(char *start, char *end); +inline bool pa_isalpha(unsigned char c) { + return (((c>='A') && (c<='Z')) || ((c>='a') && (c<='z'))); +} + +inline bool pa_isalnum(unsigned char c) { + return (((c>='0') && (c<='9')) || pa_isalpha(c)); +} + +void check_safe_mode(struct stat finfo, const String& file_spec, const char* fname); + +void pa_base64_decode(const char *in, size_t in_size, char*& result, size_t& result_size, bool strict=false); +char* pa_base64_encode(const char *in, size_t in_size); +struct File_base64_action_info { + unsigned char** base64; +}; +char* pa_base64_encode(const String& file_spec); +static void file_base64_file_action( + struct stat& finfo, + int f, + const String&, const char* /*fname*/, bool, + void *context); + +#define FILE_BUFFER_SIZE 4096 +static unsigned long crc32Table[256]; +static void InitCrc32Table() +{ + if(crc32Table[1] == 0){ + // This is the official polynomial used by CRC32 in PKZip. + // Often times the polynomial shown reversed as 0x04C11DB7. + static const unsigned long dwPolynomial = 0xEDB88320; + + for(int i = 0; i < 256; i++) + { + unsigned long dwCrc = i; + for(int j = 8; j > 0; j--) + { + if(dwCrc & 1) + dwCrc = (dwCrc >> 1) ^ dwPolynomial; + else + dwCrc >>= 1; + } + crc32Table[i] = dwCrc; + } + } +} + +int file_block_read(const int f, unsigned char* buffer, const size_t size); + +inline void CalcCrc32(const unsigned char byte, unsigned long &crc32) +{ + crc32 = ((crc32) >> 8) ^ crc32Table[(byte) ^ ((crc32) & 0x000000FF)]; +} + +const unsigned long pa_crc32(const char *in, size_t in_size); +const unsigned long pa_crc32(const String& file_spec); +static void file_crc32_file_action( + struct stat& finfo, + int f, + const String&, const char* /*fname*/, bool, + void *context); + +static const char* hex_string(unsigned char* bytes, size_t size, bool upcase) { + char *bytes_hex=new(PointerFreeGC) char [size*2/*byte->hh*/+1/*for zero-teminator*/]; + unsigned char *src=bytes; + unsigned char *end=bytes+size; + char *dest=bytes_hex; + + const char *hex=upcase?"0123456789ABCDEF":"0123456789abcdef"; + + for(; srcput(key, value); +} + +static void remove_key_from( + HashStringValue::key_type key, + HashStringValue::value_type /*value*/, + HashStringValue* dest) { + dest->remove(key); +} + +static String::C date_gmt_string(tm* tms) { + /// http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3 + static const char month_names[12][4]={ + "Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"}; + static const char days[7][4]={ + "Sun","Mon","Tue","Wed","Thu","Fri","Sat"}; + + char *buf=new(PointerFreeGC) char[MAX_STRING]; + return String::C(buf, + snprintf(buf, MAX_STRING, "%s, %.2d %s %.4d %.2d:%.2d:%.2d GMT", + days[tms->tm_wday], + tms->tm_mday,month_names[tms->tm_mon],tms->tm_year+1900, + tms->tm_hour,tms->tm_min,tms->tm_sec)); +} + +// globals + +extern const String file_status_name; + +// global defines for file options which are handled but not checked elsewhere, we check them + +#define PA_SQL_LIMIT_NAME "limit" +#define PA_SQL_OFFSET_NAME "offset" +#define PA_COLUMN_SEPARATOR_NAME "separator" +#define PA_COLUMN_ENCLOSER_NAME "encloser" +#define PA_CHARSET_NAME "charset" +#define PA_RESPONSE_CHARSET_NAME "response-charset" + +// globals defines for sql options + +#define SQL_BIND_NAME "bind" +#define SQL_DEFAULT_NAME "default" +#define SQL_DISTINCT_NAME "distinct" +#define SQL_VALUE_TYPE_NAME "type" + +#ifndef DOXYGEN +enum Table2hash_distint { D_ILLEGAL, D_FIRST }; +enum Table2hash_value_type { C_HASH, C_STRING, C_TABLE }; #endif + +#endif +