--- parser3/src/include/pa_string.h 2002/04/19 08:28:35 1.136 +++ parser3/src/include/pa_string.h 2003/03/05 11:42:04 1.144.2.26 @@ -1,31 +1,44 @@ /** @file Parser: string class decl. - Copyright (c) 2001, 2002 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001-2003 ArtLebedev Group (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) - - $Id: pa_string.h,v 1.136 2002/04/19 08:28:35 paf Exp $ */ #ifndef PA_STRING_H #define PA_STRING_H +static const char* IDENT_STRING_H="$Date: 2003/03/05 11:42:04 $"; + +// includes + #include "pa_pool.h" #include "pa_types.h" +#include "pa_array.h" + +// forwards + +class Table; DECLARE_OBJECT_PTR(Table); +class SQL_Connection; +class Dictionary; +class Request_charsets; +typedef Array ArrayString; DECLARE_OBJECT_PTR(ArrayString); + +// helpers #ifndef NO_STRING_ORIGIN # define STRING_APPEND_PARAMS \ - const char *src, size_t size, \ - uchar lang, \ - const char *file, uint line + const char* src, size_t size, \ + String_UL lang, \ + const char* file, uint line /// appends piece to String @see String::real_append # define APPEND(src, size, lang, file, line) \ real_append(src, size, lang, file, line) #else # define STRING_APPEND_PARAMS \ - const char *src, \ + const char* src, \ size_t size, \ - uchar lang + String_UL lang /// appends piece to String @see String::real_append # define APPEND(src, size, lang, file, line) \ real_append(src, size, lang) @@ -42,39 +55,35 @@ /// handy: appends const char* piece to String @see String::real_append #define APPEND_CONST(src) APPEND_AS_IS(src, 0, 0, 0) -class Table; -class Array; -class SQL_Connection; -class Dictionary; +typedef uint String_UL; -/** - Pooled string. +struct String_fragment { + String_UL/*Untaint_lang*/ lang; ///< untaint flag, later untaint language + const char* ptr; ///< pointer to the start + size_t size; ///< length + +#ifndef NO_STRING_ORIGIN + /// all String pieces hold information of where they come from + struct Origin { + const char* file; ///< macros file name | load file name | sql query text + uint line; ///< file line no | record no + } origin; ///< origin +#endif +}; - Internal structure: - @verbatim - String Chunk0 - ====== ======== - head--------------->[ptr, size, ...] - append_here-------->[ptr, size, ...] - . - . - [ptr, size, ...] - link_row----------->[link to the next chunk] - @endverbatim +/** + String which knows the language of all it's fragments. All pieces remember - the file and its line they are from [can be turned off by NO_STRING_ORIGIN] - whether they are tainted or not, and the language which should be used to detaint them */ -#include "pa_pragma_pack_begin.h" -class String : public Pooled { -public: - enum { - CR_PREALLOCATED_COUNT=2, ///< default preallocated item count - CR_GROW_COUNT=1 ///< each time the String chunk_is_full() string expanded() - }; +class String: public Array { + size_t fsize; + int count() const { return 0; } // hiding fro accidental use instead of size() +public: /** piece is tainted or not. the language to use when detaint remember to change String_Untaint_lang_name @ untaint.C along @@ -101,30 +110,38 @@ public: UL_JS, ///< JavaScript code UL_XML, ///< ^dom:set xml UL_HTML, ///< HTML code (for editing) - UL_OPTIMIZE_BIT = 0x80 ///< flag, requiring cstr whitespace optimization + UL_OPTIMIZE_BIT = 0x8000 ///< flag, requiring cstr whitespace optimization }; public: - String(Pool& apool, const char *src=0, size_t src_size=0, bool tainted=false); + explicit String(const char* src=0, size_t src_size=0, bool tainted=false); String(const String& src); - bool is_empty() const { return append_here==head.chunk.rows; } - size_t size() const; + bool is_empty() const { return !size(); } + size_t size() const { return fsize; } /// convert to C string. if 'lang' known, forcing 'lang' to it - char *cstr(Untaint_lang lang=UL_AS_IS, + CharPtr cstr(Untaint_lang lang=UL_AS_IS, SQL_Connection *connection=0, - Charset *cstr_charset=0, const char *cstr_charset_name=0) const { + const Request_charsets *charsets=0) const { - char *result=(char *)malloc(cstr_bufsize(lang, connection, cstr_charset)); - char *eol=store_to(result, lang, connection, cstr_charset, cstr_charset_name); + CharPtr result(new char[cstr_bufsize(lang, connection, charsets)]); + char *eol=store_to(result.get(), lang, connection, charsets); + *eol=0; + return result; + } + char *cstr(Pool& pool, Untaint_lang lang=UL_AS_IS, + SQL_Connection *connection=0, + const Request_charsets *charsets=0) const { + char *result=new(pool) char[cstr_bufsize(lang, connection, charsets)]; + char *eol=store_to(result, lang, connection, charsets); *eol=0; return result; } - char *cstr_debug_origins() const; + char *cstr_debug_origins(Pool& pool) const; /// puts pieces to buf - void serialize(size_t prolog_size, void *& buf, size_t& buf_size) const; + void serialize(Pool& pool, size_t prolog_size, char *& buf, size_t& buf_size) const; /// appends pieces from buf to self - bool deserialize(size_t prolog_size, void *buf, size_t buf_size, const char *file); + bool deserialize(size_t prolog_size, void *buf, size_t buf_size, const char* file); /** append fragment @see APPEND_AS_IS, APPEND_CLEAN, APPEND_TAINTED, APPEND_CONST */ @@ -153,6 +170,11 @@ public: */ int cmp(int& partial, const char* src_ptr, size_t src_size=0, size_t this_offset=0, Untaint_lang lang=UL_UNSPECIFIED) const; + /// this starts with src + bool starts_with(const char* src_ptr, size_t src_size=0) const { + int p; cmp(p, src_ptr, src_size); + return p==0 || p==2; + } bool operator == (const char* src_ptr) const { size_t src_size=src_ptr?strlen(src_ptr):0; if(size() != src_size) @@ -165,158 +187,115 @@ public: return cmp(partial, src_ptr, 0)!=0; } - String& append(const String& src, uchar lang, bool forced=false); + String& append_to(String& dest, String_UL lang, bool forced) const; + String& append(const String& src, String_UL lang, bool forced=false) { + return src.append_to(*this, lang, forced); + } String& operator << (const String& src) { return append(src, UL_PASS_APPENDED); } - String& operator << (const char *src) { return APPEND_CONST(src); } + String& operator << (StringPtr src) { return append(*src, UL_PASS_APPENDED); } + String& operator << (const char* src) { return APPEND_CONST(src); } - /// simple hash code of string. used by Hash + /// simple hash code of string uint hash_code() const; - /// extracts first char of a string - char first_char() const; + /// extracts first char of a string, if any + char first_char() const { + return is_empty()?0:*felements[0].ptr; + } /// extracts [start, finish) piece of string - String& mid(size_t start, size_t finish) const; + StringPtr mid(size_t start, size_t finish) const; /// @return position of substr in string, -1 means "not found" [String version] int pos(const String& substr, int this_offset=0, Untaint_lang lang=UL_UNSPECIFIED) const; /// @return position of substr in string, -1 means "not found" [const char* version] - int pos(const char *substr, size_t substr_size=0, + int pos(const char* substr, size_t substr_size=0, int this_offset=0, Untaint_lang lang=UL_UNSPECIFIED) const; - void split(Array& result, + void split(ArrayString& result, size_t *pos_after_ref, - const char *delim, size_t delim_size, - Untaint_lang lang=UL_UNSPECIFIED, int limit=-1) const; - void split(Array& result, + const char* delim, size_t delim_size, + Untaint_lang lang=UL_UNSPECIFIED, int limit=-1); + void split(ArrayString& result, size_t *pos_after_ref, const String& delim, - Untaint_lang lang=UL_UNSPECIFIED, int limit=-1) const; + Untaint_lang lang=UL_UNSPECIFIED, int limit=-1); - typedef void (*Row_action)(Table& table, Array *row, + typedef void (*Row_action)(TablePtr table, ArrayStringPtr row, int prestart, int prefinish, int poststart, int postfinish, void *info); /** - @return true if fills table. + @return table of found items, if any. table format is defined and fixed[can be used by others]: @verbatim prematch/match/postmatch/1/2/3/... @endverbatim */ - bool match( - const String *aorigin, + TablePtr match(Charset& source_charset, + StringPtr aorigin, const String& regexp, - const String *options, - Table **table, + StringPtr options, Row_action row_action, void *info, bool *was_global=0) const; enum Change_case_kind { CC_UPPER, CC_LOWER }; - String& change_case(Pool& pool, - Change_case_kind kind) const; - String& replace(Pool& pool, Dictionary& dict) const; - double as_double() const; - int as_int() const; - -#ifndef NO_STRING_ORIGIN - /// origin of string. calculated by first row - const Origin& origin() const; -#endif + StringPtr change_case(Pool& pool, Charset& source_charset, + Change_case_kind kind); + StringPtr replace(Pool& pool, const Dictionary& dict) const; + double as_double(); + int as_int(); -private: + StringPtr join_chains(Pool& pool) const; - /** several String fragments - */ - struct Chunk { - typedef uchar count_type; - count_type count; ///< the number of rows in chunk - // here could be some padding bytes - /// string fragment or a link to next chunk union - typedef union Row { - typedef uchar item_size_type; - /// fragment - struct { - const char *ptr; ///< pointer to the start - item_size_type size; ///< length - uchar/*Untaint_lang*/ lang; ///< untaint flag, later untaint language #ifndef NO_STRING_ORIGIN - Origin origin; ///< origin + /// origin of string. calculated by first row + const String_fragment::Origin& origin() const; #endif - } item; - // we are using the fact that there's no padding before this field! - Chunk *link; ///< link to the next chunk in chain - } rows_type[CR_PREALLOCATED_COUNT]; - rows_type rows; - }; - /** - 'mutable' because can write after it's end, after it was appended to somebody - @see String::append - */ - mutable struct { - Chunk chunk; - Chunk *link_storage; - } head; ///< the head chunk of the chunk chain - - /// next append would write to this record - Chunk::Row *append_here; - -private: - /// last chunk - mutable Chunk *last_chunk; - -private: - bool chunk_is_full() { - return append_here == last_chunk->rows+last_chunk->count; - } - uint used_rows() const; - void expand(); - - Untaint_lang lang_of(size_t offset) const; - - size_t cstr_bufsize(Untaint_lang lang, - SQL_Connection *connection, - Charset *buf_charset) const; + size_t cstr_bufsize(Untaint_lang lang=UL_UNSPECIFIED, + SQL_Connection *connection=0, + const Request_charsets *charsets=0) const; /// convert to C string, store to 'dest' which must be big enough for proper untaint char *store_to(char *dest, Untaint_lang lang=UL_UNSPECIFIED, SQL_Connection *connection=0, - Charset *store_to_charset=0, - const char *store_to_charset_name=0) const; - - void join_chain(Pool& pool, + const Request_charsets *charsets=0) const; +/* + void join_chain( const Chunk*& achunk, const Chunk::Row*& arow, uint& acountdown, - uchar& joined_lang, const char *& joined_ptr, size_t& joined_size) const; - + String_UL& joined_lang, const char* & joined_ptr, size_t& joined_size) const; +*/ private: //disabled String& operator = (const String&) { return *this; } }; -#include "pa_pragma_pack_end.h" -#define STRING_PREPARED_FOREACH_ROW(self, body) \ - while(row!=(self).append_here) { \ - if(countdown==0) { \ - chunk=row->link; \ - row=chunk->rows; \ - countdown=chunk->count; \ - }; \ - { body } \ - row++; countdown--; \ - } - -#define STRING_PREFIX_FOREACH_ROW(self, body) { \ - const Chunk *chunk=&(self).head.chunk; \ - const Chunk::Row *row=chunk->rows; \ - uint countdown=chunk->count; \ - STRING_PREPARED_FOREACH_ROW(self, body) \ + +/// simple hash code of string. used by Hash +inline uint hash_code(const String& self) { + return self.hash_code(); } +/// simple hash code of string. used by Hash +inline uint hash_code(StringPtr self) { + return self->hash_code(); +} + +// helpers -#define STRING_FOREACH_ROW(body) STRING_PREFIX_FOREACH_ROW(*this, body) -#define STRING_SRC_FOREACH_ROW(body) STRING_PREFIX_FOREACH_ROW(src, body) +#define STRING_PREPARED_FOREACH_FRAGMENT(code) \ + for(; fragment