--- parser3/src/include/pa_string.h 2002/04/19 08:28:35 1.136 +++ parser3/src/include/pa_string.h 2003/01/23 15:38:05 1.144.2.2 @@ -1,17 +1,18 @@ /** @file Parser: string class decl. - Copyright (c) 2001, 2002 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001, 2003 ArtLebedev Group (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) - - $Id: pa_string.h,v 1.136 2002/04/19 08:28:35 paf Exp $ */ #ifndef PA_STRING_H #define PA_STRING_H +static const char* IDENT_STRING_H="$Date: 2003/01/23 15:38:05 $"; + #include "pa_pool.h" #include "pa_types.h" +#include "pa_array.h" #ifndef NO_STRING_ORIGIN # define STRING_APPEND_PARAMS \ @@ -43,38 +44,35 @@ #define APPEND_CONST(src) APPEND_AS_IS(src, 0, 0, 0) class Table; -class Array; class SQL_Connection; class Dictionary; -/** - Pooled string. +struct String_fragment { + const char *ptr; ///< pointer to the start + size_t size; ///< length + int/*Untaint_lang*/ lang; ///< untaint flag, later untaint language + +#ifndef NO_STRING_ORIGIN + /// all String pieces hold information of where they come from + struct Origin { + const char *file; ///< macros file name | load file name | sql query text + uint line; ///< file line no | record no + } origin; ///< origin +#endif +}; - Internal structure: - @verbatim - String Chunk0 - ====== ======== - head--------------->[ptr, size, ...] - append_here-------->[ptr, size, ...] - . - . - [ptr, size, ...] - link_row----------->[link to the next chunk] - @endverbatim +/** + String which knows the language of all its fragments.
All pieces remember - the file and its line they are from [can be turned off by NO_STRING_ORIGIN] - whether they are tainted or not, and the language which should be used to detaint them */ -#include "pa_pragma_pack_begin.h" -class String : public Pooled { -public: - enum { - CR_PREALLOCATED_COUNT=2, ///< default preallocated item count - CR_GROW_COUNT=1 ///< each time the String chunk_is_full() string expanded() - }; +class String: public Array { + size_t fsize; +public: /** piece is tainted or not. the language to use when detaint remember to change String_Untaint_lang_name @ untaint.C along @@ -106,10 +104,11 @@ public: public: - String(Pool& apool, const char *src=0, size_t src_size=0, bool tainted=false); + //static String& OnPool(Pool& apool, const char *local_src=0, size_t src_size=0, bool tainted=false); + String(const char *src=0, size_t src_size=0, bool tainted=false); String(const String& src); - bool is_empty() const { return append_here==head.chunk.rows; } - size_t size() const; + bool is_empty() const { return !size(); } + size_t size() const { return fsize; } /// convert to C string. if 'lang' known, forcing 'lang' to it char *cstr(Untaint_lang lang=UL_AS_IS, SQL_Connection *connection=0, @@ -153,6 +152,11 @@ public: */ int cmp(int& partial, const char* src_ptr, size_t src_size=0, size_t this_offset=0, Untaint_lang lang=UL_UNSPECIFIED) const; + /// this starts with src + bool starts_with(const char* src_ptr, size_t src_size=0) const { + int p; cmp(p, src_ptr, src_size); + return p==0 || p==2; + } bool operator == (const char* src_ptr) const { size_t src_size=src_ptr?strlen(src_ptr):0; if(size() != src_size) @@ -172,8 +176,10 @@ public: /// simple hash code of string. 
used by Hash uint hash_code() const; - /// extracts first char of a string - char first_char() const; + /// extracts first char of a string, if any + char first_char() const { + return is_empty()?0:*felements[0].ptr; + } /// extracts [start, finish) piece of string String& mid(size_t start, size_t finish) const; @@ -205,7 +211,7 @@ public: prematch/match/postmatch/1/2/3/... @endverbatim */ - bool match( + bool match(Charset& source_charset, const String *aorigin, const String& regexp, const String *options, @@ -216,66 +222,21 @@ public: CC_UPPER, CC_LOWER }; - String& change_case(Pool& pool, + String& change_case(Charset& source_charset, Change_case_kind kind) const; - String& replace(Pool& pool, Dictionary& dict) const; + String& replace(Dictionary& dict) const; double as_double() const; int as_int() const; -#ifndef NO_STRING_ORIGIN - /// origin of string. calculated by first row - const Origin& origin() const; -#endif - -private: + String& join_chains(char** cstr) const; - /** several String fragments - */ - struct Chunk { - typedef uchar count_type; - count_type count; ///< the number of rows in chunk - // here could be some padding bytes - /// string fragment or a link to next chunk union - typedef union Row { - typedef uchar item_size_type; - /// fragment - struct { - const char *ptr; ///< pointer to the start - item_size_type size; ///< length - uchar/*Untaint_lang*/ lang; ///< untaint flag, later untaint language #ifndef NO_STRING_ORIGIN - Origin origin; ///< origin + /// origin of string. calculated by first row + const String_fragment::Origin& origin() const; #endif - } item; - // we are using the fact that there's no padding before this field! 
- Chunk *link; ///< link to the next chunk in chain - } rows_type[CR_PREALLOCATED_COUNT]; - rows_type rows; - }; - /** - 'mutable' because can write after it's end, after it was appended to somebody - @see String::append - */ - mutable struct { - Chunk chunk; - Chunk *link_storage; - } head; ///< the head chunk of the chunk chain - - /// next append would write to this record - Chunk::Row *append_here; - -private: - /// last chunk - mutable Chunk *last_chunk; private: - bool chunk_is_full() { - return append_here == last_chunk->rows+last_chunk->count; - } - uint used_rows() const; - void expand(); - Untaint_lang lang_of(size_t offset) const; size_t cstr_bufsize(Untaint_lang lang, @@ -286,37 +247,15 @@ private: SQL_Connection *connection=0, Charset *store_to_charset=0, const char *store_to_charset_name=0) const; - - void join_chain(Pool& pool, +/* + void join_chain( const Chunk*& achunk, const Chunk::Row*& arow, uint& acountdown, uchar& joined_lang, const char *& joined_ptr, size_t& joined_size) const; - +*/ private: //disabled String& operator = (const String&) { return *this; } }; -#include "pa_pragma_pack_end.h" - -#define STRING_PREPARED_FOREACH_ROW(self, body) \ - while(row!=(self).append_here) { \ - if(countdown==0) { \ - chunk=row->link; \ - row=chunk->rows; \ - countdown=chunk->count; \ - }; \ - { body } \ - row++; countdown--; \ - } - -#define STRING_PREFIX_FOREACH_ROW(self, body) { \ - const Chunk *chunk=&(self).head.chunk; \ - const Chunk::Row *row=chunk->rows; \ - uint countdown=chunk->count; \ - STRING_PREPARED_FOREACH_ROW(self, body) \ -} - -#define STRING_FOREACH_ROW(body) STRING_PREFIX_FOREACH_ROW(*this, body) -#define STRING_SRC_FOREACH_ROW(body) STRING_PREFIX_FOREACH_ROW(src, body) #endif