--- parser3/src/include/pa_string.h 2003/09/23 13:53:04 1.145.4.4 +++ parser3/src/include/pa_string.h 2003/09/25 09:15:02 1.146 @@ -8,7 +8,7 @@ #ifndef PA_STRING_H #define PA_STRING_H -static const char* IDENT_STRING_H="$Date: 2003/09/23 13:53:04 $"; +static const char* IDENT_STRING_H="$Date: 2003/09/25 09:15:02 $"; // includes @@ -20,15 +20,10 @@ extern "C" { // cord's author forgot to #include "cord.h" }; -/// must use simple version, without optimization for short pieces -/// it's much more convinient for iterating. we rely on this here: CORD_block_iter -#define CORD_chars(c, i) CORD_chars_block((c), (i)) -extern "C" CORD CORD_chars_block(char c, size_t i); - // cord extension /* Returns true if x does contain */ /* char not_c at positions i..i+n. Value i,i+n must be < CORD_len(x). */ -int CORD_nnchr(CORD x, size_t i, size_t n, int not_c); +int CORD_range_contains_chr_greater_then(CORD x, size_t i, size_t n, int c); // forwards @@ -43,92 +38,6 @@ typedef Array ArrayString /// this is result of pos functions which mean that substr were not found #define STRING_NOT_FOUND ((size_t)-1) -class StringBody { - - CORD body; - -public: - - StringBody(): body(CORD_EMPTY) {} - StringBody(CORD abody): body(abody) { - assert(!body // no body - || *body // ordinary string - || body[1]==1 // CONCAT_HDR - || body[1]==4 // FN_HDR - || body[1]==6 // SUBSTR_HDR - ); - } - /// WARNING: length is only HELPER length, str in ANY case should be zero-terminated - StringBody(const char* str, size_t helper_length): body(CORD_EMPTY) { - append_know_length(str, helper_length?helper_length:strlen(str)); - } - static StringBody Format(int value); - - void clear() { body=CORD_EMPTY; } - - bool operator! () const { return is_empty(); } - - uint hash_code() const; - - const char* cstr() const { return CORD_to_const_char_star(body); } - char* cstrm() const { return CORD_to_char_star(body); } - - size_t length() const { return CORD_len(body); } - - bool is_empty() const { return body==CORD_EMPTY; } - - void append_know_length(const char *str, size_t known_length) { - if(known_length) - body=CORD_cat_char_star(body, str, known_length); - } - void append_strdup_know_length(const char* str, size_t known_length) { - if(known_length) - append_know_length(pa_strdup(str, known_length), known_length); - } - void append(char c) { body=CORD_cat_char(body, c); } - StringBody& operator << (const StringBody src) { body=CORD_cat(body, src.body); return *this; } - StringBody& operator << (const char* str) { append_know_length(str, strlen(str)); return *this; } - - // could not figure out why this operator is needed [should do this chain: string->simple->==] - bool operator < (const StringBody src) const { return CORD_cmp(body, src.body)<0; } - bool operator > (const StringBody src) const { return CORD_cmp(body, src.body)>0; } - bool operator <= (const StringBody src) const { return CORD_cmp(body, src.body)<=0; } - bool operator >= (const StringBody src) const { return CORD_cmp(body, src.body)>=0; } - bool operator != (const StringBody src) const { return CORD_cmp(body, src.body)!=0; } - bool operator == (const StringBody src) const { return CORD_cmp(body, src.body)==0; } - - int ncmp(size_t x_begin, const StringBody y, size_t y_begin, size_t size) const { - return CORD_ncmp(body, x_begin, y.body, y_begin, size); - } - - char fetch(size_t index) const { return CORD_fetch(body, index); } - StringBody mid(size_t index, size_t length) const { return CORD_substr(body, index, length); } - size_t pos(const char* substr, size_t offset=0) const { return CORD_str(body, offset, substr); } - size_t pos(const StringBody substr, size_t offset=0) const { - if(!substr.length()) - return STRING_NOT_FOUND; // in this case CORD_str returns 0 [parser users got used to -1] - return CORD_str(body, offset, substr.body); - } - size_t pos(char c, - size_t offset=0) const { - return CORD_chr(body, offset, c); - } - -/* template void for_each(int (*callback)(const char* s, I), I info) const { - CORD_iter5(body, 0, 0, (CORD_batched_iter_fn)callback, info); - }*/ - - void set_pos(CORD_pos& pos, size_t index) const { CORD_set_pos(pos, body, index); } - - /*StringBody normalize() const { - return StringBody(CORD_balance(body)); - }*/ - - void dump() const { - CORD_dump(body); - } -}; - template inline size_t get_length(T current) { return current; @@ -149,7 +58,7 @@ public: enum Language { L_UNSPECIFIED=0, ///< no real string has parts of this lange: it's just convinient to check when string's empty // these two must go before others, there are checks for >L_AS_IS - L_CLEAN, ///< clean + L_CLEAN='1', ///< clean L_AS_IS, ///< leave all characters intact L_PASS_APPENDED, @@ -176,7 +85,7 @@ public: union { struct { Language lang:8; - int is_not_just_lang:16-8; + int is_not_just_lang:sizeof(CORD)*8-8; }; CORD langs; }; @@ -213,6 +122,8 @@ public: public: + const char* v() const; + Languages(): langs(0) {} Languages(Language alang): lang(alang), is_not_just_lang(0) {} @@ -266,15 +177,15 @@ public: append(current, src.make_langs(aoffset, alength)); } - /// checks if we have alang all from aoffset to aoffset+alength + /// checks if we have lang<=alang all from aoffset to aoffset+alength bool check_lang(Language alang, size_t aoffset, size_t alength) const { if(alang==L_UNSPECIFIED) // ignore lang? return true; if(is_not_just_lang) - return CORD_nnchr(langs, aoffset, alength, (unsigned)alang)!=0; + return CORD_range_contains_chr_greater_then(langs, aoffset, alength, (unsigned)alang)==0; else - return lang==alang; + return lang<=alang; } template @@ -296,6 +207,91 @@ public: } }; + class Body { + + CORD body; + + public: + + const char* v() const; + + Body(): body(CORD_EMPTY) {} + Body(CORD abody): body(abody) { + assert(!body // no body + || *body // ordinary string + || body[1]==1 // CONCAT_HDR + || body[1]==4 // FN_HDR + || body[1]==6 // SUBSTR_HDR + ); + } + /// WARNING: length is only HELPER length, str in ANY case should be zero-terminated + Body(const char* str, size_t helper_length): body(CORD_EMPTY) { + append_know_length(str, helper_length?helper_length:strlen(str)); + } + static Body Format(int value); + + void clear() { body=CORD_EMPTY; } + + bool operator! () const { return is_empty(); } + + uint hash_code() const; + + const char* cstr() const { return CORD_to_const_char_star(body); } + char* cstrm() const { return CORD_to_char_star(body); } + + size_t length() const { return CORD_len(body); } + + bool is_empty() const { return body==CORD_EMPTY; } + + void append_know_length(const char *str, size_t known_length) { + if(known_length) + body=CORD_cat_char_star(body, str, known_length); + } + void append_strdup_know_length(const char* str, size_t known_length) { + if(known_length) + append_know_length(pa_strdup(str, known_length), known_length); + } + void append(char c) { body=CORD_cat_char(body, c); } + Body& operator << (const Body src) { body=CORD_cat(body, src.body); return *this; } + Body& operator << (const char* str) { append_know_length(str, strlen(str)); return *this; } + + // could not figure out why this operator is needed [should do this chain: string->simple->==] + bool operator < (const Body src) const { return CORD_cmp(body, src.body)<0; } + bool operator > (const Body src) const { return CORD_cmp(body, src.body)>0; } + bool operator <= (const Body src) const { return CORD_cmp(body, src.body)<=0; } + bool operator >= (const Body src) const { return CORD_cmp(body, src.body)>=0; } + bool operator != (const Body src) const { return CORD_cmp(body, src.body)!=0; } + bool operator == (const Body src) const { return CORD_cmp(body, src.body)==0; } + + int ncmp(size_t x_begin, const Body y, size_t y_begin, size_t size) const { + return CORD_ncmp(body, x_begin, y.body, y_begin, size); + } + + char fetch(size_t index) const { return CORD_fetch(body, index); } + Body mid(size_t index, size_t length) const { return CORD_substr(body, index, length); } + size_t pos(const char* substr, size_t offset=0) const { return CORD_str(body, offset, substr); } + size_t pos(const Body substr, size_t offset=0) const { + if(!substr.length()) + return STRING_NOT_FOUND; // in this case CORD_str returns 0 [parser users got used to -1] + return CORD_str(body, offset, substr.body); + } + size_t pos(char c, + size_t offset=0) const { + return CORD_chr(body, offset, c); + } + + /* template void for_each(int (*callback)(const char* s, I), I info) const { + CORD_iter5(body, 0, 0, (CORD_batched_iter_fn)callback, info); + }*/ + + void set_pos(CORD_pos& pos, size_t index) const { CORD_set_pos(pos, body, index); } + + /*Body normalize() const { + return Body(CORD_balance(body)); + }*/ + }; + + struct C { const char *str; size_t length; @@ -313,7 +309,9 @@ public: private: Languages langs; ///< string characters lang - StringBody body; ///< all characters of string + Body body; ///< all characters of string + + const char* v() const; #define ASSERT_STRING_INVARIANT(string) \ assert((string).langs.invariant((string).body.length())) @@ -322,7 +320,7 @@ public: explicit String(const char* cstr=0, size_t helper_length=0, bool tainted=false); explicit String(const C cstr, bool tainted=false); - String(StringBody abody, Language alang): body(abody), langs(alang) { + String(Body abody, Language alang): body(abody), langs(alang) { assert(!body.is_empty()); ASSERT_STRING_INVARIANT(*this); } @@ -331,13 +329,13 @@ public: } /// for convinient hash lookup - operator const StringBody() const { return body; } + operator const Body() const { return body; } bool is_empty() const { return body.is_empty(); } size_t length() const { return body.length(); } /// convert to CORD. if 'lang' known, forcing 'lang' to it - StringBody cstr_to_string_body(Language lang=L_AS_IS, + Body cstr_to_string_body(Language lang=L_AS_IS, SQL_Connection* connection=0, const Request_charsets *charsets=0) const; @@ -357,22 +355,22 @@ public: Cm serialize(size_t prolog_size) const; /// appends pieces from buf to self bool deserialize(size_t prolog_size, void *buf, size_t buf_size); - /// @see StringBody::append_know_length + /// @see Body::append_know_length String& append_know_length(const char* str, size_t known_length, Language lang); - /// @see StringBody::append_help_length + /// @see Body::append_help_length String& append_help_length(const char* str, size_t helper_length, Language lang); String& append_strdup(const char* str, size_t helper_length, Language lang); - bool operator == (const char* y) const { return body==StringBody(y); } - bool operator != (const char* y) const { return body!=StringBody(y); } + bool operator == (const char* y) const { return body==Body(y); } + bool operator != (const char* y) const { return body!=Body(y); } /// this starts with y bool starts_with(const char* y) const { - return body.ncmp(0/*x_begin*/, StringBody(y), 0/*y_begin*/, strlen(y))==0; + return body.ncmp(0/*x_begin*/, Body(y), 0/*y_begin*/, strlen(y))==0; } /// x starts with this bool this_starts(const char* x) const { - return StringBody(x).ncmp(0/*x_begin*/, body, 0/*y_begin*/, length())==0; + return Body(x).ncmp(0/*x_begin*/, body, 0/*y_begin*/, length())==0; } String& append_to(String& dest, Language lang, bool forced) const; @@ -381,7 +379,7 @@ public: } String& operator << (const String& src) { return append(src, L_PASS_APPENDED); } String& operator << (const char* src) { return append_help_length(src, 0, L_AS_IS); } - String& operator << (const StringBody src) { + String& operator << (const Body src) { langs.append(body, L_AS_IS, src.length()); body< -inline size_t get_length(StringBody body) { +inline size_t get_length(String::Body body) { return body.length(); } /// simple hash code of string. used by Hash -inline uint hash_code(const StringBody self) { +inline uint hash_code(const String::Body self) { return self.hash_code(); }