--- parser3/src/include/pa_string.h 2015/04/08 17:36:19 1.207 +++ parser3/src/include/pa_string.h 2016/05/24 11:55:13 1.215 @@ -1,14 +1,14 @@ /** @file Parser: string class decl. - Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com) + Copyright (c) 2001-2015 Art. Lebedev Studio (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ #ifndef PA_STRING_H #define PA_STRING_H -#define IDENT_PA_STRING_H "$Id: pa_string.h,v 1.207 2015/04/08 17:36:19 moko Exp $" +#define IDENT_PA_STRING_H "$Id: pa_string.h,v 1.215 2016/05/24 11:55:13 moko Exp $" // includes #include "pa_types.h" @@ -22,7 +22,7 @@ extern "C" { // cord's author forgot to #define CORD_cat(x, y) CORD_cat_optimized(x, y) #define CORD_cat_char_star(x, y, leny) CORD_cat_char_star_optimized(x, y, leny) #endif -}; +} // defines @@ -55,10 +55,14 @@ class VRegex; int pa_atoi(const char* str, const String* problem_source=0); double pa_atod(const char* str, const String* problem_source=0); unsigned int pa_atoui(const char *str, int base, const String* problem_source=0); +unsigned long long int pa_atoul(const char *str, int base, const String* problem_source=0); /// this is result of pos functions which mean that substr were not found #define STRING_NOT_FOUND ((size_t)-1) +/// CORD can't be empty string, thus checking it in assigment +#define AS_CORD(v) ((v) && *(v) ? (CORD)(v):0) + /** String which knows the lang of all it's langs. @@ -185,7 +189,7 @@ public: assert(alang); assert(length); - if(!opt.is_not_just_lang) + if(!opt.is_not_just_lang) { if(opt.lang) { if(opt.lang==alang) // same language? ignoring return; @@ -193,6 +197,7 @@ public: opt.lang=alang; // to uninitialized return; } + } append(current, CORD_chars((char)alang, length)); } @@ -201,7 +206,7 @@ public: assert(alang); assert(length); - if(!opt.is_not_just_lang) + if(!opt.is_not_just_lang) { if(opt.lang) { if(opt.lang==alang) // same language? ignoring return; @@ -209,6 +214,7 @@ public: opt.lang=alang; // to uninitialized return; } + } append(current, CORD_chars((char)alang, length)); } @@ -216,7 +222,7 @@ public: void appendHelper(const Body& current, Language alang, const Body &length_helper) { assert(alang); - if(!opt.is_not_just_lang) + if(!opt.is_not_just_lang) { if(opt.lang) { if(opt.lang==alang) // same language? ignoring return; @@ -224,6 +230,7 @@ public: opt.lang=alang; // to uninitialized return; } + } append(current, CORD_chars((char)alang, length_helper.length())); } @@ -304,6 +311,20 @@ public: } }; + struct C { + const char *str; + size_t length; + C(): str(0), length(0) {} + C(const char *astr, size_t asize): str(astr), length(asize) {} + }; + + struct Cm { + char *str; + size_t length; + Cm(): str(0), length(0) {} + Cm(char *astr, size_t asize): str(astr), length(asize) {} + }; + class Body { CORD body; @@ -312,15 +333,20 @@ public: // cached hash code is not reseted on write operations as test shows // that string body does not change after it is stored as a hash key mutable uint hash_code; +#define INIT_HASH_CODE(c) ,hash_code(c) +#define ZERO_HASH_CODE hash_code=0; +#else +#define INIT_HASH_CODE(c) +#define ZERO_HASH_CODE #endif #ifdef STRING_LENGTH_CACHING // cached length is reseted on modification, used only for char*, not CORD mutable size_t string_length; -#define INIT_LENGTH ,string_length(0) +#define INIT_LENGTH(l) ,string_length(l) #define ZERO_LENGTH string_length=0; #else -#define INIT_LENGTH +#define INIT_LENGTH(l) #define ZERO_LENGTH #endif @@ -329,14 +355,11 @@ public: const char* v() const; void dump() const; -#ifdef HASH_CODE_CACHING - Body(): body(CORD_EMPTY), hash_code(0) INIT_LENGTH {} - Body(CORD abody, uint ahash_code): body(abody), hash_code(ahash_code) INIT_LENGTH {} - Body(CORD abody): body(abody), hash_code(0) INIT_LENGTH { -#else - Body(): body(CORD_EMPTY) INIT_LENGTH {} - Body(CORD abody): body(abody) INIT_LENGTH { -#endif + Body(): body(CORD_EMPTY) INIT_HASH_CODE(0) INIT_LENGTH(0) {} + Body(const char *abody): body(AS_CORD(abody)) INIT_HASH_CODE(0) INIT_LENGTH(0) {} + Body(CORD abody, uint ahash_code): body(abody) INIT_HASH_CODE(ahash_code) INIT_LENGTH(0) {} + explicit Body(C ac): body(AS_CORD(ac.str)) INIT_HASH_CODE(0) INIT_LENGTH(ac.length) {} + explicit Body(CORD abody): body(abody) INIT_HASH_CODE(0) INIT_LENGTH(0) { #ifdef CORD_CAT_OPTIMIZATION assert(!body // no body || *body // ordinary string @@ -355,20 +378,24 @@ public: #endif } + static Body Format(int value); - void clear() { ZERO_LENGTH body=CORD_EMPTY; } + void clear() { ZERO_LENGTH ZERO_HASH_CODE body=CORD_EMPTY; } bool operator! () const { return is_empty(); } - CORD get_cord() const { return body; } + inline CORD get_cord() const { return body; } uint get_hash_code() const; const char* cstr() const { #ifdef STRING_LENGTH_CACHING string_length = length(); - if(string_length) - return const_cast(this)->body=CORD_to_const_char_star(body, string_length); + if(string_length){ + const char *result=CORD_to_const_char_star(body, string_length); + const_cast(this)->body=(CORD)result; + return result; + } #endif return CORD_to_const_char_star(body, length()); } @@ -377,12 +404,12 @@ public: #ifdef STRING_LENGTH_CACHING void set_length(size_t alength){ string_length = alength; } - size_t length() const { return body ? CORD_IS_STRING(body) ? string_length ? string_length : (string_length=strlen(body)) : CORD_len(body) : 0; } + size_t length() const { return body ? CORD_IS_STRING(body) ? string_length ? string_length : (string_length=strlen((const char *)body)) : CORD_len(body) : 0; } #else size_t length() const { return CORD_len(body); } #endif - bool is_empty() const { return body==CORD_EMPTY; } + inline bool is_empty() const { return body==CORD_EMPTY; } void append_know_length(const char *str, size_t known_length) { if(known_length){ @@ -390,7 +417,7 @@ public: body = CORD_cat_char_star(body, str, known_length); ZERO_LENGTH } else { - body=str; + body=(CORD)str; #ifdef STRING_LENGTH_CACHING string_length=known_length; #endif @@ -414,16 +441,16 @@ public: bool operator != (const Body src) const { return CORD_cmp(body, src.body)!=0; } bool operator == (const Body src) const { return CORD_cmp(body, src.body)==0; } - bool operator != (const char *src) const { return CORD_cmp(body, src)!=0; } - bool operator == (const char *src) const { return CORD_cmp(body, src)==0; } + bool operator != (const char *src) const { return CORD_cmp(body, AS_CORD(src))!=0; } + bool operator == (const char *src) const { return CORD_cmp(body, AS_CORD(src))==0; } int ncmp(size_t x_begin, const Body y, size_t y_begin, size_t size) const { return CORD_ncmp(body, x_begin, y.body, y_begin, size); } char fetch(size_t index) const { return CORD_fetch(body, index); } - Body mid(size_t aindex, size_t alength) const { return CORD_substr(body, aindex, alength, length()); } - size_t pos(const char* substr, size_t offset=0) const { return CORD_str(body, offset, substr, length()); } + Body mid(size_t aindex, size_t alength) const { return Body(CORD_substr(body, aindex, alength, length())); } + size_t pos(const char* substr, size_t offset=0) const { return CORD_str(body, offset, AS_CORD(substr), length()); } size_t pos(const Body substr, size_t offset=0) const { if(substr.is_empty()) return STRING_NOT_FOUND; // in this case CORD_str returns 0 [parser users got used to -1] @@ -459,22 +486,6 @@ public: size_t* out_start=0, size_t* out_length=0, Charset* source_charset=0) const; }; - struct C { - const char *str; - size_t length; - operator const char *() { return str; } - C(): str(0), length(0) {} - C(const char *astr, size_t asize): str(astr), length(asize) {} - }; - - struct Cm { - char *str; - size_t length; - //operator char *() { return str; } - Cm(): str(0), length(0) {} - Cm(char *astr, size_t asize): str(astr), length(asize) {} - }; - private: Body body; ///< all characters of string @@ -490,29 +501,23 @@ public: static const String Empty; explicit String(){}; - explicit String(const char* cstr, Language alang=L_CLEAN){ - if(cstr && *cstr){ - body=cstr; + explicit String(const char* cstr, Language alang=L_CLEAN) : body(cstr){ + if(body.get_cord()){ langs=alang; } } - explicit String(const char* cstr, Language alang, size_t alength){ - if(cstr && *cstr){ - body=cstr; -#ifdef STRING_LENGTH_CACHING - body.set_length(alength); -#endif + explicit String(C ac, Language alang=L_CLEAN) : body(ac){ + if(body.get_cord()){ langs=alang; } } - - String(int value, const char *format); String(Body abody, Language alang): body(abody), langs(alang) { ASSERT_STRING_INVARIANT(*this); } String(const String& src): body(src.body), langs(src.langs) { ASSERT_STRING_INVARIANT(*this); } + String(int value, const char *format); /// for convinient hash lookup #ifdef HASH_CODE_CACHING @@ -530,14 +535,9 @@ public: /// convert to CORD with tainting dirty to lang Body cstr_to_string_body_untaint(Language lang, SQL_Connection* connection=0, const Request_charsets *charsets=0) const; - /// - const char* cstr() const { - return body.cstr(); - } - /// - char* cstrm() const { - return body.cstrm(); - } + /// from body + const char* cstr() const { return body.cstr(); } + char* cstrm() const { return body.cstrm(); } /// convert to constant C string forcing lang tainting const char* taint_cstr(Language lang, SQL_Connection* connection=0, const Request_charsets *charsets=0) const {