--- parser3/src/include/pa_string.h 2015/10/08 18:29:15 1.211 +++ parser3/src/include/pa_string.h 2016/09/07 14:40:07 1.217 @@ -1,14 +1,14 @@ /** @file Parser: string class decl. - Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com) + Copyright (c) 2001-2015 Art. Lebedev Studio (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ #ifndef PA_STRING_H #define PA_STRING_H -#define IDENT_PA_STRING_H "$Id: pa_string.h,v 1.211 2015/10/08 18:29:15 moko Exp $" +#define IDENT_PA_STRING_H "$Id: pa_string.h,v 1.217 2016/09/07 14:40:07 moko Exp $" // includes #include "pa_types.h" @@ -311,6 +311,20 @@ public: } }; + struct C { + const char *str; + size_t length; + C(): str(0), length(0) {} + C(const char *astr, size_t asize): str(astr), length(asize) {} + }; + + struct Cm { + char *str; + size_t length; + Cm(): str(0), length(0) {} + Cm(char *astr, size_t asize): str(astr), length(asize) {} + }; + class Body { CORD body; @@ -319,15 +333,20 @@ public: // cached hash code is not reseted on write operations as test shows // that string body does not change after it is stored as a hash key mutable uint hash_code; +#define INIT_HASH_CODE(c) ,hash_code(c) +#define ZERO_HASH_CODE hash_code=0; +#else +#define INIT_HASH_CODE(c) +#define ZERO_HASH_CODE #endif #ifdef STRING_LENGTH_CACHING // cached length is reseted on modification, used only for char*, not CORD mutable size_t string_length; -#define INIT_LENGTH ,string_length(0) +#define INIT_LENGTH(l) ,string_length(l) #define ZERO_LENGTH string_length=0; #else -#define INIT_LENGTH +#define INIT_LENGTH(l) #define ZERO_LENGTH #endif @@ -336,16 +355,11 @@ public: const char* v() const; void dump() const; -#ifdef HASH_CODE_CACHING - Body(): body(CORD_EMPTY), hash_code(0) INIT_LENGTH {} - Body(CORD abody, uint ahash_code): body(abody), hash_code(ahash_code) INIT_LENGTH {} - Body(const char *abody): body(AS_CORD(abody)), hash_code(0) INIT_LENGTH {} - explicit Body(CORD abody): body(abody), hash_code(0) INIT_LENGTH { -#else - Body(): body(CORD_EMPTY) INIT_LENGTH {} - Body(const char *abody): body(AS_CORD(abody)) INIT_LENGTH {} - explicit Body(CORD abody): body(abody) INIT_LENGTH { -#endif + Body(): body(CORD_EMPTY) INIT_HASH_CODE(0) INIT_LENGTH(0) {} + Body(const char *abody): body(AS_CORD(abody)) INIT_HASH_CODE(0) INIT_LENGTH(0) {} + Body(CORD abody, uint ahash_code): body(abody) INIT_HASH_CODE(ahash_code) INIT_LENGTH(0) {} + explicit Body(C ac): body(AS_CORD(ac.str)) INIT_HASH_CODE(0) INIT_LENGTH(ac.length) {} + explicit Body(CORD abody): body(abody) INIT_HASH_CODE(0) INIT_LENGTH(0) { #ifdef CORD_CAT_OPTIMIZATION assert(!body // no body || *body // ordinary string @@ -364,9 +378,10 @@ public: #endif } + static Body Format(int value); - void clear() { ZERO_LENGTH body=CORD_EMPTY; } + void clear() { ZERO_LENGTH ZERO_HASH_CODE body=CORD_EMPTY; } bool operator! () const { return is_empty(); } @@ -394,7 +409,7 @@ public: size_t length() const { return CORD_len(body); } #endif - bool is_empty() const { return body==CORD_EMPTY; } + inline bool is_empty() const { return body==CORD_EMPTY; } void append_know_length(const char *str, size_t known_length) { if(known_length){ @@ -471,23 +486,7 @@ public: size_t* out_start=0, size_t* out_length=0, Charset* source_charset=0) const; }; - struct C { - const char *str; - size_t length; - //operator const char *() { return str; } - C(): str(0), length(0) {} - C(const char *astr, size_t asize): str(astr), length(asize) {} - }; - - struct Cm { - char *str; - size_t length; - //operator char *() { return str; } - Cm(): str(0), length(0) {} - Cm(char *astr, size_t asize): str(astr), length(asize) {} - }; - -private: +protected: Body body; ///< all characters of string Languages langs; ///< string characters lang @@ -507,31 +506,18 @@ public: langs=alang; } } - explicit String(const char* cstr, Language alang, size_t alength) : body(cstr){ + explicit String(C ac, Language alang=L_CLEAN) : body(ac){ if(body.get_cord()){ -#ifdef STRING_LENGTH_CACHING - body.set_length(alength); -#endif langs=alang; } } - - explicit String(C ac, Language alang=L_CLEAN) : body(ac.str){ - if(body.get_cord()){ -#ifdef STRING_LENGTH_CACHING - body.set_length(ac.length); -#endif - langs=alang; - } - } - - String(int value, const char *format); String(Body abody, Language alang): body(abody), langs(alang) { ASSERT_STRING_INVARIANT(*this); } String(const String& src): body(src.body), langs(src.langs) { ASSERT_STRING_INVARIANT(*this); } + String(int value, const char *format); /// for convinient hash lookup #ifdef HASH_CODE_CACHING @@ -549,14 +535,9 @@ public: /// convert to CORD with tainting dirty to lang Body cstr_to_string_body_untaint(Language lang, SQL_Connection* connection=0, const Request_charsets *charsets=0) const; - /// - const char* cstr() const { - return body.cstr(); - } - /// - char* cstrm() const { - return body.cstrm(); - } + /// from body + const char* cstr() const { return body.cstr(); } + char* cstrm() const { return body.cstrm(); } /// convert to constant C string forcing lang tainting const char* taint_cstr(Language lang, SQL_Connection* connection=0, const Request_charsets *charsets=0) const { @@ -650,18 +631,13 @@ public: but when specified: look for substring that lies in ONE fragment in THAT lang @return position of substr in string, -1 means "not found" [const char* version] */ - size_t pos(const Body substr, - size_t this_offset=0, Language lang=L_UNSPECIFIED) const; + size_t pos(const Body substr, size_t this_offset=0, Language lang=L_UNSPECIFIED) const; /// String version of @see pos(const char*, int, Language) - size_t pos(const String& substr, - size_t this_offset=0, Language lang=L_UNSPECIFIED) const; - size_t pos(char c, - size_t this_offset=0) const { + size_t pos(const String& substr, size_t this_offset=0, Language lang=L_UNSPECIFIED) const; + size_t pos(char c, size_t this_offset=0) const { return body.pos(c, this_offset); } - size_t pos(Charset& charset, - const String& substr, - size_t this_offset=0, Language lang=L_UNSPECIFIED) const; + size_t pos(Charset& charset, const String& substr, size_t this_offset=0, Language lang=L_UNSPECIFIED) const; size_t strrpbrk(const char* chars, size_t left=0) const { return (length()) ? body.strrpbrk(chars, left, length()-1) : STRING_NOT_FOUND; @@ -677,19 +653,11 @@ public: return body.rskipchars(chars, left, right); } - void split(ArrayString& result, - size_t& pos_after, - const char* delim, - Language lang=L_UNSPECIFIED, int limit=-1) const; - void split(ArrayString& result, - size_t& pos_after, - const String& delim, - Language lang=L_UNSPECIFIED, int limit=-1) const; - - typedef void (*Row_action)(Table& table, ArrayString* row, - int prestart, int prefinish, - int poststart, int postfinish, - void *info); + void split(ArrayString& result, size_t& pos_after, const char* delim, Language lang=L_UNSPECIFIED, int limit=-1) const; + void split(ArrayString& result, size_t& pos_after, const String& delim, Language lang=L_UNSPECIFIED, int limit=-1) const; + + typedef void (*Row_action)(Table& table, ArrayString* row, int prestart, int prefinish, int poststart, int postfinish, void *info); + /** @return table of found items, if any. table format is defined and fixed[can be used by others]: @@ -697,15 +665,14 @@ public: prematch/match/postmatch/1/2/3/... @endverbatim */ - Table* match(VRegex* vregex, - Row_action row_action, void *info, - int& matches_count) const; + Table* match(VRegex* vregex, Row_action row_action, void *info, int& matches_count) const; + enum Change_case_kind { CC_UPPER, CC_LOWER }; - String& change_case(Charset& source_charset, - Change_case_kind kind) const; + String& change_case(Charset& source_charset, Change_case_kind kind) const; + const String& replace(const Dictionary& dict) const; const String& trim(Trim_kind kind=TRIM_BOTH, const char* chars=0, Charset* source_charset=0) const; double as_double() const { return pa_atod(cstr(), this); }