--- parser3/src/include/pa_string.h 2015/10/09 11:42:38 1.213 +++ parser3/src/include/pa_string.h 2017/02/07 22:00:36 1.223 @@ -1,14 +1,14 @@ /** @file Parser: string class decl. - Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com) + Copyright (c) 2001-2017 Art. Lebedev Studio (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ #ifndef PA_STRING_H #define PA_STRING_H -#define IDENT_PA_STRING_H "$Id: pa_string.h,v 1.213 2015/10/09 11:42:38 moko Exp $" +#define IDENT_PA_STRING_H "$Id: pa_string.h,v 1.223 2017/02/07 22:00:36 moko Exp $" // includes #include "pa_types.h" @@ -73,6 +73,7 @@ unsigned long long int pa_atoul(const ch class String: public PA_Object { + friend class StringSplitHelper; public: /** piece is tainted or not. the lang to use when detaint @@ -95,11 +96,6 @@ public: L_CLEAN='0', ///< clean WARNING: read above warning before changing L_AS_IS='A', ///< leave all characters intact WARNING: read above warning before changing - L_PASS_APPENDED='P', - /**< - leave lang built into string being appended. - just a flag, that value not stored - */ L_TAINTED='T', ///< tainted, untaint lang as assigned later // untaint langs. assigned by ^untaint[lang]{...} L_FILE_SPEC='F', ///< file specification @@ -175,7 +171,7 @@ public: public: - const char* v() const; + const char* visualize() const; void dump() const; Languages(): langs(0) {} @@ -316,6 +312,7 @@ public: size_t length; C(): str(0), length(0) {} C(const char *astr, size_t asize): str(astr), length(asize) {} + explicit C(Body abody): str(abody.cstr()), length(abody.length()) {} }; struct Cm { @@ -323,6 +320,7 @@ public: size_t length; Cm(): str(0), length(0) {} Cm(char *astr, size_t asize): str(astr), length(asize) {} + explicit Cm(Body abody): str(abody.cstrm()), length(abody.length()) {} }; class Body { @@ -334,8 +332,10 @@ public: // that string body does not change after it is stored as a hash key mutable uint hash_code; #define INIT_HASH_CODE(c) ,hash_code(c) +#define ZERO_HASH_CODE hash_code=0; #else #define INIT_HASH_CODE(c) +#define ZERO_HASH_CODE #endif #ifdef STRING_LENGTH_CACHING @@ -350,7 +350,6 @@ public: public: - const char* v() const; void dump() const; Body(): body(CORD_EMPTY) INIT_HASH_CODE(0) INIT_LENGTH(0) {} @@ -379,13 +378,14 @@ public: static Body Format(int value); - void clear() { ZERO_LENGTH body=CORD_EMPTY; } + void clear() { ZERO_LENGTH ZERO_HASH_CODE body=CORD_EMPTY; } bool operator! () const { return is_empty(); } inline CORD get_cord() const { return body; } uint get_hash_code() const; + // never null const char* cstr() const { #ifdef STRING_LENGTH_CACHING string_length = length(); @@ -398,6 +398,7 @@ public: return CORD_to_const_char_star(body, length()); } + // never null char* cstrm() const { return CORD_to_char_star(body, length()); } #ifdef STRING_LENGTH_CACHING @@ -489,7 +490,6 @@ private: Body body; ///< all characters of string Languages langs; ///< string characters lang - const char* v() const; void dump() const; #define ASSERT_STRING_INVARIANT(string) \ assert((string).langs.invariant((string).body.length())) @@ -563,6 +563,8 @@ public: return langs.opt.lang; } + char* visualize_langs() const; + /// puts pieces to buf Cm serialize(size_t prolog_size) const; /// appends pieces from buf to self @@ -585,6 +587,7 @@ public: return Body(x).ncmp(0/*x_begin*/, body, 0/*y_begin*/, length())==0; } + String& append_to(String& dest) const; String& append_to(String& dest, Language lang, bool forced=false) const; String& append(const String& src, Language lang, bool forced=false) { return src.append_to(*this, lang, forced); @@ -597,7 +600,7 @@ public: return *this; } - String& operator << (const String& src) { return append(src, L_PASS_APPENDED); } + String& operator << (const String& src) { return src.append_to(*this); } String& operator << (const char* src) { return append_help_length(src, 0, L_AS_IS); } String& operator << (const Body& src){ langs.appendHelper(body, L_AS_IS, src); @@ -629,18 +632,13 @@ public: but when specified: look for substring that lies in ONE fragment in THAT lang @return position of substr in string, -1 means "not found" [const char* version] */ - size_t pos(const Body substr, - size_t this_offset=0, Language lang=L_UNSPECIFIED) const; + size_t pos(const Body substr, size_t this_offset=0, Language lang=L_UNSPECIFIED) const; /// String version of @see pos(const char*, int, Language) - size_t pos(const String& substr, - size_t this_offset=0, Language lang=L_UNSPECIFIED) const; - size_t pos(char c, - size_t this_offset=0) const { + size_t pos(const String& substr, size_t this_offset=0, Language lang=L_UNSPECIFIED) const; + size_t pos(char c, size_t this_offset=0) const { return body.pos(c, this_offset); } - size_t pos(Charset& charset, - const String& substr, - size_t this_offset=0, Language lang=L_UNSPECIFIED) const; + size_t pos(Charset& charset, const String& substr, size_t this_offset=0, Language lang=L_UNSPECIFIED) const; size_t strrpbrk(const char* chars, size_t left=0) const { return (length()) ? body.strrpbrk(chars, left, length()-1) : STRING_NOT_FOUND; @@ -656,19 +654,11 @@ public: return body.rskipchars(chars, left, right); } - void split(ArrayString& result, - size_t& pos_after, - const char* delim, - Language lang=L_UNSPECIFIED, int limit=-1) const; - void split(ArrayString& result, - size_t& pos_after, - const String& delim, - Language lang=L_UNSPECIFIED, int limit=-1) const; - - typedef void (*Row_action)(Table& table, ArrayString* row, - int prestart, int prefinish, - int poststart, int postfinish, - void *info); + void split(ArrayString& result, size_t pos_after, const char* delim, Language lang=L_UNSPECIFIED) const; + void split(ArrayString& result, size_t pos_after, const String& delim, Language lang=L_UNSPECIFIED) const; + + typedef void (*Row_action)(Table& table, ArrayString* row, int prestart, int prefinish, int poststart, int postfinish, void *info); + /** @return table of found items, if any. table format is defined and fixed[can be used by others]: @@ -676,15 +666,14 @@ public: prematch/match/postmatch/1/2/3/... @endverbatim */ - Table* match(VRegex* vregex, - Row_action row_action, void *info, - int& matches_count) const; + Table* match(VRegex* vregex, Row_action row_action, void *info, int& matches_count) const; + enum Change_case_kind { CC_UPPER, CC_LOWER }; - String& change_case(Charset& source_charset, - Change_case_kind kind) const; + String& change_case(Charset& source_charset, Change_case_kind kind) const; + const String& replace(const Dictionary& dict) const; const String& trim(Trim_kind kind=TRIM_BOTH, const char* chars=0, Charset* source_charset=0) const; double as_double() const { return pa_atod(cstr(), this); }