--- parser3/src/include/pa_string.h 2004/01/30 09:56:49 1.155 +++ parser3/src/include/pa_string.h 2005/11/22 15:09:10 1.166 @@ -1,17 +1,16 @@ /** @file Parser: string class decl. - Copyright (c) 2001-2003 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001-2005 ArtLebedev Group (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ #ifndef PA_STRING_H #define PA_STRING_H -static const char * const IDENT_STRING_H="$Date: 2004/01/30 09:56:49 $"; +static const char * const IDENT_STRING_H="$Date: 2005/11/22 15:09:10 $"; // includes - #include "pa_types.h" #include "pa_array.h" @@ -20,6 +19,8 @@ extern "C" { // cord's author forgot to #include "cord.h" }; +// defines + // cord extension /* Returns true if x does contain */ /* char not_c at positions i..i+n. Value i,i+n must be < CORD_len(x). */ @@ -90,20 +91,33 @@ public: L_HTTP_HEADER='h', ///< text in HTTP response header L_MAIL_HEADER='m', ///< text in mail header L_URI='U', ///< text in uri - L_TABLE='L', ///< ^table:set body L_SQL='Q', ///< ^table:sql body L_JS='J', ///< JavaScript code L_XML='X', ///< ^dom:set xml - L_HTML='H', ///< HTML code (for editing) + L_HTML='H', ///< HTML code + L_REGEX='R', ///< RegEx expression // READ WARNING ABOVE BEFORE ADDING ANYTHING L_OPTIMIZE_BIT = 0x80 ///< flag, requiring cstr whitespace optimization }; + enum Trim_kind { + TRIM_BOTH, + TRIM_START, + TRIM_END + }; + union Languages { struct { +#ifdef PA_LITTLE_ENDIAN Language lang:8; int is_not_just_lang:sizeof(CORD)*8-8; +#elif defined(PA_BIG_ENDIAN) + int is_not_just_lang:sizeof(CORD)*8-8; + Language lang:8; +#else +# error word endianness not determined for some obscure reason +#endif } opt; CORD langs; @@ -120,7 +134,7 @@ public: :CORD_chars((char)opt.lang, alength); } - /// appending when 'langs' already contain something [simple cases hanled elsewhere] + /// appending when 'langs' already contain something [simple cases handled elsewhere] template void append(C current, const CORD to_nonempty_target_langs) { @@ -140,6 +154,7 @@ public: public: const char* v() const; + void dump() const; Languages(): langs(0) {} Languages(Language alang) { @@ -228,7 +243,7 @@ public: callback(opt.lang, get_length(current), info); } - bool invariant(size_t current_length) { + bool invariant(size_t current_length) const { if(!langs) return current_length==0; if(opt.is_not_just_lang) @@ -244,6 +259,7 @@ public: public: const char* v() const; + void dump() const; Body(): body(CORD_EMPTY) {} Body(CORD abody): body(abody) { @@ -315,11 +331,17 @@ public: return CORD_chr(body, offset, c); } - template void for_each( + template int for_each( + int (*f)(char c, I), + I info) const { + return CORD_iter(body, (CORD_iter_fn)f, (void*)info); + } + + template int for_each( int (*f1)(char c, I), int (*f2)(const char* s, I), I info) const { - CORD_iter5(body, 0, (CORD_iter_fn)f1, (CORD_batched_iter_fn)f2, info); + return CORD_iter5(body, 0, (CORD_iter_fn)f1, (CORD_batched_iter_fn)f2, info); } void set_pos(CORD_pos& pos, size_t index) const { CORD_set_pos(pos, body, index); } @@ -327,12 +349,17 @@ public: /*Body normalize() const { return Body(CORD_balance(body)); }*/ + + /// @returns this or 0 or mid. if returns this or 0 out_* are not filled + Body trim(Trim_kind kind=TRIM_BOTH, const char* chars=0, + size_t* out_start=0, size_t* out_length=0) const; }; struct C { const char *str; size_t length; operator const char *() { return str; } + C(): str(0), length(0) {} C(const char *astr, size_t asize): str(astr), length(asize) {} }; @@ -340,6 +367,7 @@ public: char *str; size_t length; //operator char *() { return str; } + Cm(): str(0), length(0) {} Cm(char *astr, size_t asize): str(astr), length(asize) {} }; @@ -349,9 +377,9 @@ private: Languages langs; ///< string characters lang const char* v() const; - -#define ASSERT_STRING_INVARIANT(string) \ - assert((string).langs.invariant((string).body.length())) + void dump() const; + #define ASSERT_STRING_INVARIANT(string) \ + assert((string).langs.invariant((string).body.length())) public: @@ -481,6 +509,7 @@ public: String& change_case(Charset& source_charset, Change_case_kind kind) const; const String& replace(const Dictionary& dict) const; + const String& trim(Trim_kind kind=TRIM_BOTH, const char* chars=0) const; double as_double() const { return pa_atod(cstr(), this); } int as_int() const { return pa_atoi(cstr(), this); }