--- parser3/src/include/pa_string.h 2012/03/16 09:24:10 1.201 +++ parser3/src/include/pa_string.h 2015/10/06 22:20:50 1.210 @@ -8,7 +8,7 @@ #ifndef PA_STRING_H #define PA_STRING_H -#define IDENT_PA_STRING_H "$Id: pa_string.h,v 1.201 2012/03/16 09:24:10 moko Exp $" +#define IDENT_PA_STRING_H "$Id: pa_string.h,v 1.210 2015/10/06 22:20:50 moko Exp $" // includes #include "pa_types.h" @@ -22,7 +22,7 @@ extern "C" { // cord's author forgot to #define CORD_cat(x, y) CORD_cat_optimized(x, y) #define CORD_cat_char_star(x, y, leny) CORD_cat_char_star_optimized(x, y, leny) #endif -}; +} // defines @@ -54,6 +54,8 @@ class VRegex; int pa_atoi(const char* str, const String* problem_source=0); double pa_atod(const char* str, const String* problem_source=0); +unsigned int pa_atoui(const char *str, int base, const String* problem_source=0); +unsigned long long int pa_atoul(const char *str, int base, const String* problem_source=0); /// this is result of pos functions which mean that substr were not found #define STRING_NOT_FOUND ((size_t)-1) @@ -184,7 +186,7 @@ public: assert(alang); assert(length); - if(!opt.is_not_just_lang) + if(!opt.is_not_just_lang) { if(opt.lang) { if(opt.lang==alang) // same language? ignoring return; @@ -192,6 +194,7 @@ public: opt.lang=alang; // to uninitialized return; } + } append(current, CORD_chars((char)alang, length)); } @@ -200,7 +203,7 @@ public: assert(alang); assert(length); - if(!opt.is_not_just_lang) + if(!opt.is_not_just_lang) { if(opt.lang) { if(opt.lang==alang) // same language? ignoring return; @@ -208,6 +211,7 @@ public: opt.lang=alang; // to uninitialized return; } + } append(current, CORD_chars((char)alang, length)); } @@ -215,7 +219,7 @@ public: void appendHelper(const Body& current, Language alang, const Body &length_helper) { assert(alang); - if(!opt.is_not_just_lang) + if(!opt.is_not_just_lang) { if(opt.lang) { if(opt.lang==alang) // same language? ignoring return; @@ -223,6 +227,7 @@ public: opt.lang=alang; // to uninitialized return; } + } append(current, CORD_chars((char)alang, length_helper.length())); } @@ -405,14 +410,17 @@ public: Body& operator << (const char* str) { append_know_length(str, strlen(str)); return *this; } - // could not figure out why this operator is needed [should do this chain: string->simple->==] bool operator < (const Body src) const { return CORD_cmp(body, src.body)<0; } bool operator > (const Body src) const { return CORD_cmp(body, src.body)>0; } bool operator <= (const Body src) const { return CORD_cmp(body, src.body)<=0; } bool operator >= (const Body src) const { return CORD_cmp(body, src.body)>=0; } + bool operator != (const Body src) const { return CORD_cmp(body, src.body)!=0; } bool operator == (const Body src) const { return CORD_cmp(body, src.body)==0; } + bool operator != (const char *src) const { return CORD_cmp(body, src)!=0; } + bool operator == (const char *src) const { return CORD_cmp(body, src)==0; } + int ncmp(size_t x_begin, const Body y, size_t y_begin, size_t size) const { return CORD_ncmp(body, x_begin, y.body, y_begin, size); } @@ -427,14 +435,17 @@ public: // CORD_str checks for bad offset [CORD_chr does not] return CORD_str(body, offset, substr.body, length()); } - size_t pos(char c, - size_t offset=0) const { + size_t pos(char c, size_t offset=0) const { if(offset>=length()) // CORD_chr does not check that [and ABORT's in that case] return STRING_NOT_FOUND; return CORD_chr(body, offset, c); } + size_t strrpbrk(const char* chars, size_t left, size_t right) const; + + size_t rskipchars(const char* chars, size_t left, size_t right) const; + template int for_each(int (*f)(char c, I), I info) const { return CORD_iter(body, (CORD_iter_fn)f, (void*)info); @@ -447,10 +458,6 @@ public: void set_pos(CORD_pos& pos, size_t index) const { CORD_set_pos(pos, body, index); } - /*Body normalize() const { - return Body(CORD_balance(body)); - }*/ - /// @returns this or 0 or mid. if returns this or 0 out_* are not filled Body trim(Trim_kind kind=TRIM_BOTH, const char* chars=0, size_t* out_start=0, size_t* out_length=0, Charset* source_charset=0) const; @@ -503,7 +510,7 @@ public: } } - String(int value, char *format); + String(int value, const char *format); String(Body abody, Language alang): body(abody), langs(alang) { ASSERT_STRING_INVARIANT(*this); } @@ -554,6 +561,14 @@ public: const char* untaint_and_transcode_cstr(Language lang, const Request_charsets *charsets) const; + bool is_not_just_lang() const { + return langs.opt.is_not_just_lang !=0; + } + + Language just_lang() const { + return langs.opt.lang; + } + /// puts pieces to buf Cm serialize(size_t prolog_size) const; /// appends pieces from buf to self @@ -596,11 +611,14 @@ public: return *this; } - /// extracts first char of a string, if any char first_char() const { return is_empty()?0:body.fetch(0); } + char last_char() const { + return is_empty()?0:body.fetch(body.length()-1); + } + bool operator < (const String& src) const { return body (const String& src) const { return body>src.body; } bool operator <= (const String& src) const { return body<=src.body; } @@ -630,6 +648,20 @@ public: const String& substr, size_t this_offset=0, Language lang=L_UNSPECIFIED) const; + size_t strrpbrk(const char* chars, size_t left=0) const { + return (length()) ? body.strrpbrk(chars, left, length()-1) : STRING_NOT_FOUND; + } + size_t strrpbrk(const char* chars, size_t left, size_t right) const { + return body.strrpbrk(chars, left, right); + } + + size_t rskipchars(const char* chars, size_t left=0) const { + return (length()) ? body.rskipchars(chars, left, length()-1) : STRING_NOT_FOUND; + } + size_t rskipchars(const char* chars, size_t left, size_t right) const { + return body.rskipchars(chars, left, right); + } + void split(ArrayString& result, size_t& pos_after, const char* delim,