--- parser3/src/include/pa_string.h 2009/04/22 04:37:52 1.178 +++ parser3/src/include/pa_string.h 2009/05/15 06:57:43 1.185 @@ -1,14 +1,14 @@ /** @file Parser: string class decl. - Copyright (c) 2001-2005 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ #ifndef PA_STRING_H #define PA_STRING_H -static const char * const IDENT_STRING_H="$Date: 2009/04/22 04:37:52 $"; +static const char * const IDENT_STRING_H="$Date: 2009/05/15 06:57:43 $"; // includes #include "pa_types.h" @@ -21,6 +21,9 @@ extern "C" { // cord's author forgot to // defines +// cache hash code in String::Body for faster hash access +#define HASH_CODE_CACHING + // cord extension /* Returns true if x does contain */ /* char not_c at positions i..i+n. Value i,i+n must be < CORD_len(x). */ @@ -117,9 +120,9 @@ public: struct { #ifdef PA_LITTLE_ENDIAN Language lang:8; - int is_not_just_lang:sizeof(CORD)*8-8; + size_t is_not_just_lang:sizeof(CORD)*8-8; #elif defined(PA_BIG_ENDIAN) - int is_not_just_lang:sizeof(CORD)*8-8; + size_t is_not_just_lang:sizeof(CORD)*8-8; Language lang:8; #else # error word endianness not determined for some obscure reason @@ -176,7 +179,7 @@ public: if(!opt.is_not_just_lang) if(opt.lang) { - if(opt.lang==alang) // same length? ignoring + if(opt.lang==alang) // same language? ignoring return; } else { opt.lang=alang; // to uninitialized @@ -192,7 +195,7 @@ public: if(!opt.is_not_just_lang) if(opt.lang) { - if(opt.lang==alang) // same length? ignoring + if(opt.lang==alang) // same language? ignoring return; } else { opt.lang=alang; // to uninitialized @@ -276,13 +279,25 @@ public: CORD body; +#ifdef HASH_CODE_CACHING + // cached hash code is not reseted on write operations as test shows + // that string body does not change after it is stored as a hash key + mutable uint hash_code; +#endif + public: const char* v() const; void dump() const; +#ifdef HASH_CODE_CACHING + Body(): body(CORD_EMPTY), hash_code(0) {} + Body(CORD abody, uint ahash_code): body(abody), hash_code(ahash_code) {} + Body(CORD abody): body(abody), hash_code(0) { +#else Body(): body(CORD_EMPTY) {} Body(CORD abody): body(abody) { +#endif assert(!body // no body || *body // ordinary string || body[1]==1 // CONCAT_HDR @@ -290,17 +305,15 @@ public: || body[1]==6 // SUBSTR_HDR ); } - /// WARNING: length is only HELPER length, str in ANY case should be zero-terminated - Body(const char* str, size_t helper_length): body(CORD_EMPTY) { - append_know_length(str, helper_length?helper_length:strlen(str)); - } + static Body Format(int value); void clear() { body=CORD_EMPTY; } bool operator! () const { return is_empty(); } - uint hash_code() const; + CORD get_cord() const { return body; } + uint get_hash_code() const; const char* cstr() const { return CORD_to_const_char_star(body); } char* cstrm() const { return CORD_to_char_star(body); } @@ -337,7 +350,7 @@ public: Body mid(size_t index, size_t length) const { return CORD_substr(body, index, length); } size_t pos(const char* substr, size_t offset=0) const { return CORD_str(body, offset, substr); } size_t pos(const Body substr, size_t offset=0) const { - if(!substr.length()) + if(substr.is_empty()) return STRING_NOT_FOUND; // in this case CORD_str returns 0 [parser users got used to -1] // CORD_str checks for bad offset [CORD_chr does not] @@ -405,8 +418,20 @@ public: static const String Empty; - explicit String(const char* cstr=0, size_t helper_length=0, bool tainted=false); - explicit String(const C cstr, bool tainted=false); + explicit String(){}; + explicit String(const char* cstr, Language alang=L_CLEAN){ + if(cstr && *cstr){ + body=cstr; + langs=alang; + } + } + explicit String(const String::C cstr, Language alang=L_CLEAN){ + if(cstr.length){ + body=cstr.str; + langs=alang; + } + } + String(int value, char *format); String(Body abody, Language alang): body(abody), langs(alang) { ASSERT_STRING_INVARIANT(*this); } @@ -415,7 +440,11 @@ public: } /// for convinient hash lookup +#ifdef HASH_CODE_CACHING + operator const Body&() const { return body; } +#else operator const Body() const { return body; } +#endif bool is_empty() const { return body.is_empty(); } size_t length() const { return body.length(); } @@ -549,10 +578,12 @@ inline size_t get_length(S return body.length(); } +#ifndef HASH_CODE_CACHING /// simple hash code of string. used by Hash inline uint hash_code(const String::Body self) { - return self.hash_code(); + return self.get_hash_code(); } +#endif /// now that we've declared specialization we can use it