--- parser3/src/main/pa_string.C 2012/06/20 21:01:20 1.243 +++ parser3/src/main/pa_string.C 2020/10/10 06:08:37 1.263 @@ -1,7 +1,7 @@ /** @file Parser: string class. @see untalength_t.C. - Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com) + Copyright (c) 2001-2017 Art. Lebedev Studio (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ @@ -12,15 +12,17 @@ #include "pa_charset.h" #include "pa_vregex.h" -volatile const char * IDENT_PA_STRING_C="$Id: pa_string.C,v 1.243 2012/06/20 21:01:20 moko Exp $" IDENT_PA_STRING_H; +volatile const char * IDENT_PA_STRING_C="$Id: pa_string.C,v 1.263 2020/10/10 06:08:37 moko Exp $" IDENT_PA_STRING_H; const String String::Empty; +#define COMPILE_ASSERT(x) extern int assert_checker[(x) ? 1 : -1] +COMPILE_ASSERT(sizeof(String::Languages) == sizeof(CORD)); // pa_atoui is based on Manuel Novoa III _strto_l for uClibc -unsigned int pa_atoui(const char *str, int base, const String* problem_source){ - unsigned int result = 0; +template inline T pa_ato_any(const char *str, int base, const String* problem_source,const T max){ + T result = 0; const char *pos = str; while (isspace(*pos)) /* skip leading whitespace */ @@ -37,9 +39,10 @@ unsigned int pa_atoui(const char *str, i base = 10; /* default is 10 */ if (*pos == '0') { ++pos; - if (*pos == 'x' || *pos == 'X') + if (*pos == 'x' || *pos == 'X'){ ++pos; base=16; + } } } @@ -47,8 +50,8 @@ unsigned int pa_atoui(const char *str, i throw Exception(PARSER_RUNTIME, 0, "base to must be an integer from 2 to 16"); } - unsigned int cutoff = UINT_MAX / base; - int cutoff_digit = UINT_MAX - cutoff * base; + T cutoff = max / base; + int cutoff_digit = (int)(max - cutoff * base); while(true) { int digit; @@ -82,6 +85,20 @@ unsigned int pa_atoui(const char *str, i return result; } +unsigned int pa_atoui(const char *str, int base, const String* problem_source){ + if(!str) + return 0; + + return pa_ato_any(str, base, problem_source, UINT_MAX); +} + +unsigned long long pa_atoul(const char *str, int base, const String* problem_source){ + if(!str) + return 0; + + return pa_ato_any(str, base, problem_source, ULLONG_MAX); +} + int pa_atoi(const char* str, const String* problem_source) { if(!str) return 0; @@ -130,15 +147,16 @@ double pa_atod(const char* str, const St } double result; - if(str[0]=='0') - if(str[1]=='x' || str[1]=='X'){ + if(str[0]=='0') { + if(str[1]=='x' || str[1]=='X') { // 0xABC - result=(double)pa_atoui(str, 0, problem_source); + result=(double)pa_atoul(str, 0, problem_source); return negative ? -result : result; } else { // skip leading 0000, to disable octal interpretation do str++; while(*str=='0'); } + } char *error_pos; result=strtod(str, &error_pos); @@ -154,39 +172,42 @@ double pa_atod(const char* str, const St #ifndef DOXYGEN typedef struct { - ssize_t countdown; - int target; /* Character we're looking for */ + ssize_t countdown; + int target; /* Character we're looking for */ } chr_data; #endif + static int CORD_range_contains_chr_greater_then_proc(char c, size_t size, void* client_data) { - register chr_data * d = (chr_data *)client_data; - - if (d -> countdown<=0) return(2); - d -> countdown -= size; - if (c > d -> target) return(1); - return(0); + register chr_data * d = (chr_data *)client_data; + + if (d -> countdown<=0) return(2); + d -> countdown -= size; + if (c > d -> target) return(1); + return(0); } + int CORD_range_contains_chr_greater_then(CORD x, size_t i, size_t n, int c) { - chr_data d; + chr_data d; - d.countdown = n; - d.target = c; - return(CORD_block_iter(x, i, CORD_range_contains_chr_greater_then_proc, &d) == 1/*alternatives: 0 normally ended, 2=struck 'n'*/); + d.countdown = n; + d.target = c; + return(CORD_block_iter(x, i, CORD_range_contains_chr_greater_then_proc, &d) == 1/*alternatives: 0 normally ended, 2=struck 'n'*/); } static int CORD_block_count_proc(char /*c*/, size_t /*size*/, void* client_data) { - int* result=(int*)client_data; - (*result)++; - return(0); // 0=continue + int* result=(int*)client_data; + (*result)++; + return(0); // 0=continue } + size_t CORD_block_count(CORD x) { size_t result=0; CORD_block_iter(x, 0, CORD_block_count_proc, &result); - return result; + return result; } // helpers @@ -217,8 +238,7 @@ String::Body String::Body::Format(int va return String::Body(pa_strdup(local, length)); } -String::Body String::Body::trim(String::Trim_kind kind, const char* chars, - size_t* out_start, size_t* out_length, Charset* source_charset) const { +String::Body String::Body::trim(String::Trim_kind kind, const char* chars, size_t* out_start, size_t* out_length, Charset* source_charset) const { size_t our_length=length(); if(!our_length) return *this; @@ -340,11 +360,13 @@ static int CORD_batched_iter_fn_generic_ generic_hash_code(result, c); return 0; } + static int CORD_batched_iter_fn_generic_hash_code(const char* s, void * client_data) { uint& result=*static_cast(client_data); generic_hash_code(result, s); return 0; -}; +} + uint String::Body::get_hash_code() const { #ifdef HASH_CODE_CACHING if(hash_code) @@ -353,7 +375,7 @@ uint String::Body::get_hash_code() const uint hash_code=0; #endif if (body && CORD_IS_STRING(body)){ - generic_hash_code(hash_code, body); + generic_hash_code(hash_code, (const char *)body); } else { CORD_iter5(body, 0, CORD_batched_iter_fn_generic_hash_code, @@ -427,6 +449,7 @@ String& String::append_know_length(const ASSERT_STRING_INVARIANT(*this); return *this; } + String& String::append_help_length(const char* str, size_t helper_length, Language lang) { if(!str) return *this; @@ -436,10 +459,12 @@ String& String::append_help_length(const return append_know_length(str, known_length, lang); } -String::String(int value, char *format) : langs(L_CLEAN){ + +String::String(int value, const char *format) : langs(L_CLEAN){ char buf[MAX_NUMBER]; body.append_strdup_know_length(buf, snprintf(buf, MAX_NUMBER, format, value)); } + String& String::append_strdup(const char* str, size_t helper_length, Language lang) { size_t known_length=helper_length?helper_length:strlen(str); if(!known_length) @@ -460,7 +485,8 @@ struct CORD_length_info { }; int CORD_batched_len(const char* s, CORD_length_info* info){ - info->len += lengthUTF8( (const XMLByte *)s, (const XMLByte *)s+strlen(s)); return 0; + info->len += lengthUTF8( (const XMLByte *)s, (const XMLByte *)s+strlen(s)); + return 0; } // can be called only for IS_FUNCTION(CORD) which are used in large String::Body::mid @@ -507,7 +533,7 @@ String& String::mid(size_t substr_begin, String& String::mid(Charset& charset, size_t from, size_t to, size_t helper_length) const { String& result=*new String; - size_t self_length=(helper_length)?helper_length:length(charset); + size_t self_length=helper_length ? helper_length : length(charset); if(!self_length) return result; @@ -555,13 +581,11 @@ size_t String::pos(const String::Body su } } -size_t String::pos(const String& substr, - size_t this_offset, Language lang) const { +size_t String::pos(const String& substr, size_t this_offset, Language lang) const { return pos(substr.body, this_offset, lang); } -size_t String::pos(Charset& charset, const String& substr, - size_t this_offset, Language lang) const { +size_t String::pos(Charset& charset, const String& substr, size_t this_offset, Language lang) const { if(charset.isUTF8()){ const XMLByte* srcPtr=(const XMLByte*)cstr(); @@ -582,58 +606,44 @@ size_t String::pos(Charset& charset, con } } -void String::split(ArrayString& result, - size_t& pos_after, - const char* delim, - Language lang, int limit) const { +void String::split(ArrayString& result, size_t pos_after, const char* delim, Language lang) const { if(is_empty()) return; size_t self_length=length(); if(size_t delim_length=strlen(delim)) { size_t pos_before; // while we have 'delim'... - for(; (pos_before=pos(delim, pos_after, lang))!=STRING_NOT_FOUND && limit; limit--) { + while((pos_before=pos(delim, pos_after, lang))!=STRING_NOT_FOUND) { result+=&mid(pos_after, pos_before); pos_after=pos_before+delim_length; } // last piece - if(pos_afterinfo(); // I have no idea what does it for? @@ -824,12 +834,14 @@ const String& String::replace(const Dict static int serialize_body_char(char c, char** cur) { *((*cur)++)=c; return 0; // 0=continue -}; +} + static int serialize_body_piece(const char* s, char** cur) { size_t length=strlen(s); memcpy(*cur, s, length); *cur+=length; return 0; // 0=continue -}; +} + static int serialize_lang_piece(char alang, size_t asize, char** cur) { // lang **cur=alang; (*cur)++; @@ -838,6 +850,7 @@ static int serialize_lang_piece(char ala return 0; // 0=continue } + String::Cm String::serialize(size_t prolog_length) const { size_t fragments_count=langs.count(); size_t body_length=body.length(); @@ -866,6 +879,7 @@ String::Cm String::serialize(size_t prol return result; } + bool String::deserialize(size_t prolog_size, void *buf, size_t buf_size) { size_t in_buf=buf_size; if(in_buf<=prolog_size) @@ -889,10 +903,7 @@ bool String::deserialize(size_t prolog_s if(cur[body_length] != 0) // in place? return false; // 3: letters - body=String::Body(*cur?cur:0); -#ifdef STRING_LENGTH_CACHING - body.set_length(body_length); -#endif + body=String::Body(String::C(cur, body_length)); cur+=body_length+1; in_buf-=body_length+1; @@ -938,45 +949,40 @@ bool String::deserialize(size_t prolog_s return true; } -const char* String::Body::v() const { - return CORD_to_const_char_star(body, length()); -} void String::Body::dump() const { CORD_dump(body); } -const char* String::Languages::v() const { +const char* String::Languages::visualize() const { if(opt.is_not_just_lang) return CORD_to_const_char_star(langs, 0); else - return (const char*)&langs; + return 0; } + void String::Languages::dump() const { if(opt.is_not_just_lang) CORD_dump(langs); else puts((const char*)&langs); } -const char* String::v() const { - const uint LIMIT_VIEW=20; - char* buf=(char*)malloc(MAX_STRING); - const char*body_view=body.v(); - const char*langs_view=langs.v(); - snprintf(buf, MAX_STRING, - "%d:%.*s%s} " - "{%d:%s", - langs.count(), LIMIT_VIEW, langs_view, strlen(langs_view)>LIMIT_VIEW?"...":"", - strlen(body_view), body_view - ); - - return buf; -} void String::dump() const { body.dump(); langs.dump(); } +static char *n_chars(char c, size_t length){ + char *result=(char *)pa_malloc_atomic(length+1); + memset(result, c, length); + result[length] = '\0'; + return result; +} + +char* String::visualize_langs() const { + return is_not_just_lang() ? pa_strdup(langs.visualize()) : n_chars((char)just_lang(), length()); +} + const String& String::trim(String::Trim_kind kind, const char* chars, Charset* source_charset) const { if(is_empty()) return *this;