--- parser3/src/main/pa_string.C 2016/09/05 21:59:22 1.254 +++ parser3/src/main/pa_string.C 2023/12/13 20:07:11 1.271 @@ -1,8 +1,8 @@ /** @file Parser: string class. @see untalength_t.C. - Copyright (c) 2001-2015 Art. Lebedev Studio (http://www.artlebedev.com) - Author: Alexandr Petrosian (http://paf.design.ru) + Copyright (c) 2001-2023 Art. Lebedev Studio (http://www.artlebedev.com) + Authors: Konstantin Morshnev , Alexandr Petrosian */ #include "pa_string.h" @@ -12,14 +12,13 @@ #include "pa_charset.h" #include "pa_vregex.h" -#ifndef ULLONG_MAX -#define ULLONG_MAX 18446744073709551615ULL -#endif - -volatile const char * IDENT_PA_STRING_C="$Id: pa_string.C,v 1.254 2016/09/05 21:59:22 moko Exp $" IDENT_PA_STRING_H; +volatile const char * IDENT_PA_STRING_C="$Id: pa_string.C,v 1.271 2023/12/13 20:07:11 moko Exp $" IDENT_PA_STRING_H; const String String::Empty; +#define COMPILE_ASSERT(x) extern int assert_checker[(x) ? 1 : -1] +COMPILE_ASSERT(sizeof(String::Languages) == sizeof(CORD)); + // pa_atoui is based on Manuel Novoa III _strto_l for uClibc template<typename T> inline T pa_ato_any(const char *str, int base, const String* problem_source,const T max){ @@ -87,14 +86,20 @@ template<typename T> inline T pa_ato_any } unsigned int pa_atoui(const char *str, int base, const String* problem_source){ + if(!str) + return 0; + return pa_ato_any(str, base, problem_source, UINT_MAX); } -unsigned long long pa_atoul(const char *str, int base, const String* problem_source){ - return pa_ato_any(str, base, problem_source, ULLONG_MAX); +uint64_t pa_atoul(const char *str, int base, const String* problem_source){ + if(!str) + return 0; + + return pa_ato_any(str, base, problem_source, ULLONG_MAX); } -int pa_atoi(const char* str, const String* problem_source) { +int pa_atoi(const char* str, int base, const String* problem_source) { if(!str) return 0; @@ -104,26 +109,31 @@ int pa_atoi(const char* str, const Strin if(!*str) return 0; + const char *str_copy=str; bool negative=false; if(str[0]=='-') { 
negative=true; str++; + if(!*str || isspace(*str)) + throw Exception("number.format", problem_source, problem_source ? "invalid number (int)" : "'%s' is invalid number (int)", str_copy); } else if(str[0]=='+') { str++; + if(!*str || isspace(*str)) + throw Exception("number.format", problem_source, problem_source ? "invalid number (int)" : "'%s' is invalid number (int)", str_copy); } - unsigned int result=pa_atoui(str, 0, problem_source); + unsigned int result=pa_atoui(str, base, problem_source); if(negative && result <= ((unsigned int)(-(1+INT_MIN)))+1) return -(int)result; if(result<=INT_MAX) return (int)result; - - throw Exception("number.format", problem_source, problem_source ? "out of range (int)" : "'%s' is out of range (int)", str); + + throw Exception("number.format", problem_source, problem_source ? "out of range (int)" : "'%s' is out of range (int)", str_copy); } -double pa_atod(const char* str, const String* problem_source) { +double pa_atod(const char* str, const String* problem_source /* never null */) { if(!str) return 0; @@ -137,8 +147,12 @@ double pa_atod(const char* str, const St if(str[0]=='-') { negative=true; str++; + if(!*str || isspace(*str)) + throw Exception("number.format", problem_source, "invalid number (double)"); } else if(str[0]=='+') { str++; + if(!*str || isspace(*str)) + throw Exception("number.format", problem_source, "invalid number (double)"); } double result; @@ -156,9 +170,9 @@ double pa_atod(const char* str, const St char *error_pos; result=strtod(str, &error_pos); - while(char c=*error_pos++) - if(!isspace((unsigned char)c)) - throw Exception("number.format", problem_source, problem_source ? "invalid number (double)" : "'%s' is invalid number (double)", str); + while(const char c=*error_pos++) + if(!isspace(c)) + throw Exception("number.format", problem_source, "invalid number (double)"); return negative ? 
-result : result; } @@ -480,7 +494,8 @@ struct CORD_length_info { }; int CORD_batched_len(const char* s, CORD_length_info* info){ - info->len += lengthUTF8( (const XMLByte *)s, (const XMLByte *)s+strlen(s)); return 0; + info->len += lengthUTF8( (const XMLByte *)s, (const XMLByte *)s+strlen(s)); + return 0; } // can be called only for IS_FUNCTION(CORD) which are used in large String::Body::mid @@ -527,7 +542,7 @@ String& String::mid(size_t substr_begin, String& String::mid(Charset& charset, size_t from, size_t to, size_t helper_length) const { String& result=*new String; - size_t self_length=(helper_length)?helper_length:length(charset); + size_t self_length=helper_length ? helper_length : length(charset); if(!self_length) return result; @@ -600,46 +615,40 @@ size_t String::pos(Charset& charset, con } } -void String::split(ArrayString& result, size_t& pos_after, const char* delim, Language lang, int limit) const { +void String::split(ArrayString& result, size_t pos_after, const char* delim, Language lang) const { if(is_empty()) return; size_t self_length=length(); if(size_t delim_length=strlen(delim)) { size_t pos_before; // while we have 'delim'... 
- for(; (pos_before=pos(delim, pos_after, lang))!=STRING_NOT_FOUND && limit; limit--) { + while((pos_before=pos(delim, pos_after, lang))!=STRING_NOT_FOUND) { result+=&mid(pos_after, pos_before); pos_after=pos_before+delim_length; } // last piece - if(pos_afterexec(subject, subject_length, ovector, ovector_size, prestart); @@ -670,8 +680,9 @@ Table* String::match(VRegex* vregex, Row int prefinish=ovector[0]; poststart=ovector[1]; - if (prestart==poststart && subject[poststart]=='\n'){ + if (prestart==poststart && action_was_executed==1){ prestart++; + action_was_executed=0; continue; } @@ -692,12 +703,13 @@ Table* String::match(VRegex* vregex, Row } matches_count++; - row_action(table, row, prestart, prefinish, poststart, postfinish, info); + row_action(table, row, prestart - !action_was_executed, prefinish, poststart, postfinish, info); - if(!global || prestart==poststart) // last step + if(!global || poststart>=subject_length) // last step, avoid prestart++ after last char break; prestart=poststart; + action_was_executed=1; } row_action(table, 0/*last time, no raw*/, 0, 0, poststart, postfinish, info); @@ -949,19 +961,15 @@ bool String::deserialize(size_t prolog_s return true; } -const char* String::Body::v() const { - return CORD_to_const_char_star(body, length()); -} - void String::Body::dump() const { CORD_dump(body); } -const char* String::Languages::v() const { +const char* String::Languages::visualize() const { if(opt.is_not_just_lang) return CORD_to_const_char_star(langs, 0); else - return (const char*)&langs; + return 0; } void String::Languages::dump() const { @@ -971,26 +979,22 @@ void String::Languages::dump() const { puts((const char*)&langs); } -const char* String::v() const { - const uint LIMIT_VIEW=20; - char* buf=(char*)pa_malloc(MAX_STRING); - const char*body_view=body.v(); - const char*langs_view=langs.v(); - snprintf(buf, MAX_STRING, - "%d:%.*s%s} " - "{%d:%s", - langs.count(), LIMIT_VIEW, langs_view, strlen(langs_view)>LIMIT_VIEW?"...":"", - 
strlen(body_view), body_view - ); - - return buf; -} - void String::dump() const { body.dump(); langs.dump(); } +static char *n_chars(char c, size_t length){ + char *result=(char *)pa_malloc_atomic(length+1); + memset(result, c, length); + result[length] = '\0'; + return result; +} + +char* String::visualize_langs() const { + return is_not_just_lang() ? pa_strdup(langs.visualize()) : n_chars((char)just_lang(), length()); +} + const String& String::trim(String::Trim_kind kind, const char* chars, Charset* source_charset) const { if(is_empty()) return *this;