--- parser3/src/main/pa_string.C 2003/09/29 09:42:12 1.181 +++ parser3/src/main/pa_string.C 2004/02/11 15:33:16 1.194 @@ -1,11 +1,11 @@ /** @file Parser: string class. @see untalength_t.C. - Copyright (c) 2001-2003 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001-2004 ArtLebedev Group (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ -static const char* IDENT_STRING_C="$Date: 2003/09/29 09:42:12 $"; +static const char * const IDENT_STRING_C="$Date: 2004/02/11 15:33:16 $"; #include "pcre.h" @@ -15,6 +15,66 @@ static const char* IDENT_STRING_C="$Date #include "pa_dictionary.h" #include "pa_charset.h" +const String String::Empty; + +int pa_atoi(const char* str, const String* problem_source) { + if(!str) + return 0; + + while(*str && isspace(*str)) + str++; + if(!*str) + return 0; + + int result; + char *error_pos; + // 0xABC + if(str[0]=='0') + if(str[1]=='x' || str[1]=='X') + result=(int)(unsigned long)strtol(str, &error_pos, 0); + else + result=(int)strtol(str+1/*skip leading 0*/, &error_pos, 0); + else + result=(int)strtol(str, &error_pos, 0); + + while(char c=*error_pos++) + if(!isspace(c)) + throw Exception("number.format", + problem_source, + problem_source?"invalid number (int)": "'%s' is invalid number (int)", str); + + return result; +} + +double pa_atod(const char* str, const String* problem_source) { + if(!str) + return 0; + + while(*str && isspace(*str)) + str++; + if(!*str) + return 0; + + double result; + char *error_pos; + // 0xABC + if(str[0]=='0') + if(str[1]=='x' || str[1]=='X') + result=(double)(unsigned long)strtol(str, &error_pos, 0); + else + result=(double)strtod(str+1/*skip leading 0*/, &error_pos); + else + result=(double)strtod(str, &error_pos); + + while(char c=*error_pos++) + if(!isspace(c)) + throw Exception("number.format", + problem_source, + problem_source?"invalid number (double)": "'%s' is invalid number (double)", str); + + return result; +} + // cord lib extension #ifndef DOXYGEN @@ -41,7 +101,7 @@ int CORD_range_contains_chr_greater_then return(CORD_block_iter(x, i, CORD_range_contains_chr_greater_then_proc, &d) == 1/*alternatives: 0 normally ended, 2=struck 'n'*/); } -static int CORD_block_count_proc(char c, size_t size, void* client_data) +static int CORD_block_count_proc(char /*c*/, size_t /*size*/, void* client_data) { int* result=(int*)client_data; (*result)++; @@ -167,11 +227,18 @@ String& String::mid(size_t substr_begin, } size_t String::pos(const String::Body substr, size_t this_offset, Language lang) const { - size_t substr_begin=body.pos(substr, this_offset); - if(substr_begin==CORD_NOT_FOUND || !langs.check_lang(lang, substr_begin, substr.length())) - return STRING_NOT_FOUND; + size_t substr_length=substr.length(); + while(true) { + size_t substr_begin=body.pos(substr, this_offset); + + if(substr_begin==CORD_NOT_FOUND) + return STRING_NOT_FOUND; + + if(langs.check_lang(lang, substr_begin, substr_length)) + return substr_begin; - return substr_begin; + this_offset=substr_begin+substr_length; + } } size_t String::pos(const String& substr, @@ -185,7 +252,7 @@ void String::split(ArrayString& result, Language lang, int limit) const { size_t self_length=length(); if(size_t delim_length=strlen(delim)) { - int pos_before; + size_t pos_before; // while we have 'delim'... for(; (pos_before=pos(delim, pos_after, lang))!=STRING_NOT_FOUND && limit; limit--) { result+=&mid(pos_after, pos_before); @@ -207,7 +274,7 @@ void String::split(ArrayString& result, const String& delim, Language lang, int limit) const { if(!delim.is_empty()) { - int pos_before; + size_t pos_before; // while we have 'delim'... for(; (pos_before=pos(delim, pos_after, lang))!=STRING_NOT_FOUND && limit; limit--) { result+=&mid(pos_after, pos_before); @@ -232,13 +299,13 @@ static void regex_options(const String* int *result; bool *flag; } regex_option[]={ - {"i", "I", 0, PCRE_CASELESS, result}, // a=A - {"s", "S", 0, PCRE_DOTALL, result}, // \n\n$ [default] - {"x", "U", 0, PCRE_EXTENDED, result}, // whitespace in regex ignored - {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$ - {"g", "G", 0, true, result+1}, // many rows + {"i", "I", 0, PCRE_CASELESS, result, 0}, // a=A + {"s", "S", 0, PCRE_DOTALL, result, 0}, // \n\n$ [default] + {"x", "U", 0, PCRE_EXTENDED, result, 0}, // whitespace in regex ignored + {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result, 0}, // ^aaa\n$^bbb\n$ + {"g", "G", 0, 1, result+1, 0}, // many rows {"'", 0, 0, 0, 0, &need_pre_post_match}, - {0} + {0, 0, 0, 0, 0, 0} }; result[0]=PCRE_EXTRA | PCRE_DOTALL | PCRE_DOLLAR_ENDONLY; result[1]=0; @@ -334,9 +401,9 @@ Table* String::match(Charset& source_cha *row+=&mid(prefinish, poststart); // .match *row+=&mid(poststart, postfinish); // .postmatch } else { - *row+=0; // .prematch column value - *row+=0; // .match - *row+=0; // .postmatch + *row+=&Empty; // .prematch column value + *row+=&Empty; // .match + *row+=&Empty; // .postmatch } for(int i=1; iget(0); // skip 'a' in 'current'; move prematch_begin - current+=a->length(); prematch_begin=current; + current+=subst.from_length; prematch_begin=current; - if(row->count()>1) { // are there any b? - const String* b=row->get(1); + if(const String* b=subst.to) // are there any b? result<<*b; - } } else // simply advance current++; } @@ -453,61 +517,6 @@ const String& String::replace(const Dict return result; } -double String::as_double() const { - double result; - const char *str=cstr(); - - while(*str && isspace(*str)) - str++; - if(!*str) - return 0; - - char *error_pos; - // 0xABC - if(str[0]=='0') - if(str[1]=='x' || str[1]=='X') - result=(double)(unsigned long)strtol(str, &error_pos, 0); - else - result=(double)strtod(str+1/*skip leading 0*/, &error_pos); - else - result=(double)strtod(str, &error_pos); - - while(char c=*error_pos++) - if(!isspace(c)) - throw Exception("number.format", - this, - "invalid number (double)"); - - return result; -} -int String::as_int() const { - int result; - const char *str=cstr(); - - while(*str && isspace(*str)) - str++; - if(!*str) - return 0; - - char *error_pos; - // 0xABC - if(str[0]=='0') - if(str[1]=='x' || str[1]=='X') - result=(int)(unsigned long)strtol(str, &error_pos, 0); - else - result=(int)strtol(str+1/*skip leading 0*/, &error_pos, 0); - else - result=(int)strtol(str, &error_pos, 0); - - while(char c=*error_pos++) - if(!isspace(c)) - throw Exception("number.format", - this, - "invalid number (int)"); - - return result; -} - static int serialize_body_char(char c, char** cur) { *((*cur)++)=c; return 0; // 0=continue @@ -519,8 +528,8 @@ static int serialize_body_piece(const ch }; static int serialize_lang_piece(char alang, size_t asize, char** cur) { // lang - memcpy(*cur, &alang, sizeof(alang)); *cur+=sizeof(alang); - // length + **cur=alang; (*cur)++; + // length [WARNING: not cast, addresses must be %4=0 on sparc] memcpy(*cur, &asize, sizeof(asize)); *cur+=sizeof(asize); return 0; // 0=continue @@ -537,13 +546,14 @@ String::Cm String::serialize(size_t prol // 1: prolog char *cur=result.str+prolog_length; - // 2: langs.count + // 2: langs.count [WARNING: not cast, addresses must be %4=0 on sparc] memcpy(cur, &fragments_count, sizeof(fragments_count)); cur+=sizeof(fragments_count); // 3: lang info langs.for_each(body, serialize_lang_piece, &cur); // 4: letters body.for_each(serialize_body_char, serialize_body_piece, &cur); - // 5: zero terminator already there put by new(PointerFreeGC) + // 5: zero terminator + *cur=0; return result; } @@ -557,22 +567,30 @@ bool String::deserialize(size_t prolog_l const char* cur=(const char* )buf+prolog_length; // 2: langs.count - if(buf_length(cur); cur+=sizeof(size_t); - buf_length-=sizeof(size_t); + // [WARNING: not cast, addresses must be %4=0 on sparc] + memcpy(&fragments_count, cur, sizeof(fragments_count)); cur+=sizeof(fragments_count); + buf_length-=sizeof(fragments_count); if(fragments_count) { // 3: lang info size_t total_length=0; for(size_t f=0; f(cur); cur+=sizeof(char); - size_t fragment_length=*reinterpret_cast(cur); cur+=sizeof(size_t); - langs.append(total_length, lang, fragment_length); + // lang + lang=*cur++; + // length [WARNING: not cast, addresses must be %4=0 on sparc] + memcpy(&fragment_length, cur, sizeof(fragment_length)); cur+=sizeof(fragment_length); + + // uchar needed to prevent propagating 0x80 bit to upper bytes + langs.append(total_length, (String::Language)(uchar)lang, fragment_length); total_length+=fragment_length; buf_length-=piece_length;