--- parser3/src/main/pa_string.C 2012/03/12 22:29:41 1.239 +++ parser3/src/main/pa_string.C 2012/06/20 21:01:20 1.243 @@ -1,12 +1,10 @@ /** @file Parser: string class. @see untalength_t.C. - Copyright (c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ -static const char * const IDENT_STRING_C="$Date: 2012/03/12 22:29:41 $"; - #include "pa_string.h" #include "pa_exception.h" #include "pa_table.h" @@ -14,19 +12,86 @@ static const char * const IDENT_STRING_C #include "pa_charset.h" #include "pa_vregex.h" +volatile const char * IDENT_PA_STRING_C="$Id: pa_string.C,v 1.243 2012/06/20 21:01:20 moko Exp $" IDENT_PA_STRING_H; + const String String::Empty; + +// pa_atoui is based on Manuel Novoa III _strto_l for uClibc + +unsigned int pa_atoui(const char *str, int base, const String* problem_source){ + unsigned int result = 0; + const char *pos = str; + + while (isspace(*pos)) /* skip leading whitespace */ + ++pos; + + if (base == 16 && *pos == '0') { /* handle option prefix */ + ++pos; + if (*pos == 'x' || *pos == 'X') { + ++pos; + } + } + + if (base == 0) { /* dynamic base */ + base = 10; /* default is 10 */ + if (*pos == '0') { + ++pos; + if (*pos == 'x' || *pos == 'X') + ++pos; + base=16; + } + } + + if (base < 2 || base > 16) { /* illegal base */ + throw Exception(PARSER_RUNTIME, 0, "base to must be an integer from 2 to 16"); + } + + unsigned int cutoff = UINT_MAX / base; + int cutoff_digit = UINT_MAX - cutoff * base; + + while(true) { + int digit; + + if ((*pos >= '0') && (*pos <= '9')) { + digit = (*pos - '0'); + } else if (*pos >= 'a') { + digit = (*pos - 'a' + 10); + } else if (*pos >= 'A') { + digit = (*pos - 'A' + 10); + } else break; + + if (digit >= base) { + break; + } + + ++pos; + + /* adjust number, with overflow check */ + if ((result > cutoff) || ((result == cutoff) && (digit > cutoff_digit))) { + throw Exception("number.format", problem_source, problem_source ? "out of range (int)" : "'%s' is out of range (int)", str); + } else { + result = result * base + digit; + } + } + + while(char c=*pos++) + if(!isspace(c)) + throw Exception("number.format", problem_source, problem_source ? "invalid number (int)" : "'%s' is invalid number (int)", str); + + return result; +} + int pa_atoi(const char* str, const String* problem_source) { if(!str) return 0; - while(*str && isspace((unsigned char)*str)) + while(isspace(*str)) str++; + if(!*str) return 0; - int result; - char *error_pos; bool negative=false; if(str[0]=='-') { negative=true; @@ -34,40 +99,28 @@ int pa_atoi(const char* str, const Strin } else if(str[0]=='+') { str++; } - // 0xABC - if(str[0]=='0') - if(str[1]=='x' || str[1]=='X') - result=(int)(unsigned long)strtol(str, &error_pos, 0); - else { - // skip leading 0000, to disable octal interpretation - do str++; while(*str=='0'); - result=(int)strtol(str, &error_pos, 0); - } - else - result=(int)strtol(str, &error_pos, 0); - if(negative) - result=-result; - while(char c=*error_pos++) - if(!isspace((unsigned char)c)) - throw Exception("number.format", - problem_source, - problem_source?"invalid number (int)": "'%s' is invalid number (int)", str); + unsigned int result=pa_atoui(str, 0, problem_source); - return result; + if(negative && result <= ((unsigned int)(-(1+INT_MIN)))+1) + return -(int)result; + + if(result<=INT_MAX) + return (int)result; + + throw Exception("number.format", problem_source, problem_source ? "out of range (int)" : "'%s' is out of range (int)", str); } double pa_atod(const char* str, const String* problem_source) { if(!str) return 0; - while(*str && isspace((unsigned char)*str)) + while(isspace(*str)) str++; + if(!*str) return 0; - double result; - char *error_pos; bool negative=false; if(str[0]=='-') { negative=true; @@ -75,27 +128,26 @@ double pa_atod(const char* str, const St } else if(str[0]=='+') { str++; } - // 0xABC + + double result; if(str[0]=='0') - if(str[1]=='x' || str[1]=='X') - result=(double)(unsigned long)strtol(str, &error_pos, 0); - else { + if(str[1]=='x' || str[1]=='X'){ + // 0xABC + result=(double)pa_atoui(str, 0, problem_source); + return negative ? -result : result; + } else { // skip leading 0000, to disable octal interpretation - do str++; while(*str=='0'); - result=(double)strtod(str, &error_pos); + do str++; while(*str=='0'); } - else - result=(double)strtod(str, &error_pos); - if(negative) - result=-result; + + char *error_pos; + result=strtod(str, &error_pos); while(char c=*error_pos++) if(!isspace((unsigned char)c)) - throw Exception("number.format", - problem_source, - problem_source?"invalid number (double)": "'%s' is invalid number (double)", str); + throw Exception("number.format", problem_source, problem_source ? "invalid number (double)" : "'%s' is invalid number (double)", str); - return result; + return negative ? -result : result; } // cord lib extension @@ -310,6 +362,57 @@ uint String::Body::get_hash_code() const return hash_code; } +struct CORD_pos_info { + const char* chars; + size_t left; + size_t pos; +}; + +// can be called only for IS_FUNCTION(CORD) which is used in String::Body::strrpbrk +static int CORD_iter_fn_rpos(char c, CORD_pos_info* info) { + if(info->pos < info->left){ + info->pos=STRING_NOT_FOUND; + return 1; + } + if(strchr(info->chars, c)) + return 1; + --(info->pos); + return 0; +} + +size_t String::Body::strrpbrk(const char* chars, size_t left, size_t right) const { + if(is_empty() || !chars || !strlen(chars)) + return STRING_NOT_FOUND; + CORD_pos_info info={chars, left, right}; + if(CORD_riter4(body, right, (CORD_iter_fn)CORD_iter_fn_rpos, &info)) + return info.pos; + else + return STRING_NOT_FOUND; +} + + +// can be called only for IS_FUNCTION(CORD) which is used in String::Body::rskipchars +static int CORD_iter_fn_rskip(char c, CORD_pos_info* info) { + if(info->pos < info->left) { + info->pos=STRING_NOT_FOUND; + return 1; + } + if(!strchr(info->chars, c)) + return 1; + --(info->pos); + return 0; +} + +size_t String::Body::rskipchars(const char* chars, size_t left, size_t right) const { + if(is_empty() || !chars || !strlen(chars)) + return STRING_NOT_FOUND; + CORD_pos_info info={chars, left, right}; + if(CORD_riter4(body, right, (CORD_iter_fn)CORD_iter_fn_rskip, &info)) + return info.pos; + else + return STRING_NOT_FOUND; +} + // String methods String& String::append_know_length(const char* str, size_t known_length, Language lang) {