--- parser3/src/main/pa_string.C 2011/05/05 23:45:45 1.237 +++ parser3/src/main/pa_string.C 2012/05/24 12:49:49 1.241 @@ -1,12 +1,10 @@ /** @file Parser: string class. @see untalength_t.C. - Copyright (c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ -static const char * const IDENT_STRING_C="$Date: 2011/05/05 23:45:45 $"; - #include "pa_string.h" #include "pa_exception.h" #include "pa_table.h" @@ -14,6 +12,8 @@ static const char * const IDENT_STRING_C #include "pa_charset.h" #include "pa_vregex.h" +volatile const char * IDENT_PA_STRING_C="$Id: pa_string.C,v 1.241 2012/05/24 12:49:49 misha Exp $" IDENT_PA_STRING_H; + const String String::Empty; int pa_atoi(const char* str, const String* problem_source) { @@ -310,6 +310,57 @@ uint String::Body::get_hash_code() const return hash_code; } +struct CORD_pos_info { + const char* chars; + size_t left; + size_t pos; +}; + +// can be called only for IS_FUNCTION(CORD) which is used in String::Body::strrpbrk +static int CORD_iter_fn_rpos(char c, CORD_pos_info* info) { + if(info->pos < info->left){ + info->pos=STRING_NOT_FOUND; + return 1; + } + if(strchr(info->chars, c)) + return 1; + --(info->pos); + return 0; +} + +size_t String::Body::strrpbrk(const char* chars, size_t left, size_t right) const { + if(is_empty() || !chars || !strlen(chars)) + return STRING_NOT_FOUND; + CORD_pos_info info={chars, left, right}; + if(CORD_riter4(body, right, (CORD_iter_fn)CORD_iter_fn_rpos, &info)) + return info.pos; + else + return STRING_NOT_FOUND; +} + + +// can be called only for IS_FUNCTION(CORD) which is used in String::Body::rskipchars +static int CORD_iter_fn_rskip(char c, CORD_pos_info* info) { + if(info->pos < info->left) { + info->pos=STRING_NOT_FOUND; + return 1; + } + if(!strchr(info->chars, c)) + return 1; + --(info->pos); + return 0; +} + +size_t String::Body::rskipchars(const char* chars, size_t left, size_t right) const { + if(is_empty() || !chars || !strlen(chars)) + return STRING_NOT_FOUND; + CORD_pos_info info={chars, left, right}; + if(CORD_riter4(body, right, (CORD_iter_fn)CORD_iter_fn_rskip, &info)) + return info.pos; + else + return STRING_NOT_FOUND; +} + // String methods String& String::append_know_length(const char* str, size_t known_length, Language lang) { @@ -655,34 +706,63 @@ const String& String::escape(Charset& so return Charset::escape(*this, source_charset); } +#define STRING_APPEND(result, from_cstr, langs, langs_offset, length) \ + result.langs.append(result.body, langs, langs_offset, length); \ + result.body.append_strdup_know_length(from_cstr, length); + const String& String::replace(const Dictionary& dict) const { + if(!dict.count() || is_empty()) + return *this; + String& result=*new String(); const char* old_cstr=cstr(); const char* prematch_begin=old_cstr; - const char* current=old_cstr; - while(*current) { - if(Dictionary::Subst subst=dict.first_that_begins(current)) { + if(dict.count()==1) { + // optimized simple case + + Dictionary::Subst subst=dict.get(0); + while(const char* p=strstr(prematch_begin, subst.from)) { // prematch - if(size_t prematch_length=current-prematch_begin) { - result.langs.append(result.body, langs, prematch_begin-old_cstr, prematch_length); - result.body.append_strdup_know_length(prematch_begin, prematch_length); + if(size_t prematch_length=p-prematch_begin) { + STRING_APPEND(result, prematch_begin, langs, prematch_begin-old_cstr, prematch_length) } // match - // skip 'a' in 'current'; move prematch_begin - current+=subst.from_length; prematch_begin=current; + prematch_begin=p+subst.from_length; if(const String* b=subst.to) // are there any b? result<<*b; - } else // simply advance - current++; + } + + } else { + + const char* current=old_cstr; + while(*current) { + if(Dictionary::Subst subst=dict.first_that_begins(current)) { + // prematch + if(size_t prematch_length=current-prematch_begin) { + STRING_APPEND(result, prematch_begin, langs, prematch_begin-old_cstr, prematch_length) + } + + // match + // skip 'a' in 'current'; move prematch_begin + current+=subst.from_length; prematch_begin=current; + + if(const String* b=subst.to) // are there any b? + result<<*b; + } else // simply advance + current++; + } + } + if(prematch_begin==old_cstr) // not modified + return *this; + // postmatch - if(size_t postmatch_length=current-prematch_begin) { - result.langs.append(result.body, langs, prematch_begin-old_cstr, postmatch_length); - result.body.append_strdup_know_length(prematch_begin, postmatch_length); + if(size_t postmatch_length=old_cstr+length()-prematch_begin) { + STRING_APPEND(result, prematch_begin, langs, prematch_begin-old_cstr, postmatch_length) } ASSERT_STRING_INVARIANT(result);