--- parser3/src/main/pa_string.C 2003/09/26 06:53:27 1.178 +++ parser3/src/main/pa_string.C 2003/10/21 05:11:00 1.185 @@ -5,7 +5,7 @@ Author: Alexandr Petrosian (http://paf.design.ru) */ -static const char* IDENT_STRING_C="$Date: 2003/09/26 06:53:27 $"; +static const char* IDENT_STRING_C="$Date: 2003/10/21 05:11:00 $"; #include "pcre.h" @@ -15,6 +15,8 @@ static const char* IDENT_STRING_C="$Date #include "pa_dictionary.h" #include "pa_charset.h" +const String String::Empty; + // cord lib extension #ifndef DOXYGEN @@ -167,11 +169,18 @@ String& String::mid(size_t substr_begin, } size_t String::pos(const String::Body substr, size_t this_offset, Language lang) const { - size_t substr_begin=body.pos(substr, this_offset); - if(substr_begin==CORD_NOT_FOUND || !langs.check_lang(lang, substr_begin, substr.length())) - return STRING_NOT_FOUND; + size_t substr_length=substr.length(); + while(true) { + size_t substr_begin=body.pos(substr, this_offset); + + if(substr_begin==CORD_NOT_FOUND) + return STRING_NOT_FOUND; + + if(langs.check_lang(lang, substr_begin, substr_length)) + return substr_begin; - return substr_begin; + this_offset=substr_begin+substr_length; + } } size_t String::pos(const String& substr, @@ -334,9 +343,9 @@ Table* String::match(Charset& source_cha *row+=&mid(prefinish, poststart); // .match *row+=&mid(poststart, postfinish); // .postmatch } else { - *row+=0; // .prematch column value - *row+=0; // .match - *row+=0; // .postmatch + *row+=&Empty; // .prematch column value + *row+=&Empty; // .match + *row+=&Empty; // .postmatch } for(int i=1; i(kind)); // never - a=b=0; // calm, compiler - break; // never - } - char* new_cstr=cstrm(); char *dest=new_cstr; - unsigned char index; - for(const char* current=new_cstr; index=(unsigned char)*current; current++) { - unsigned char c=a[index]; - if(b) - c=b[c]; + if(source_charset.isUTF8()) { + switch(kind) { + case CC_UPPER: + change_case_UTF8((const XMLByte*)new_cstr, (XMLByte*)new_cstr, UTF8CaseToUpper); + break; + case CC_LOWER: + change_case_UTF8((const XMLByte*)new_cstr, (XMLByte*)new_cstr, UTF8CaseToLower); + break; + default: + assert(!"unknown change case kind"); + break; // never + } + + } else { + const unsigned char *tables=source_charset.pcre_tables; + + const unsigned char *a; + const unsigned char *b; + switch(kind) { + case CC_UPPER: + a=tables+lcc_offset; + b=tables+fcc_offset; + break; + case CC_LOWER: + a=tables+lcc_offset; + b=0; + break; + default: + assert(!"unknown change case kind"); + a=b=0; // calm, compiler + break; // never + } + + unsigned char index; + for(const char* current=new_cstr; index=(unsigned char)*current; current++) { + unsigned char c=a[index]; + if(b) + c=b[c]; - *dest++=(char)c; + *dest++=(char)c; + } } result.langs=langs; result.body=new_cstr; @@ -410,30 +431,26 @@ const String& String::replace(const Dict const char* current=old_cstr; while(*current) { - if(Table::element_type row=dict.first_that_begins(current)) { + if(Dictionary::Subst subst=dict.first_that_begins(current)) { // prematch if(size_t prematch_length=current-prematch_begin) { - result.langs.append(result.body, langs, prematch_begin-old_cstr, current-old_cstr); + result.langs.append(result.body, langs, prematch_begin-old_cstr, prematch_length); result.body.append_strdup_know_length(prematch_begin, prematch_length); } // match - - const String* a=row->get(0); // skip 'a' in 'current'; move prematch_begin - current+=a->length(); prematch_begin=current; + current+=subst.from_length; prematch_begin=current; - if(row->count()>1) { // are there any b? - const String* b=row->get(1); + if(const String* b=subst.to) // are there any b? result<<*b; - } } else // simply advance current++; } // postmatch if(size_t postmatch_length=current-prematch_begin) { - result.langs.append(result.body, langs, prematch_begin-old_cstr, current-old_cstr); + result.langs.append(result.body, langs, prematch_begin-old_cstr, postmatch_length); result.body.append_strdup_know_length(prematch_begin, postmatch_length); } @@ -496,6 +513,10 @@ int String::as_int() const { return result; } +static int serialize_body_char(char c, char** cur) { + *((*cur)++)=c; + return 0; // 0=continue +}; static int serialize_body_piece(const char* s, char** cur) { size_t length=strlen(s); memcpy(*cur, s, length); *cur+=length; @@ -526,8 +547,9 @@ String::Cm String::serialize(size_t prol // 3: lang info langs.for_each(body, serialize_lang_piece, &cur); // 4: letters - body.for_each(serialize_body_piece, &cur); - // 5: zero terminator already there put by new(PointerFreeGC) + body.for_each(serialize_body_char, serialize_body_piece, &cur); + // 5: zero terminator + *cur=0; return result; }