--- parser3/src/main/pa_string.C 2003/09/26 06:53:27 1.178 +++ parser3/src/main/pa_string.C 2003/09/30 05:20:16 1.183 @@ -5,7 +5,7 @@ Author: Alexandr Petrosian (http://paf.design.ru) */ -static const char* IDENT_STRING_C="$Date: 2003/09/26 06:53:27 $"; +static const char* IDENT_STRING_C="$Date: 2003/09/30 05:20:16 $"; #include "pcre.h" @@ -167,11 +167,18 @@ String& String::mid(size_t substr_begin, } size_t String::pos(const String::Body substr, size_t this_offset, Language lang) const { - size_t substr_begin=body.pos(substr, this_offset); - if(substr_begin==CORD_NOT_FOUND || !langs.check_lang(lang, substr_begin, substr.length())) - return STRING_NOT_FOUND; + size_t substr_length=substr.length(); + while(true) { + size_t substr_begin=body.pos(substr, this_offset); + + if(substr_begin==CORD_NOT_FOUND) + return STRING_NOT_FOUND; + + if(langs.check_lang(lang, substr_begin, substr_length)) + return substr_begin; - return substr_begin; + this_offset=substr_begin+substr_length; + } } size_t String::pos(const String& substr, @@ -365,37 +372,49 @@ String& String::change_case(Charset& sou if(is_empty()) return result; - const unsigned char *tables=source_charset.pcre_tables; - - const unsigned char *a; - const unsigned char *b; - switch(kind) { - case CC_UPPER: - a=tables+lcc_offset; - b=tables+fcc_offset; - break; - case CC_LOWER: - a=tables+lcc_offset; - b=0; - break; - default: - throw Exception(0, - this, - "unknown change case kind #%d", - static_cast<int>(kind)); // never - a=b=0; // calm, compiler - break; // never - } - char* new_cstr=cstrm(); char *dest=new_cstr; - unsigned char index; - for(const char* current=new_cstr; index=(unsigned char)*current; current++) { - unsigned char c=a[index]; - if(b) - c=b[c]; + if(source_charset.isUTF8()) { + switch(kind) { + case CC_UPPER: + change_case_UTF8((const XMLByte*)new_cstr, (XMLByte*)new_cstr, UTF8CaseToUpper); + break; + case CC_LOWER: + change_case_UTF8((const XMLByte*)new_cstr, (XMLByte*)new_cstr, UTF8CaseToLower); + 
break; + default: + assert(!"unknown change case kind"); + break; // never + } + + } else { + const unsigned char *tables=source_charset.pcre_tables; + + const unsigned char *a; + const unsigned char *b; + switch(kind) { + case CC_UPPER: + a=tables+lcc_offset; + b=tables+fcc_offset; + break; + case CC_LOWER: + a=tables+lcc_offset; + b=0; + break; + default: + assert(!"unknown change case kind"); + a=b=0; // calm, compiler + break; // never + } + + unsigned char index; + for(const char* current=new_cstr; index=(unsigned char)*current; current++) { + unsigned char c=a[index]; + if(b) + c=b[c]; - *dest++=(char)c; + *dest++=(char)c; + } } result.langs=langs; result.body=new_cstr; @@ -413,7 +432,7 @@ const String& String::replace(const Dict if(Table::element_type row=dict.first_that_begins(current)) { // prematch if(size_t prematch_length=current-prematch_begin) { - result.langs.append(result.body, langs, prematch_begin-old_cstr, current-old_cstr); + result.langs.append(result.body, langs, prematch_begin-old_cstr, prematch_length); result.body.append_strdup_know_length(prematch_begin, prematch_length); } @@ -433,7 +452,7 @@ const String& String::replace(const Dict // postmatch if(size_t postmatch_length=current-prematch_begin) { - result.langs.append(result.body, langs, prematch_begin-old_cstr, current-old_cstr); + result.langs.append(result.body, langs, prematch_begin-old_cstr, postmatch_length); result.body.append_strdup_know_length(prematch_begin, postmatch_length); } @@ -496,6 +515,10 @@ int String::as_int() const { return result; } +static int serialize_body_char(char c, char** cur) { + *((*cur)++)=c; + return 0; // 0=continue +}; static int serialize_body_piece(const char* s, char** cur) { size_t length=strlen(s); memcpy(*cur, s, length); *cur+=length; @@ -526,8 +549,9 @@ String::Cm String::serialize(size_t prol // 3: lang info langs.for_each(body, serialize_lang_piece, &cur); // 4: letters - body.for_each(serialize_body_piece, &cur); - // 5: zero terminator 
already there put by new(PointerFreeGC) + body.for_each(serialize_body_char, serialize_body_piece, &cur); + // 5: zero terminator + *cur=0; return result; }