--- parser3/src/main/pa_string.C 2003/09/26 06:53:27 1.178 +++ parser3/src/main/pa_string.C 2003/09/29 09:42:12 1.181 @@ -5,7 +5,7 @@ Author: Alexandr Petrosian (http://paf.design.ru) */ -static const char* IDENT_STRING_C="$Date: 2003/09/26 06:53:27 $"; +static const char* IDENT_STRING_C="$Date: 2003/09/29 09:42:12 $"; #include "pcre.h" @@ -365,37 +365,49 @@ String& String::change_case(Charset& sou if(is_empty()) return result; - const unsigned char *tables=source_charset.pcre_tables; - - const unsigned char *a; - const unsigned char *b; - switch(kind) { - case CC_UPPER: - a=tables+lcc_offset; - b=tables+fcc_offset; - break; - case CC_LOWER: - a=tables+lcc_offset; - b=0; - break; - default: - throw Exception(0, - this, - "unknown change case kind #%d", - static_cast(kind)); // never - a=b=0; // calm, compiler - break; // never - } - char* new_cstr=cstrm(); char *dest=new_cstr; - unsigned char index; - for(const char* current=new_cstr; index=(unsigned char)*current; current++) { - unsigned char c=a[index]; - if(b) - c=b[c]; + if(source_charset.isUTF8()) { + switch(kind) { + case CC_UPPER: + change_case_UTF8((const XMLByte*)new_cstr, (XMLByte*)new_cstr, UTF8CaseToUpper); + break; + case CC_LOWER: + change_case_UTF8((const XMLByte*)new_cstr, (XMLByte*)new_cstr, UTF8CaseToLower); + break; + default: + assert(!"unknown change case kind"); + break; // never + } + + } else { + const unsigned char *tables=source_charset.pcre_tables; + + const unsigned char *a; + const unsigned char *b; + switch(kind) { + case CC_UPPER: + a=tables+lcc_offset; + b=tables+fcc_offset; + break; + case CC_LOWER: + a=tables+lcc_offset; + b=0; + break; + default: + assert(!"unknown change case kind"); + a=b=0; // calm, compiler + break; // never + } + + unsigned char index; + for(const char* current=new_cstr; index=(unsigned char)*current; current++) { + unsigned char c=a[index]; + if(b) + c=b[c]; - *dest++=(char)c; + *dest++=(char)c; + } } result.langs=langs; result.body=new_cstr; @@ -413,7 +425,7 @@ const String& String::replace(const Dict if(Table::element_type row=dict.first_that_begins(current)) { // prematch if(size_t prematch_length=current-prematch_begin) { - result.langs.append(result.body, langs, prematch_begin-old_cstr, current-old_cstr); + result.langs.append(result.body, langs, prematch_begin-old_cstr, prematch_length); result.body.append_strdup_know_length(prematch_begin, prematch_length); } @@ -433,7 +445,7 @@ const String& String::replace(const Dict // postmatch if(size_t postmatch_length=current-prematch_begin) { - result.langs.append(result.body, langs, prematch_begin-old_cstr, current-old_cstr); + result.langs.append(result.body, langs, prematch_begin-old_cstr, postmatch_length); result.body.append_strdup_know_length(prematch_begin, postmatch_length); } @@ -496,6 +508,10 @@ int String::as_int() const { return result; } +static int serialize_body_char(char c, char** cur) { + *((*cur)++)=c; + return 0; // 0=continue +}; static int serialize_body_piece(const char* s, char** cur) { size_t length=strlen(s); memcpy(*cur, s, length); *cur+=length; @@ -526,7 +542,7 @@ String::Cm String::serialize(size_t prol // 3: lang info langs.for_each(body, serialize_lang_piece, &cur); // 4: letters - body.for_each(serialize_body_piece, &cur); + body.for_each(serialize_body_char, serialize_body_piece, &cur); // 5: zero terminator already there put by new(PointerFreeGC) return result;