|
|
| version 1.151, 2002/04/10 08:53:55 | version 1.200, 2004/09/13 10:39:17 |
|---|---|
| Line 1 | Line 1 |
| /** @file | /** @file |
| Parser: string class. @see untaint.C. | Parser: string class. @see untaint.C. |
| Copyright (c) 2001, 2002 ArtLebedev Group (http://www.artlebedev.com) | Copyright (c) 2001-2004 ArtLebedev Group (http://www.artlebedev.com) |
| Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru) | Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru) |
| $Id$ | |
| */ | */ |
| static const char * const IDENT_STRING_C="$Date$"; | |
| #include "pcre.h" | #include "pcre.h" |
| #include "pa_pool.h" | |
| #include "pa_string.h" | #include "pa_string.h" |
| #include "pa_hash.h" | |
| #include "pa_exception.h" | #include "pa_exception.h" |
| #include "pa_common.h" | |
| #include "pa_array.h" | |
| #include "pa_globals.h" | |
| #include "pa_table.h" | #include "pa_table.h" |
| #include "pa_dictionary.h" | #include "pa_dictionary.h" |
| #include "pa_charset.h" | #include "pa_charset.h" |
| #define DEBUG_STRING_APPENDS_VS_EXPANDS | const String String::Empty; |
| #ifdef DEBUG_STRING_APPENDS_VS_EXPANDS | int pa_atoi(const char* str, const String* problem_source) { |
| ulong string_piece_appends=0; | if(!str) |
| #endif | return 0; |
| while(*str && isspace((unsigned char)*str)) | |
| str++; | |
| if(!*str) | |
| return 0; | |
| String::String(Pool& apool, const char *src, size_t src_size, bool tainted) : | int result; |
| Pooled(apool) { | char *error_pos; |
| last_chunk=&head.chunk; | bool negative=false; |
| head.chunk.count=CR_PREALLOCATED_COUNT; | if(str[0]=='-') { |
| append_here=head.chunk.rows; | negative=true; |
| str++; | |
| } else if(str[0]=='+') { | |
| str++; | |
| } | |
| // 0xABC | |
| if(str[0]=='0') | |
| if(str[1]=='x' || str[1]=='X') | |
| result=(int)(unsigned long)strtol(str, &error_pos, 0); | |
| else { | |
| // skip leading 0000, to disable octal interpretation | |
| do str++; while(*str=='0'); | |
| result=(int)strtol(str, &error_pos, 0); | |
| } | |
| else | |
| result=(int)strtol(str, &error_pos, 0); | |
| if(negative) | |
| result=-result; | |
| while(char c=*error_pos++) | |
| if(!isspace((unsigned char)c)) | |
| throw Exception("number.format", | |
| problem_source, | |
| problem_source?"invalid number (int)": "'%s' is invalid number (int)", str); | |
| if(src) | return result; |
| if(tainted) | |
| APPEND_TAINTED(src, src_size, 0, 0); | |
| else | |
| APPEND_CLEAN(src, src_size, 0, 0); | |
| } | } |
| String::String(const String& src) : | double pa_atod(const char* str, const String* problem_source) { |
| Pooled(src.pool()) { | if(!str) |
| last_chunk=&head.chunk; | return 0; |
| head.chunk.count=CR_PREALLOCATED_COUNT; | |
| append_here=head.chunk.rows; | while(*str && isspace((unsigned char)*str)) |
| str++; | |
| if(!*str) | |
| return 0; | |
| append(src, UL_UNSPECIFIED); | double result; |
| } | char *error_pos; |
| bool negative=false; | |
| if(str[0]=='-') { | |
| negative=true; | |
| str++; | |
| } else if(str[0]=='+') { | |
| str++; | |
| } | |
| // 0xABC | |
| if(str[0]=='0') | |
| if(str[1]=='x' || str[1]=='X') | |
| result=(double)(unsigned long)strtol(str, &error_pos, 0); | |
| else { | |
| // skip leading 0000, to disable octal interpretation | |
| do str++; while(*str=='0'); | |
| result=(double)strtod(str, &error_pos); | |
| } | |
| else | |
| result=(double)strtod(str, &error_pos); | |
| if(negative) | |
| result=-result; | |
| while(char c=*error_pos++) | |
| if(!isspace((unsigned char)c)) | |
| throw Exception("number.format", | |
| problem_source, | |
| problem_source?"invalid number (double)": "'%s' is invalid number (double)", str); | |
| size_t String::size() const { | |
| size_t result=0; | |
| STRING_FOREACH_ROW( | |
| result+=row->item.size; | |
| ); | |
| return result; | return result; |
| } | } |
| /// @todo not very optimal | // cord lib extension |
| uint String::used_rows() const { | |
| uint result=0; | #ifndef DOXYGEN |
| STRING_FOREACH_ROW( | typedef struct { |
| result++; | ssize_t countdown; |
| ); | char target; /* Character we're looking for */ |
| return result; | } chr_data; |
| #endif | |
| static int CORD_range_contains_chr_greater_then_proc(char c, size_t size, void* client_data) | |
| { | |
| register chr_data * d = (chr_data *)client_data; | |
| if (d -> countdown<=0) return(2); | |
| d -> countdown -= size; | |
| if (c > d -> target) return(1); | |
| return(0); | |
| } | |
| int CORD_range_contains_chr_greater_then(CORD x, size_t i, size_t n, int c) | |
| { | |
| chr_data d; | |
| d.countdown = n; | |
| d.target = c; | |
| return(CORD_block_iter(x, i, CORD_range_contains_chr_greater_then_proc, &d) == 1/*alternatives: 0 normally ended, 2=struck 'n'*/); | |
| } | |
| static int CORD_block_count_proc(char /*c*/, size_t /*size*/, void* client_data) | |
| { | |
| int* result=(int*)client_data; | |
| (*result)++; | |
| return(0); // 0=continue | |
| } | } |
| void String::expand() { | size_t CORD_block_count(CORD x) |
| uint new_chunk_count=last_chunk->count+CR_GROW_COUNT; | { |
| if(new_chunk_count>max_integral(Chunk::count_type)) | size_t result=0; |
| new_chunk_count=max_integral(Chunk::count_type); | CORD_block_iter(x, 0, CORD_block_count_proc, &result); |
| return result; | |
| Chunk *new_chunk=static_cast<Chunk *>( | |
| malloc( | |
| sizeof(Chunk)// count+interpadding(?)+rows[CR_PREALLOCATED_COUNT]+tailpadding(??) | |
| -sizeof(Chunk::rows_type) // PREALLOCATED rows | |
| +sizeof(Chunk::Row)*new_chunk_count // needed rows | |
| +sizeof(Chunk *) // link size | |
| , 10)); | |
| new_chunk->rows[new_chunk->count=new_chunk_count].link=0; | |
| last_chunk->rows[last_chunk->count].link=new_chunk; | |
| last_chunk=new_chunk; | |
| append_here=last_chunk->rows; | |
| } | } |
| String& String::real_append(STRING_APPEND_PARAMS) { | // helpers |
| if(!last_chunk) // growth stopped [we're appended as string to somebody] | |
| throw Exception(0, | |
| this, | |
| "string growth stopped (append cstr)"); | |
| if(!src) | /// String::match uses this as replace & global search table columns |
| return *this; | |
| if(!size) | |
| size=strlen(src); | |
| if(!size) | |
| return *this; | |
| #ifdef DEBUG_STRING_APPENDS_VS_EXPANDS | const int MAX_MATCH_GROUPS=100; |
| string_piece_appends++; | |
| #endif | |
| // manually unrolled to avoid extra check | class String_match_table_template_columns: public ArrayString { |
| while(size>max_integral(Chunk::Row::item_size_type)) { | public: |
| if(chunk_is_full()) | String_match_table_template_columns() { |
| expand(); | *this+=new String("prematch"); |
| *this+=new String("match"); | |
| append_here->item.ptr=src; | *this+=new String("postmatch"); |
| append_here->item.size=max_integral(Chunk::Row::item_size_type); | for(int i=0; i<MAX_MATCH_GROUPS; i++) { |
| append_here->item.lang=lang; | *this+=new String(String::Body::Format(1+i), String::L_CLEAN); |
| #ifndef NO_STRING_ORIGIN | } |
| append_here->item.origin.file=file; | |
| append_here->item.origin.line=line; | |
| #endif | |
| append_here++; | |
| src+=max_integral(Chunk::Row::item_size_type); | |
| size-=max_integral(Chunk::Row::item_size_type); | |
| } | } |
| }; | |
| if(chunk_is_full()) | Table string_match_table_template(new String_match_table_template_columns); |
| expand(); | |
| append_here->item.ptr=src; | // String::Body methods |
| append_here->item.size=size; | |
| append_here->item.lang=lang; | |
| #ifndef NO_STRING_ORIGIN | |
| append_here->item.origin.file=file; | |
| append_here->item.origin.line=line; | |
| #endif | |
| append_here++; | |
| return *this; | String::Body String::Body::Format(int value) { |
| char local[MAX_NUMBER]; | |
| size_t length=snprintf(local, MAX_NUMBER, "%d", value); | |
| return String::Body(pa_strdup(local, length), length); | |
| } | } |
| char String::first_char() const { | String::Body String::Body::trim(String::Trim_kind kind, const char* chars, |
| if(is_empty()) | size_t* out_start, size_t* out_length) const { |
| throw Exception(0, | size_t our_length=length(); |
| this, | if(!our_length) |
| "getting first char of empty string"); | return *this; |
| if(!chars) | |
| chars=" \t\n"; // white space | |
| return *head.chunk.rows[0].item.ptr; | size_t start=0; |
| } | size_t end=our_length; |
| // from left... | |
| if(kind!=TRIM_END) { | |
| CORD_pos pos; set_pos(pos, 0); | |
| while(true) { | |
| char c=CORD_pos_fetch(pos); | |
| if(strchr(chars, c)) { | |
| if(++start==our_length) | |
| return 0; // all chars are empty, just return empty string | |
| } else | |
| break; | |
| uint String::hash_code() const { | CORD_next(pos); |
| } | |
| } | |
| // from right.. | |
| if(kind!=TRIM_START) { | |
| CORD_pos pos; set_pos(pos, end-1); | |
| while(true) { | |
| char c=CORD_pos_fetch(pos); | |
| if(strchr(chars, c)) { | |
| if(--end==0) // optimization: NO need to check for 'end>=start', that's(<) impossible | |
| return 0; // all chars are empty, just return empty string | |
| } else | |
| break; | |
| CORD_prev(pos); | |
| } | |
| } | |
| if(start==0 && end==our_length) // nobody moved a thing | |
| return *this; | |
| if(out_start) | |
| *out_start=start; | |
| size_t new_length=end-start; | |
| if(out_length) | |
| *out_length=new_length; | |
| return mid(start, new_length); | |
| } | |
| static int CORD_batched_iter_fn_generic_hash_code(char c, void * client_data) { | |
| uint& result=*static_cast<uint*>(client_data); | |
| generic_hash_code(result, c); | |
| return 0; | |
| } | |
| static int CORD_batched_iter_fn_generic_hash_code(const char* s, void * client_data) { | |
| uint& result=*static_cast<uint*>(client_data); | |
| generic_hash_code(result, s); | |
| return 0; | |
| }; | |
| uint String::Body::hash_code() const { | |
| uint result=0; | uint result=0; |
| STRING_FOREACH_ROW( | CORD_iter5(body, 0, |
| result=Hash::generic_code(result, row->item.ptr, row->item.size); | CORD_batched_iter_fn_generic_hash_code, |
| ); | CORD_batched_iter_fn_generic_hash_code, &result); |
| return result; | return result; |
| } | } |
| /// @todo move 'lang' skipping to pos | // String methods |
| int String::cmp(int& partial, const String& src, | |
| size_t this_offset, Untaint_lang lang) const { | |
| partial=-1; | |
| size_t a_size=size(); | |
| this_offset=min(this_offset, a_size-1); | |
| const Chunk *a_chunk=&head.chunk; | |
| const Chunk *b_chunk=&src.head.chunk; | |
| const Chunk::Row *a_row=a_chunk->rows; | |
| const Chunk::Row *b_row=b_chunk->rows; | |
| size_t a_offset=this_offset; | |
| size_t b_offset=0; | |
| Chunk::Row *a_end=append_here; | |
| Chunk::Row *b_end=src.append_here; | |
| uint a_countdown=a_chunk->count; | |
| uint b_countdown=b_chunk->count; | |
| int result; | |
| size_t pos=0; | |
| bool a_break=a_size==0; | |
| bool b_break=src.is_empty(); | |
| if(!(a_break || b_break)) while(true) { | |
| if(pos+a_row->item.size > this_offset) { | |
| if(lang!=UL_UNSPECIFIED && a_row->item.lang>lang) | |
| return -1; // wrong lang -- bail out | |
| int size_diff= | |
| (a_row->item.size-a_offset)- | |
| (b_row->item.size-b_offset); | |
| if(size_diff==0) { // a has same size as b | |
| result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, | |
| a_row->item.size-a_offset); | |
| if(result) | |
| return result; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; a_offset=0; | |
| b_row++; b_countdown--; b_offset=0; | |
| } else if (size_diff>0) { // a longer | |
| result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, | |
| b_row->item.size-b_offset); | |
| if(result) | |
| return result; | |
| a_offset+=b_row->item.size-b_offset; | |
| b_row++; b_countdown--; b_offset=0; | |
| } else { // b longer | |
| result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, | |
| a_row->item.size-a_offset); | |
| if(result) | |
| return result; | |
| b_offset+=a_row->item.size-a_offset; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; a_offset=0; | |
| } | |
| if(b_break=b_row==b_end) { | |
| a_break=a_row==a_end; | |
| break; | |
| } | |
| if(!b_countdown) { | |
| b_chunk=b_row->link; | |
| b_row=b_chunk->rows; | |
| b_countdown=b_chunk->count; | |
| } | |
| } else { | |
| a_offset-=a_row->item.size; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; | |
| } | |
| if(a_break=a_row==a_end) { | String::String(const char* cstr, size_t helper_length, bool tainted): body(CORD_EMPTY) { |
| b_break=b_row==b_end; | append_help_length(cstr, helper_length, tainted?L_TAINTED:L_CLEAN); |
| break; | } |
| } | String::String(const String::C cstr, bool tainted): body(CORD_EMPTY) { |
| if(!a_countdown) { | append_know_length(cstr.str, cstr.length, tainted?L_TAINTED:L_CLEAN); |
| a_chunk=a_row->link; | |
| a_row=a_chunk->rows; | |
| a_countdown=a_chunk->count; | |
| } | |
| } | |
| if(a_break==b_break) { // ended simultaneously | |
| partial=0; return 0; | |
| } else if(a_break) { // first bytes equal, but a ended before b | |
| partial=1; return -1; | |
| } else { | |
| partial=2; return +1; | |
| } | |
| } | } |
| /// @todo move 'lang' skipping to pos | String& String::append_know_length(const char* str, size_t known_length, Language lang) { |
| int String::cmp(int& partial, const char* b_ptr, size_t src_size, | if(!known_length) |
| size_t this_offset, Untaint_lang lang) const { | return *this; |
| partial=-1; | |
| size_t a_size=size(); | |
| size_t b_size=src_size?src_size:b_ptr?strlen(b_ptr):0; | |
| this_offset=min(this_offset, a_size-1); | |
| const Chunk *a_chunk=&head.chunk; | |
| const Chunk::Row *a_row=a_chunk->rows; | |
| size_t a_offset=this_offset; | |
| size_t b_offset=0; | |
| Chunk::Row *a_end=append_here; | |
| uint a_countdown=a_chunk->count; | |
| size_t pos=0; | |
| bool a_break=a_size==0; | |
| bool b_break=b_size==0; | |
| if(!(a_break || b_break)) while(true) { | |
| if(pos+a_row->item.size > this_offset) { | |
| if(lang!=UL_UNSPECIFIED && a_row->item.lang>lang) | |
| return -1; // wrong lang -- bail out | |
| int size_diff= | |
| (a_row->item.size-a_offset)- | |
| (b_size-b_offset); | |
| if(size_diff==0) { // a has same size as b | |
| if(int result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset, | |
| a_row->item.size-a_offset)!=0) | |
| return result; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; a_offset=0; | |
| b_break=true; | |
| } else if (size_diff>0) { // a longer | |
| if(int result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset, | |
| b_size-b_offset)!=0) | |
| return result; | |
| a_offset+=b_size-b_offset; | |
| b_break=true; | |
| } else { // b longer | |
| if(int result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset, | |
| a_row->item.size-a_offset)!=0) | |
| return result; | |
| b_offset+=a_row->item.size-a_offset; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; a_offset=0; | |
| } | |
| } else { | |
| a_offset-=a_row->item.size; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; | |
| } | |
| a_break=a_row==a_end; | // first: langs |
| if(a_break || b_break) | langs.append(body, lang, known_length); |
| break; | // next: letters themselves |
| if(!a_countdown) { | body.append_know_length(str, known_length); |
| a_chunk=a_row->link; | |
| a_row=a_chunk->rows; | ASSERT_STRING_INVARIANT(*this); |
| a_countdown=a_chunk->count; | return *this; |
| } | |
| } | |
| if(a_break==b_break) { // ended simultaneously | |
| partial=0; return 0; | |
| } else if(a_break) { // first bytes equal, but a ended before b | |
| partial=1; return -1; | |
| } else { | |
| partial=2; return +1; | |
| } | |
| } | } |
| String& String::append_help_length(const char* str, size_t helper_length, Language lang) { | |
| if(!str) | |
| return *this; | |
| size_t known_length=helper_length?helper_length:strlen(str); | |
| if(!known_length) | |
| return *this; | |
| #ifndef NO_STRING_ORIGIN | return append_know_length(str, known_length, lang); |
| const Origin& String::origin() const { | |
| if(is_empty()) { | |
| static const Origin empty_origin={"empty string"}; | |
| return empty_origin; | |
| } | |
| // determining origin by first piece or last appended piece | |
| // because any of them can be constant=without origin: | |
| // ex: ^load[/file] "document_root" + "/file" | |
| // when last piece is constant, | |
| // ex: parser_root_auto_path{dynamic} / auto.p{const} | |
| // using first piece | |
| Origin& first_origin=head.chunk.rows[0].item.origin; | |
| return first_origin.file ? first_origin : append_here[-1].item.origin; | |
| } | } |
| #endif | String& String::append_strdup(const char* str, size_t helper_length, Language lang) { |
| size_t known_length=helper_length?helper_length:strlen(str); | |
| if(!known_length) | |
| return *this; | |
| // first: langs | |
| langs.append(body, lang, known_length); | |
| // next: letters themselves | |
| body.append_strdup_know_length(str, known_length); | |
| String& String::mid(size_t start, size_t finish) const { | ASSERT_STRING_INVARIANT(*this); |
| String& result=*NEW String(pool()); | return *this; |
| } | |
| start=max(size_t(0), start); | /// @todo check in doc: whether it documents NOW bad situation "abc".mid(-1, 3) =were?="ab" |
| finish=min(size(), finish); | String& String::mid(size_t substr_begin, size_t substr_end) const { |
| if(start==finish) | String& result=*new String; |
| size_t self_length=length(); | |
| substr_begin=min(substr_begin, self_length); | |
| substr_end=min(max(substr_end, substr_begin), self_length); | |
| size_t substr_length=substr_end-substr_begin; | |
| if(!substr_length) | |
| return result; | return result; |
| size_t pos=0; | // first: their langs |
| STRING_FOREACH_ROW( | result.langs.append(result.body, langs, substr_begin, substr_length); |
| size_t item_finish=pos+row->item.size; | // next: letters themselves |
| if(item_finish > start) { // started now or already? | result.body=body.mid(substr_begin, substr_length); |
| bool started=result.is_empty(); // started now? | |
| bool finished=finish <= item_finish; // finished now? | // SAPI::log("piece of '%s' from %d to %d is '%s'", |
| size_t offset=started?start-pos:0; | //cstr(), substr_begin, substr_end, result.cstr()); |
| size_t size=finished?finish-pos:row->item.size; | ASSERT_STRING_INVARIANT(result); |
| result.APPEND( | |
| row->item.ptr+offset, size-offset, | |
| row->item.lang, | |
| row->item.origin.file, row->item.origin.line); | |
| if(finished) | |
| goto break2; | |
| } | |
| pos+=row->item.size; | |
| ); | |
| break2: | |
| // SAPI::log(pool(), "piece of '%s' from %d to %d is '%s'", | |
| //cstr(), start, finish, result.cstr()); | |
| return result; | return result; |
| } | } |
| int String::pos(const String& substr, | size_t String::pos(const String::Body substr, size_t this_offset, Language lang) const { |
| int result, Untaint_lang lang) const { | size_t substr_length=substr.length(); |
| size_t self_size=size(); | while(true) { |
| for(; size_t(result)<self_size; result++) { | size_t substr_begin=body.pos(substr, this_offset); |
| int partial; cmp(partial, substr, result, lang); | |
| if( | if(substr_begin==CORD_NOT_FOUND) |
| partial==0 || // full match | return STRING_NOT_FOUND; |
| partial==2) // 'substr' starts 'this'+'result' | |
| return result; | if(langs.check_lang(lang, substr_begin, substr_length)) |
| return substr_begin; | |
| this_offset=substr_begin+substr_length; | |
| } | } |
| return -1; | |
| } | } |
| int String::pos(const char *substr, size_t substr_size, | size_t String::pos(const String& substr, |
| int result, Untaint_lang lang) const { | size_t this_offset, Language lang) const { |
| size_t self_size=size(); | return pos(substr.body, this_offset, lang); |
| for(; size_t(result)<self_size; result++) { | |
| int partial; cmp(partial, substr, substr_size, result, lang); | |
| if( | |
| partial==0 || // full match | |
| partial==2) // 'substr' starts 'this'+'result' | |
| return result; | |
| } | |
| return -1; | |
| } | } |
| void String::split(Array& result, | void String::split(ArrayString& result, |
| size_t* pos_after_ref, | size_t& pos_after, |
| const char *delim, size_t delim_size, | const char* delim, |
| Untaint_lang lang, int limit) const { | Language lang, int limit) const { |
| size_t self_size=size(); | size_t self_length=length(); |
| if(delim_size) { | if(size_t delim_length=strlen(delim)) { |
| size_t pos_after=pos_after_ref?*pos_after_ref:0; | size_t pos_before; |
| int pos_before; | |
| // while we have 'delim'... | // while we have 'delim'... |
| for(; (pos_before=pos(delim, delim_size, pos_after, lang))>=0 && limit; limit--) { | for(; (pos_before=pos(delim, pos_after, lang))!=STRING_NOT_FOUND && limit; limit--) { |
| result+=&mid(pos_after, pos_before); | result+=&mid(pos_after, pos_before); |
| pos_after=pos_before+delim_size; | pos_after=pos_before+delim_length; |
| } | } |
| // last piece | // last piece |
| if(pos_after<self_size && limit) { | if(pos_after<self_length && limit) { |
| result+=&mid(pos_after, self_size); | result+=&mid(pos_after, self_length); |
| pos_after=self_size; | pos_after=self_length; |
| } | } |
| if(pos_after_ref) | |
| *pos_after_ref=pos_after; | |
| } else { // empty delim | } else { // empty delim |
| result+=this; | result+=this; |
| if(pos_after_ref) | pos_after+=self_length; |
| *pos_after_ref+=self_size; | |
| } | } |
| } | } |
| void String::split(Array& result, | void String::split(ArrayString& result, |
| size_t* pos_after_ref, | size_t& pos_after, |
| const String& delim, Untaint_lang lang, | const String& delim, Language lang, |
| int limit) const { | int limit) const { |
| if(!delim.is_empty()) { | if(!delim.is_empty()) { |
| size_t pos_after=pos_after_ref?*pos_after_ref:0; | size_t pos_before; |
| int pos_before; | |
| // while we have 'delim'... | // while we have 'delim'... |
| for(; (pos_before=pos(delim, pos_after, lang))>=0 && limit; limit--) { | for(; (pos_before=pos(delim, pos_after, lang))!=STRING_NOT_FOUND && limit; limit--) { |
| result+=&mid(pos_after, pos_before); | result+=&mid(pos_after, pos_before); |
| pos_after=pos_before+delim.size(); | pos_after=pos_before+delim.length(); |
| } | } |
| // last piece | // last piece |
| if(pos_after<size() && limit) { | if(pos_after<length() && limit) { |
| result+=&mid(pos_after, size()); | result+=&mid(pos_after, length()); |
| pos_after=size(); | pos_after=length(); |
| } | } |
| if(pos_after_ref) | |
| *pos_after_ref=pos_after; | |
| } else { // empty delim | } else { // empty delim |
| result+=this; | result+=this; |
| if(pos_after_ref) | pos_after+=length(); |
| *pos_after_ref+=size(); | |
| } | } |
| } | } |
| static void regex_options(char *options, int *result){ | static void regex_options(const String* options, int *result, bool& need_pre_post_match){ |
| struct Regex_option { | struct Regex_option { |
| char key; | const char* keyL; |
| const char* keyU; | |
| int clear, set; | int clear, set; |
| int *result; | int *result; |
| bool *flag; | |
| } regex_option[]={ | } regex_option[]={ |
| {'i', 0, PCRE_CASELESS, result}, // a=A | {"i", "I", 0, PCRE_CASELESS, result, 0}, // a=A |
| {'s', 0, PCRE_DOTALL, result}, // \n\n$ [default] | {"s", "S", 0, PCRE_DOTALL, result, 0}, // \n\n$ [default] |
| {'x', 0, PCRE_EXTENDED, result}, // whitespace in regex ignored | {"x", "U", 0, PCRE_EXTENDED, result, 0}, // whitespace in regex ignored |
| {'m', PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$ | {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result, 0}, // ^aaa\n$^bbb\n$ |
| {'g', 0, true, result+1}, // many rows | {"g", "G", 0, 1, result+1, 0}, // many rows |
| {0}, | {"'", 0, 0, 0, 0, &need_pre_post_match}, |
| {0, 0, 0, 0, 0, 0} | |
| }; | }; |
| result[0]=PCRE_EXTRA | PCRE_DOTALL; | result[0]=PCRE_EXTRA | PCRE_DOTALL | PCRE_DOLLAR_ENDONLY; |
| result[1]=0; | result[1]=0; |
| if(options) | if(options && !options->is_empty()) |
| for(Regex_option *o=regex_option; o->key; o++) | for(Regex_option *o=regex_option; o->keyL; o++) |
| if( | if(options->pos(o->keyL)!=STRING_NOT_FOUND |
| strchr(options, o->key) || | || (o->keyU && options->pos(o->keyU)!=STRING_NOT_FOUND)) { |
| strchr(options, toupper(o->key))) { | if(o->flag) |
| *(o->result)&=~o->clear; | *o->flag=true; |
| *(o->result)|=o->set; | else { // result |
| *o->result &= ~o->clear; | |
| *o->result |= o->set; | |
| } | |
| } | } |
| } | } |
| /// @todo maybe need speedup: some option to remove pre/match/post string generation | Table* String::match(Charset& source_charset, |
| bool String::match( | const String& regexp, |
| const String *aorigin, | const String* options, |
| const String& regexp, | Row_action row_action, void *info, |
| const String *options, | bool& just_matched) const { |
| Table **table, | |
| Row_action row_action, void *info, | |
| bool *was_global) const { | |
| if(regexp.is_empty()) | if(regexp.is_empty()) |
| throw Exception(0, | throw Exception(0, |
| aorigin, | 0, |
| "regexp is empty"); | "regexp is empty"); |
| const char *pattern=regexp.cstr(); | |
| const char *errptr; | const char* pattern=regexp.cstr(); |
| const char* errptr; | |
| int erroffset; | int erroffset; |
| int option_bits[2]; regex_options(options?options->cstr():0, option_bits); | bool need_pre_post_match=false; |
| if(was_global) | int option_bits[2]={0}; regex_options(options, option_bits, need_pre_post_match); |
| *was_global=option_bits[1]!=0; | bool global=option_bits[1]!=0; |
| pcre *code=pcre_compile(pattern, option_bits[0], | pcre *code=pcre_compile(pattern, option_bits[0], |
| &errptr, &erroffset, | &errptr, &erroffset, |
| pool().get_source_charset().pcre_tables); | source_charset.pcre_tables); |
| if(!code) | if(!code) |
| throw Exception(0, | throw Exception(0, |
| &regexp.mid(erroffset, regexp.size()), | &regexp.mid(erroffset, regexp.length()), |
| "regular expression syntax error - %s", errptr); | "regular expression syntax error - %s", errptr); |
| int info_substrings=pcre_info(code, 0, 0); | int subpatterns=pcre_info(code, 0, 0); |
| if(info_substrings<0) { | if(subpatterns<0) { |
| pcre_free(code); | pcre_free(code); |
| throw Exception(0, | throw Exception(0, |
| aorigin, | &regexp, |
| "pcre_info error (%d)", | "pcre_info error (%d)", |
| info_substrings); | subpatterns); |
| } | } |
| int startoffset=0; | const char* subject=cstr(); |
| const char *subject=cstr(); | size_t subject_length=strlen(subject); |
| int length=strlen(subject); | const int oveclength=(1/*match*/+MAX_MATCH_GROUPS)*3; |
| int ovecsize; | int ovector[oveclength]; |
| int *ovector=(int *)malloc(sizeof(int)* | |
| (ovecsize=(1/*match*/+info_substrings)*3), 11); | // create table |
| Table::Action_options table_options; | |
| { // create table | Table& table=*new Table(string_match_table_template, table_options); |
| Array& columns=*NEW Array(pool()); | |
| columns+=string_pre_match_name; | |
| columns+=string_match_name; | |
| columns+=string_post_match_name; | |
| for(int i=1; i<=info_substrings; i++) { | |
| char *column=(char *)malloc(MAX_NUMBER); | |
| snprintf(column, MAX_NUMBER, "%d", i); | |
| columns+=NEW String(pool(), column); // .i column name | |
| } | |
| *table=NEW Table(pool(), aorigin, &columns); | |
| } | |
| int exec_option_bits=0; | int exec_option_bits=0; |
| int prestart=0; | |
| int poststart=0; | |
| int postfinish=length(); | |
| while(true) { | while(true) { |
| int exec_substrings=pcre_exec(code, 0, | int exec_substrings=pcre_exec(code, 0, |
| subject, length, startoffset, | subject, subject_length, prestart, |
| exec_option_bits, ovector, ovecsize); | exec_option_bits, ovector, oveclength); |
| if(exec_substrings==PCRE_ERROR_NOMATCH) { | if(exec_substrings==PCRE_ERROR_NOMATCH) { |
| pcre_free(code); | pcre_free(code); |
| row_action(**table, 0/*last time, no row*/, 0, 0, info); | row_action(table, 0/*last time, no row*/, 0, 0, poststart, postfinish, info); |
| return option_bits[1]!=0; // global=true+table, not global=false | if(global || subpatterns) |
| return &table; // global or with subpatterns=true+result | |
| else { | |
| just_matched=false; return 0; // not global=no result | |
| } | |
| } | } |
| if(exec_substrings<0) { | if(exec_substrings<0) { |
| pcre_free(code); | pcre_free(code); |
| throw Exception(0, | throw Exception(0, |
| aorigin, | &regexp, |
| "regular expression execute error (%d)", | "regular expression execute error (%d)", |
| exec_substrings); | exec_substrings); |
| } | } |
| Array& row=*NEW Array(pool()); | int prefinish=ovector[0]; |
| row+=&mid(0, ovector[0]); // .prematch column value | poststart=ovector[1]; |
| row+=&mid(ovector[0], ovector[1]); // .match | ArrayString* row=new ArrayString; |
| row+=&mid(ovector[1], size()); // .postmatch | if(need_pre_post_match) { |
| *row+=&mid(0, prefinish); // .prematch column value | |
| *row+=&mid(prefinish, poststart); // .match | |
| *row+=&mid(poststart, postfinish); // .postmatch | |
| } else { | |
| *row+=&Empty; // .prematch column value | |
| *row+=&Empty; // .match | |
| *row+=&Empty; // .postmatch | |
| } | |
| for(int i=1; i<exec_substrings; i++) { | for(int i=1; i<exec_substrings; i++) { |
| // -1:-1 case handled peacefully by mid() itself | // -1:-1 case handled peacefully by mid() itself |
| row+=&mid(ovector[i*2+0], ovector[i*2+1]); // .i column value | *row+=&mid(ovector[i*2+0], ovector[i*2+1]); // .i column value |
| } | } |
| row_action(**table, &row, startoffset, ovector[0], info); | row_action(table, row, prestart, prefinish, poststart, postfinish, info); |
| if(!option_bits[1] || startoffset==ovector[1]) { // not global | going to hang | if(!global || prestart==poststart) { // not global | going to hang |
| pcre_free(code); | pcre_free(code); |
| row_action(**table, 0/*last time, no row*/, 0, 0, info); | row_action(table, 0/*last time, no row*/, 0, 0, poststart, postfinish, info); |
| return true; | return &table; |
| } | } |
| startoffset=ovector[1]; | prestart=poststart; |
| /* | /* |
| if(option_bits[0] & PCRE_MULTILINE) | if(option_bits[0] & PCRE_MULTILINE) |
| Line 571 bool String::match( | Line 502 bool String::match( |
| } | } |
| } | } |
| String& String::change_case(Pool& pool, | String& String::change_case(Charset& source_charset, Change_case_kind kind) const { |
| Change_case_kind kind) const { | String& result=*new String(); |
| const unsigned char *tables=pool.get_source_charset().pcre_tables; | if(is_empty()) |
| String& result=*new(pool) String(pool); | return result; |
| const unsigned char *a; | char* new_cstr=cstrm(); |
| const unsigned char *b; | size_t new_cstr_len=length(); |
| switch(kind) { | if(source_charset.isUTF8()) { |
| case CC_UPPER: | switch(kind) { |
| a=tables+lcc_offset; | case CC_UPPER: |
| b=tables+fcc_offset; | change_case_UTF8((const XMLByte*)new_cstr, new_cstr_len, (XMLByte*)new_cstr, new_cstr_len, UTF8CaseToUpper); |
| break; | break; |
| case CC_LOWER: | case CC_LOWER: |
| a=tables+lcc_offset; | change_case_UTF8((const XMLByte*)new_cstr, new_cstr_len, (XMLByte*)new_cstr, new_cstr_len, UTF8CaseToLower); |
| b=0; | break; |
| break; | default: |
| default: | assert(!"unknown change case kind"); |
| throw Exception(0, | break; // never |
| this, | } |
| "unknown change case kind #%d", | |
| static_cast<int>(kind)); // never | } else { |
| a=b=0; // calm, compiler | const unsigned char *tables=source_charset.pcre_tables; |
| break; // never | |
| } | const unsigned char *a; |
| const unsigned char *b; | |
| switch(kind) { | |
| case CC_UPPER: | |
| a=tables+lcc_offset; | |
| b=tables+fcc_offset; | |
| break; | |
| case CC_LOWER: | |
| a=tables+lcc_offset; | |
| b=0; | |
| break; | |
| default: | |
| assert(!"unknown change case kind"); | |
| a=b=0; // calm, compiler | |
| break; // never | |
| } | |
| STRING_FOREACH_ROW( | |
| char *new_cstr=(char *)pool.malloc(row->item.size, 12); | |
| char *dest=new_cstr; | char *dest=new_cstr; |
| const char *src=row->item.ptr; | unsigned char index; |
| for(int size=row->item.size; size--; src++) { | for(const char* current=new_cstr; (index=(unsigned char)*current); current++) { |
| unsigned char c=a[(unsigned char)*src]; | unsigned char c=a[index]; |
| if(b) | if(b) |
| c=b[c]; | c=b[c]; |
| *dest++=(char)c; | *dest++=(char)c; |
| } | } |
| } | |
| result.APPEND(new_cstr, row->item.size, | result.langs=langs; |
| row->item.lang, | result.body=new_cstr; |
| row->item.origin.file, row->item.origin.line); | |
| ); | |
| return result; | return result; |
| } | } |
| void String::join_chain(Pool& pool, | const String& String::replace(const Dictionary& dict) const { |
| const Chunk*& achunk, const Chunk::Row*& arow, uint& acountdown, | String& result=*new String(); |
| uchar& joined_lang, const char *& joined_ptr, size_t& joined_size) const { | const char* old_cstr=cstr(); |
| joined_lang=arow->item.lang; | const char* prematch_begin=old_cstr; |
| // calc size | const char* current=old_cstr; |
| joined_size=0; | while(*current) { |
| { | if(Dictionary::Subst subst=dict.first_that_begins(current)) { |
| const Chunk* chunk=achunk; | // prematch |
| const Chunk::Row* row=arow; | if(size_t prematch_length=current-prematch_begin) { |
| uint countdown=acountdown; | result.langs.append(result.body, langs, prematch_begin-old_cstr, prematch_length); |
| STRING_PREPARED_FOREACH_ROW(*this, | result.body.append_strdup_know_length(prematch_begin, prematch_length); |
| if(row->item.lang==joined_lang) | |
| joined_size+=row->item.size; | |
| else | |
| break; | |
| ); | |
| } | |
| // if one row, return simply itself | |
| if(joined_size==arow->item.size) { | |
| joined_ptr=arow->item.ptr; | |
| } else { | |
| // join adjacent rows | |
| char *ptr=(char *)pool.malloc(joined_size,13); | |
| joined_ptr=ptr; | |
| const Chunk* chunk=achunk; | |
| const Chunk::Row* row=arow; | |
| uint countdown=acountdown; | |
| STRING_PREPARED_FOREACH_ROW(*this, | |
| if(row->item.lang==joined_lang) { | |
| memcpy(ptr, row->item.ptr, row->item.size); ptr+=row->item.size; | |
| } else | |
| break; // before non-ours | |
| ); | |
| // set pointers after joined piece | |
| achunk=chunk; arow=row; acountdown=countdown; | |
| // & one step back, see String::reconstruct | |
| --arow; ++acountdown; | |
| } | |
| } | |
| /// @test if in some piece were found no dict words, append it, not it's duplicate | |
| String& String::replace(Pool& pool, Dictionary& dict) const { | |
| // return reconstruct(pool).replace_in_reconstructed(pool, dict); | |
| String& result=*new(pool) String(pool); | |
| STRING_FOREACH_ROW( | |
| uchar joined_lang; | |
| const char *joined_ptr; | |
| size_t joined_size; | |
| IFNDEF_NO_STRING_ORIGIN( | |
| const char *joined_origin_file=row->item.origin.file; | |
| const size_t joined_origin_line=row->item.origin.line; | |
| ); | |
| join_chain(pool, chunk, row, countdown, | |
| joined_lang, joined_ptr, joined_size); | |
| const char *src=joined_ptr; | |
| size_t src_size=joined_size; | |
| char *new_cstr=(char *)pool.malloc((size_t)ceil(src_size*dict.max_ratio()), 14); | |
| char *dest=new_cstr; | |
| while(src_size) { | |
| // there is a row where first column starts 'src' | |
| if(Table::Item *item=dict.first_that_starts(src, src_size)) { | |
| // get a=>b values | |
| const String& a=*static_cast<Array *>(item)->get_string(0); | |
| const String& b=*static_cast<Array *>(item)->get_string(1); | |
| // skip 'a' in 'src' && reduce work size | |
| src+=a.size(); src_size-=a.size(); | |
| // write 'b' to 'dest' && skip 'b' in 'dest' | |
| b.store_to(dest); dest+=b.size(); | |
| } else { | |
| // write a char to b && reduce work size | |
| *dest++=*src++; src_size--; | |
| } | } |
| } | |
| result.APPEND(new_cstr, dest-new_cstr, joined_lang, | // match |
| joined_origin_file, joined_origin_line); | // skip 'a' in 'current'; move prematch_begin |
| ); | current+=subst.from_length; prematch_begin=current; |
| return result; | |
| } | |
| double String::as_double() const { | if(const String* b=subst.to) // are there any b? |
| double result; | result<<*b; |
| const char *cstr; | } else // simply advance |
| char buf[MAX_NUMBER]; | current++; |
| if(head.chunk.rows+1==append_here) { | } |
| int size=min(head.chunk.rows[0].item.size, MAX_NUMBER-1); | |
| memcpy(buf, head.chunk.rows[0].item.ptr, size); | |
| buf[size]=0; | |
| cstr=buf; | |
| } else | |
| cstr=this->cstr(); | |
| char *error_pos; | |
| // 0xABC | |
| if(cstr[0]=='0') | |
| if(cstr[1]=='x' || cstr[1]=='X') | |
| result=(double)(unsigned long)strtol(cstr, &error_pos, 0); | |
| else | |
| result=(double)strtod(cstr+1/*skip leading 0*/, &error_pos); | |
| else | |
| result=(double)strtod(cstr, &error_pos); | |
| if(*error_pos/*not EOS*/) | // postmatch |
| throw Exception("number.format", | if(size_t postmatch_length=current-prematch_begin) { |
| this, | result.langs.append(result.body, langs, prematch_begin-old_cstr, postmatch_length); |
| "invalid number (double)"); | result.body.append_strdup_know_length(prematch_begin, postmatch_length); |
| } | |
| ASSERT_STRING_INVARIANT(result); | |
| return result; | return result; |
| } | } |
| int String::as_int() const { | |
| int result; | |
| const char *cstr; | |
| char buf[MAX_NUMBER]; | |
| if(head.chunk.rows+1==append_here) { | |
| int size=min(head.chunk.rows[0].item.size, MAX_NUMBER-1); | |
| memcpy(buf, head.chunk.rows[0].item.ptr, size); | |
| buf[size]=0; | |
| cstr=buf; | |
| } else | |
| cstr=this->cstr(); | |
| char *error_pos; | |
| // 0xABC | |
| if(cstr[0]=='0') | |
| if(cstr[1]=='x' || cstr[1]=='X') | |
| result=(int)(unsigned long)strtol(cstr, &error_pos, 0); | |
| else | |
| result=(int)strtol(cstr+1/*skip leading 0*/, &error_pos, 0); | |
| else | |
| result=(int)strtol(cstr, &error_pos, 0); | |
| if(*error_pos/*not EOS*/) | static int serialize_body_char(char c, char** cur) { |
| throw Exception("number.format", | *((*cur)++)=c; |
| this, | return 0; // 0=continue |
| "invalid number (int)"); | }; |
| static int serialize_body_piece(const char* s, char** cur) { | |
| size_t length=strlen(s); | |
| memcpy(*cur, s, length); *cur+=length; | |
| return 0; // 0=continue | |
| }; | |
| static int serialize_lang_piece(char alang, size_t asize, char** cur) { | |
| // lang | |
| **cur=alang; (*cur)++; | |
| // length [WARNING: not cast, addresses must be %4=0 on sparc] | |
| memcpy(*cur, &asize, sizeof(asize)); *cur+=sizeof(asize); | |
| return 0; // 0=continue | |
| } | |
| String::Cm String::serialize(size_t prolog_length) const { | |
| size_t fragments_count=langs.count(); | |
| size_t buf_length= | |
| prolog_length //1 | |
| +sizeof(size_t) //2 | |
| +fragments_count*(sizeof(char)+sizeof(size_t)) //3 | |
| +body.length() //4 | |
| +1; // for zero terminator used in deserialize | |
| String::Cm result(new(PointerFreeGC) char[buf_length], buf_length); | |
| // 1: prolog | |
| char *cur=result.str+prolog_length; | |
| // 2: langs.count [WARNING: not cast, addresses must be %4=0 on sparc] | |
| memcpy(cur, &fragments_count, sizeof(fragments_count)); cur+=sizeof(fragments_count); | |
| // 3: lang info | |
| langs.for_each(body, serialize_lang_piece, &cur); | |
| // 4: letters | |
| body.for_each(serialize_body_char, serialize_body_piece, &cur); | |
| // 5: zero terminator | |
| *cur=0; | |
| return result; | return result; |
| } | } |
| bool String::deserialize(size_t prolog_length, void *buf, size_t buf_length) { | |
| inline void ushort2uchars(ushort word, uchar& byte1, uchar& byte2) { | if(buf_length<=prolog_length) |
| byte1=word&0xFF; | |
| byte2=word>>8; | |
| } | |
| inline ushort uchars2ushort(uchar byte1, uchar byte2) { | |
| return (byte2<<8) | byte1; | |
| } | |
| /* @todo maybe network order worth spending some effort? | |
| don't bothering myself with network byte order, | |
| am not planning to be able to move resulting file across platforms | |
| for now | |
| */ | |
| void String::serialize(size_t prolog_size, void *& buf, size_t& buf_size) const { | |
| buf_size= | |
| prolog_size | |
| +used_rows()*(sizeof(uchar)+sizeof(ushort)) | |
| +size(); | |
| buf=malloc(buf_size,15); | |
| char *cur=(char *)buf+prolog_size; | |
| STRING_FOREACH_ROW( | |
| // lang | |
| memcpy(cur, &row->item.lang, sizeof(uchar)); | |
| cur+=sizeof(uchar); | |
| // size | |
| uchar byte1; uchar byte2; | |
| ushort2uchars(row->item.size, byte1, byte2); | |
| memcpy(cur, &byte1, sizeof(uchar)); cur+=sizeof(uchar); | |
| memcpy(cur, &byte2, sizeof(uchar)); cur+=sizeof(uchar); | |
| // bytes | |
| memcpy(cur, row->item.ptr, row->item.size); | |
| cur+=row->item.size; | |
| ); | |
| } | |
| bool String::deserialize(size_t prolog_size, void *buf, size_t buf_size, const char *file) { | |
| if(buf_size<=prolog_size) | |
| return false; | return false; |
| buf_length-=prolog_length; | |
| buf_length-=1; // 5: zero terminator | |
| char *cur=(char *)buf+prolog_size; | // 1: prolog |
| buf_size-=prolog_size; | const char* cur=(const char* )buf+prolog_length; |
| while(buf_size) { | // 2: langs.count |
| if(sizeof(uchar)+sizeof(ushort)>buf_size) // lang+size | size_t fragments_count; |
| return false; | if(buf_length<sizeof(fragments_count)) // langs.count don't fit? |
| return false; | |
| // [WARNING: not cast, addresses must be %4=0 on sparc] | |
| memcpy(&fragments_count, cur, sizeof(fragments_count)); cur+=sizeof(fragments_count); | |
| buf_length-=sizeof(fragments_count); | |
| if(fragments_count) { | |
| // 3: lang info | |
| size_t total_length=0; | |
| for(size_t f=0; f<fragments_count; f++) { | |
| char lang; | |
| size_t fragment_length; | |
| size_t piece_length=sizeof(lang)+sizeof(fragment_length); | |
| if(buf_length<piece_length) // lang+length | |
| return false; | |
| // lang | |
| lang=*cur++; | |
| // length [WARNING: not cast, addresses must be %4=0 on sparc] | |
| memcpy(&fragment_length, cur, sizeof(fragment_length)); cur+=sizeof(fragment_length); | |
| // uchar needed to prevent propagating 0x80 bit to upper bytes | |
| langs.append(total_length, (String::Language)(uchar)lang, fragment_length); | |
| total_length+=fragment_length; | |
| uchar lang=*(uchar *)(cur); | buf_length-=piece_length; |
| ushort size=uchars2ushort( | } |
| *(uchar*)(cur+sizeof(uchar)*1), | |
| *(uchar*)(cur+sizeof(uchar)*2) | |
| ); | |
| size_t piece_size=sizeof(uchar)+sizeof(ushort)+size; | // 4: letters |
| if(piece_size>buf_size) // buffer overrun, can be on incomplete cache files | if(buf_length!=total_length) |
| return false; | return false; |
| const char *ptr=(const char*)(cur+sizeof(uchar)*3); | // serialize wrote extra zero byte there, we can rely on that |
| APPEND(ptr, size, lang, file, 0); | body=String::Body(cur, buf_length); |
| cur+=piece_size; | |
| buf_size-=piece_size; | |
| } | } |
| ASSERT_STRING_INVARIANT(*this); | |
| return true; | return true; |
| } | } |
| const char* String::Body::v() const { | |
| return CORD_to_const_char_star(body); | |
| } | |
| const char* String::Languages::v() const { | |
| if(opt.is_not_just_lang) | |
| return CORD_to_const_char_star(langs); | |
| else | |
| return (const char*)&langs; | |
| } | |
| const char* String::v() const { | |
| const uint LIMIT_VIEW=20; | |
| char* buf=(char*)malloc(MAX_STRING); | |
| const char*body_view=body.v(); | |
| const char*langs_view=langs.v(); | |
| snprintf(buf, MAX_STRING, | |
| "%d:%.*s%s} " | |
| "{%d:%s", | |
| langs.count(), LIMIT_VIEW, langs_view, strlen(langs_view)>LIMIT_VIEW?"...":"", | |
| strlen(body_view), body_view | |
| ); | |
| return buf; | |
| } | |
| const String& String::trim(String::Trim_kind kind, const char* chars) const { | |
| if(!length()) | |
| return *this; | |
| size_t substr_begin, substr_length; | |
| Body new_body=body.trim(kind, chars, &substr_begin, &substr_length); | |
| if(new_body==body) // we received unchanged pointer, do likewise | |
| return *this; | |
| // new_body differs from body, adjust langs along | |
| String& result=*new String; | |
| if(!new_body) // body.trim produced empty result | |
| return result; | |
| // body.trim produced nonempty result | |
| // first: their langs | |
| result.langs.append(result.body, langs, substr_begin, substr_length); | |
| // next: letters themselves | |
| result.body=new_body; | |
| ASSERT_STRING_INVARIANT(result); | |
| return result; | |
| } |