|
|
| version 1.166, 2002/08/07 13:44:00 | version 1.200, 2004/09/13 10:39:17 |
|---|---|
| Line 1 | Line 1 |
| /** @file | /** @file |
| Parser: string class. @see untasize_t.C. | Parser: string class. @see untalength_t.C. |
| Copyright (c) 2001, 2002 ArtLebedev Group (http://www.artlebedev.com) | Copyright (c) 2001-2004 ArtLebedev Group (http://www.artlebedev.com) |
| Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru) | Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru) |
| */ | */ |
| static const char* IDENT_STRING_C="$Date$"; | static const char * const IDENT_STRING_C="$Date$"; |
| #include "pcre.h" | #include "pcre.h" |
| #include "pa_pool.h" | |
| #include "pa_string.h" | #include "pa_string.h" |
| #include "pa_hash.h" | |
| #include "pa_exception.h" | #include "pa_exception.h" |
| #include "pa_common.h" | |
| #include "pa_array.h" | |
| #include "pa_globals.h" | |
| #include "pa_table.h" | #include "pa_table.h" |
| #include "pa_dictionary.h" | #include "pa_dictionary.h" |
| #include "pa_charset.h" | #include "pa_charset.h" |
| #define DEBUG_STRING_APPENDS_VS_EXPANDS | const String String::Empty; |
| int pa_atoi(const char* str, const String* problem_source) { | |
| if(!str) | |
| return 0; | |
| #ifdef DEBUG_STRING_APPENDS_VS_EXPANDS | while(*str && isspace((unsigned char)*str)) |
| ulong string_piece_appends=0; | str++; |
| #endif | if(!*str) |
| return 0; | |
| String& String::OnPool(Pool& apool, const char *local_src, size_t src_size, bool tainted) { | int result; |
| if(local_src && *local_src) { | char *error_pos; |
| if(src_size==0) | bool negative=false; |
| src_size=strlen(local_src); | if(str[0]=='-') { |
| negative=true; | |
| char *pooled_src=(char *)apool.malloc(src_size); | str++; |
| memcpy(pooled_src, local_src, src_size); | } else if(str[0]=='+') { |
| return *new(apool) String(apool, pooled_src, src_size, tainted); | str++; |
| } else | } |
| return *new(apool) String(apool); | // 0xABC |
| } | if(str[0]=='0') |
| String::String(Pool& apool, const char *src, size_t src_size, bool tainted) : | if(str[1]=='x' || str[1]=='X') |
| Pooled(apool) { | result=(int)(unsigned long)strtol(str, &error_pos, 0); |
| last_chunk=&head.chunk; | else { |
| head.chunk.count=CR_PREALLOCATED_COUNT; | // skip leading 0000, to disable octal interpretation |
| append_here=head.chunk.rows; | do str++; while(*str=='0'); |
| result=(int)strtol(str, &error_pos, 0); | |
| if(src) | } |
| if(tainted) | else |
| APPEND_TAINTED(src, src_size, 0, 0); | result=(int)strtol(str, &error_pos, 0); |
| else | if(negative) |
| APPEND_CLEAN(src, src_size, 0, 0); | result=-result; |
| } | |
| String::String(const String& src) : | |
| Pooled(src.pool()) { | |
| last_chunk=&head.chunk; | |
| head.chunk.count=CR_PREALLOCATED_COUNT; | |
| append_here=head.chunk.rows; | |
| append(src, UL_UNSPECIFIED); | while(char c=*error_pos++) |
| } | if(!isspace((unsigned char)c)) |
| throw Exception("number.format", | |
| problem_source, | |
| problem_source?"invalid number (int)": "'%s' is invalid number (int)", str); | |
| size_t String::size() const { | |
| size_t result=0; | |
| STRING_FOREACH_ROW( | |
| result+=row->item.size; | |
| ); | |
| return result; | return result; |
| } | } |
| /// @todo not very optimal | double pa_atod(const char* str, const String* problem_source) { |
| uint String::used_rows() const { | if(!str) |
| uint result=0; | return 0; |
| STRING_FOREACH_ROW( | |
| result++; | |
| ); | |
| return result; | |
| } | |
| void String::expand() { | |
| uint new_chunk_count=last_chunk->count+CR_GROW_COUNT; | |
| if(new_chunk_count>max_integral(Chunk::count_type)) | |
| new_chunk_count=max_integral(Chunk::count_type); | |
| Chunk *new_chunk=static_cast<Chunk *>(malloc( | |
| sizeof(Chunk)// count+interpadding(?)+rows[CR_PREALLOCATED_COUNT]+tailpadding(??) | |
| -sizeof(Chunk::rows_type) // PREALLOCATED rows | |
| +sizeof(Chunk::Row)*new_chunk_count // neaded rows | |
| +sizeof(Chunk *) // link size | |
| , 10)); | |
| new_chunk->rows[new_chunk->count=new_chunk_count].link=0; | |
| last_chunk->rows[last_chunk->count].link=new_chunk; | |
| last_chunk=new_chunk; | |
| append_here=last_chunk->rows; | |
| } | |
| String& String::real_append(STRING_APPEND_PARAMS) { | while(*str && isspace((unsigned char)*str)) |
| if(!last_chunk) // growth stopped [we're appended as string to somebody] | str++; |
| throw Exception(0, | if(!*str) |
| this, | return 0; |
| "string growth stopped (append cstr)"); | |
| if(!src) | double result; |
| return *this; | char *error_pos; |
| if(!size) | bool negative=false; |
| size=strlen(src); | if(str[0]=='-') { |
| if(!size) | negative=true; |
| return *this; | str++; |
| } else if(str[0]=='+') { | |
| str++; | |
| } | |
| // 0xABC | |
| if(str[0]=='0') | |
| if(str[1]=='x' || str[1]=='X') | |
| result=(double)(unsigned long)strtol(str, &error_pos, 0); | |
| else { | |
| // skip leading 0000, to disable octal interpretation | |
| do str++; while(*str=='0'); | |
| result=(double)strtod(str, &error_pos); | |
| } | |
| else | |
| result=(double)strtod(str, &error_pos); | |
| if(negative) | |
| result=-result; | |
| #ifdef DEBUG_STRING_APPENDS_VS_EXPANDS | while(char c=*error_pos++) |
| string_piece_appends++; | if(!isspace((unsigned char)c)) |
| #endif | throw Exception("number.format", |
| problem_source, | |
| problem_source?"invalid number (double)": "'%s' is invalid number (double)", str); | |
| return result; | |
| } | |
| // manually unrolled to avoid extra check | // cord lib extension |
| while(size>max_integral(Chunk::Row::item_size_type)) { | |
| if(chunk_is_full()) | #ifndef DOXYGEN |
| expand(); | typedef struct { |
| ssize_t countdown; | |
| append_here->item.ptr=src; | char target; /* Character we're looking for */ |
| append_here->item.size=max_integral(Chunk::Row::item_size_type); | } chr_data; |
| append_here->item.lang=lang; | |
| #ifndef NO_STRING_ORIGIN | |
| append_here->item.origin.file=file; | |
| append_here->item.origin.line=line; | |
| #endif | #endif |
| append_here++; | static int CORD_range_contains_chr_greater_then_proc(char c, size_t size, void* client_data) |
| { | |
| register chr_data * d = (chr_data *)client_data; | |
| if (d -> countdown<=0) return(2); | |
| d -> countdown -= size; | |
| if (c > d -> target) return(1); | |
| return(0); | |
| } | |
| int CORD_range_contains_chr_greater_then(CORD x, size_t i, size_t n, int c) | |
| { | |
| chr_data d; | |
| d.countdown = n; | |
| d.target = c; | |
| return(CORD_block_iter(x, i, CORD_range_contains_chr_greater_then_proc, &d) == 1/*alternatives: 0 normally ended, 2=struck 'n'*/); | |
| } | |
| static int CORD_block_count_proc(char /*c*/, size_t /*size*/, void* client_data) | |
| { | |
| int* result=(int*)client_data; | |
| (*result)++; | |
| return(0); // 0=continue | |
| } | |
| size_t CORD_block_count(CORD x) | |
| { | |
| size_t result=0; | |
| CORD_block_iter(x, 0, CORD_block_count_proc, &result); | |
| return result; | |
| } | |
| src+=max_integral(Chunk::Row::item_size_type); | // helpers |
| size-=max_integral(Chunk::Row::item_size_type); | |
| } | |
| if(chunk_is_full()) | /// String::match uses this as replace & global search table columns |
| expand(); | |
| append_here->item.ptr=src; | const int MAX_MATCH_GROUPS=100; |
| append_here->item.size=size; | |
| append_here->item.lang=lang; | |
| #ifndef NO_STRING_ORIGIN | |
| append_here->item.origin.file=file; | |
| append_here->item.origin.line=line; | |
| #endif | |
| append_here++; | |
| return *this; | class String_match_table_template_columns: public ArrayString { |
| } | public: |
| String_match_table_template_columns() { | |
| *this+=new String("prematch"); | |
| *this+=new String("match"); | |
| *this+=new String("postmatch"); | |
| for(int i=0; i<MAX_MATCH_GROUPS; i++) { | |
| *this+=new String(String::Body::Format(1+i), String::L_CLEAN); | |
| } | |
| } | |
| }; | |
| char String::first_char() const { | Table string_match_table_template(new String_match_table_template_columns); |
| if(is_empty()) | |
| throw Exception(0, | |
| this, | |
| "getting first char of empty string"); | |
| return *head.chunk.rows[0].item.ptr; | // String::Body methods |
| } | |
| uint String::hash_code() const { | String::Body String::Body::Format(int value) { |
| uint result=0; | char local[MAX_NUMBER]; |
| STRING_FOREACH_ROW( | size_t length=snprintf(local, MAX_NUMBER, "%d", value); |
| result=Hash::generic_code(result, row->item.ptr, row->item.size); | return String::Body(pa_strdup(local, length), length); |
| ); | |
| return result; | |
| } | } |
| /// @todo move 'lang' skipping to pos | String::Body String::Body::trim(String::Trim_kind kind, const char* chars, |
| int String::cmp(int& partial, const String& src, | size_t* out_start, size_t* out_length) const { |
| size_t this_offset, Untaint_lang lang) const { | size_t our_length=length(); |
| partial=-1; | if(!our_length) |
| size_t a_size=size(); | return *this; |
| this_offset=min(this_offset, a_size-1); | if(!chars) |
| chars=" \t\n"; // white space | |
| const Chunk *a_chunk=&head.chunk; | |
| const Chunk *b_chunk=&src.head.chunk; | |
| const Chunk::Row *a_row=a_chunk->rows; | |
| const Chunk::Row *b_row=b_chunk->rows; | |
| size_t a_offset=this_offset; | |
| size_t b_offset=0; | |
| Chunk::Row *a_end=append_here; | |
| Chunk::Row *b_end=src.append_here; | |
| uint a_countdown=a_chunk->count; | |
| uint b_countdown=b_chunk->count; | |
| int result; | |
| size_t pos=0; | |
| bool a_break=a_size==0; | size_t start=0; |
| bool b_break=src.is_empty(); | size_t end=our_length; |
| if(!(a_break || b_break)) while(true) { | // from left... |
| if(pos+a_row->item.size > this_offset) { | if(kind!=TRIM_END) { |
| if(lang!=UL_UNSPECIFIED && a_row->item.lang>lang) | CORD_pos pos; set_pos(pos, 0); |
| return -1; // wrong lang -- bail out | while(true) { |
| char c=CORD_pos_fetch(pos); | |
| int size_diff= | if(strchr(chars, c)) { |
| (a_row->item.size-a_offset)- | if(++start==our_length) |
| (b_row->item.size-b_offset); | return 0; // all chars are empty, just return empty string |
| } else | |
| if(size_diff==0) { // a has same size as b | |
| result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, | |
| a_row->item.size-a_offset); | |
| if(result) | |
| return result; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; a_offset=0; | |
| b_row++; b_countdown--; b_offset=0; | |
| } else if (size_diff>0) { // a longer | |
| result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, | |
| b_row->item.size-b_offset); | |
| if(result) | |
| return result; | |
| a_offset+=b_row->item.size-b_offset; | |
| b_row++; b_countdown--; b_offset=0; | |
| } else { // b longer | |
| result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, | |
| a_row->item.size-a_offset); | |
| if(result) | |
| return result; | |
| b_offset+=a_row->item.size-a_offset; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; a_offset=0; | |
| } | |
| if(b_break=b_row==b_end) { | |
| a_break=a_row==a_end; | |
| break; | break; |
| } | |
| if(!b_countdown) { | |
| b_chunk=b_row->link; | |
| b_row=b_chunk->rows; | |
| b_countdown=b_chunk->count; | |
| } | |
| } else { | |
| a_offset-=a_row->item.size; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; | |
| } | |
| if(a_break=a_row==a_end) { | CORD_next(pos); |
| b_break=b_row==b_end; | |
| break; | |
| } | |
| if(!a_countdown) { | |
| a_chunk=a_row->link; | |
| a_row=a_chunk->rows; | |
| a_countdown=a_chunk->count; | |
| } | } |
| } | } |
| if(a_break==b_break) { // ended simultaneously | // from right.. |
| partial=0; return 0; | if(kind!=TRIM_START) { |
| } else if(a_break) { // first bytes equal, but a ended before b | CORD_pos pos; set_pos(pos, end-1); |
| partial=1; return -1; | while(true) { |
| } else { | char c=CORD_pos_fetch(pos); |
| partial=2; return +1; | if(strchr(chars, c)) { |
| if(--end==0) // optimization: NO need to check for 'end>=start', that's(<) impossible | |
| return 0; // all chars are empty, just return empty string | |
| } else | |
| break; | |
| CORD_prev(pos); | |
| } | |
| } | } |
| if(start==0 && end==our_length) // nobody moved a thing | |
| return *this; | |
| if(out_start) | |
| *out_start=start; | |
| size_t new_length=end-start; | |
| if(out_length) | |
| *out_length=new_length; | |
| return mid(start, new_length); | |
| } | |
| static int CORD_batched_iter_fn_generic_hash_code(char c, void * client_data) { | |
| uint& result=*static_cast<uint*>(client_data); | |
| generic_hash_code(result, c); | |
| return 0; | |
| } | |
| static int CORD_batched_iter_fn_generic_hash_code(const char* s, void * client_data) { | |
| uint& result=*static_cast<uint*>(client_data); | |
| generic_hash_code(result, s); | |
| return 0; | |
| }; | |
| uint String::Body::hash_code() const { | |
| uint result=0; | |
| CORD_iter5(body, 0, | |
| CORD_batched_iter_fn_generic_hash_code, | |
| CORD_batched_iter_fn_generic_hash_code, &result); | |
| return result; | |
| } | } |
| /// @todo move 'lang' skipping to pos | // String methods |
| int String::cmp(int& partial, const char* b_ptr, size_t src_size, | |
| size_t this_offset, Untaint_lang lang) const { | |
| partial=-1; | |
| size_t a_size=size(); | |
| size_t b_size=src_size?src_size:b_ptr?strlen(b_ptr):0; | |
| this_offset=min(this_offset, a_size-1); | |
| const Chunk *a_chunk=&head.chunk; | |
| const Chunk::Row *a_row=a_chunk->rows; | |
| size_t a_offset=this_offset; | |
| size_t b_offset=0; | |
| Chunk::Row *a_end=append_here; | |
| uint a_countdown=a_chunk->count; | |
| size_t pos=0; | |
| bool a_break=a_size==0; | |
| bool b_break=b_size==0; | |
| if(!(a_break || b_break)) while(true) { | |
| if(pos+a_row->item.size > this_offset) { | |
| if(lang!=UL_UNSPECIFIED && a_row->item.lang>lang) | |
| return -1; // wrong lang -- bail out | |
| int size_diff= | |
| (a_row->item.size-a_offset)- | |
| (b_size-b_offset); | |
| if(size_diff==0) { // a has same size as b | |
| if(int result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset, | |
| a_row->item.size-a_offset)!=0) | |
| return result; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; a_offset=0; | |
| b_break=true; | |
| } else if (size_diff>0) { // a longer | |
| if(int result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset, | |
| b_size-b_offset)!=0) | |
| return result; | |
| a_offset+=b_size-b_offset; | |
| b_break=true; | |
| } else { // b longer | |
| if(int result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset, | |
| a_row->item.size-a_offset)!=0) | |
| return result; | |
| b_offset+=a_row->item.size-a_offset; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; a_offset=0; | |
| } | |
| } else { | |
| a_offset-=a_row->item.size; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; | |
| } | |
| a_break=a_row==a_end; | String::String(const char* cstr, size_t helper_length, bool tainted): body(CORD_EMPTY) { |
| if(a_break || b_break) | append_help_length(cstr, helper_length, tainted?L_TAINTED:L_CLEAN); |
| break; | } |
| if(!a_countdown) { | String::String(const String::C cstr, bool tainted): body(CORD_EMPTY) { |
| a_chunk=a_row->link; | append_know_length(cstr.str, cstr.length, tainted?L_TAINTED:L_CLEAN); |
| a_row=a_chunk->rows; | |
| a_countdown=a_chunk->count; | |
| } | |
| } | |
| if(a_break==b_break) { // ended simultaneously | |
| partial=0; return 0; | |
| } else if(a_break) { // first bytes equal, but a ended before b | |
| partial=1; return -1; | |
| } else { | |
| partial=2; return +1; | |
| } | |
| } | } |
| #ifndef NO_STRING_ORIGIN | String& String::append_know_length(const char* str, size_t known_length, Language lang) { |
| const Origin& String::origin() const { | if(!known_length) |
| if(is_empty()) { | return *this; |
| static const Origin empty_origin={"empty string"}; | |
| return empty_origin; | // first: langs |
| } | langs.append(body, lang, known_length); |
| // next: letters themselves | |
| // determining origin by first piece or last appended piece | body.append_know_length(str, known_length); |
| // because any of them can be constant=without origin: | |
| // ex: ^load[/file] "document_root" + "/file" | ASSERT_STRING_INVARIANT(*this); |
| // when last peice is constant, | return *this; |
| // ex: parser_root_auto_path{dynamic} / auto.p{const} | |
| // using first piece | |
| Origin& first_origin=head.chunk.rows[0].item.origin; | |
| return first_origin.file ? first_origin : append_here[-1].item.origin; | |
| } | } |
| #endif | String& String::append_help_length(const char* str, size_t helper_length, Language lang) { |
| if(!str) | |
| return *this; | |
| size_t known_length=helper_length?helper_length:strlen(str); | |
| if(!known_length) | |
| return *this; | |
| String& String::mid(size_t start, size_t finish) const { | return append_know_length(str, known_length, lang); |
| String& result=*NEW String(pool()); | } |
| String& String::append_strdup(const char* str, size_t helper_length, Language lang) { | |
| size_t known_length=helper_length?helper_length:strlen(str); | |
| if(!known_length) | |
| return *this; | |
| start=min(start, size()); | // first: langs |
| finish=max(finish, start); | langs.append(body, lang, known_length); |
| if(start==finish) | // next: letters themselves |
| body.append_strdup_know_length(str, known_length); | |
| ASSERT_STRING_INVARIANT(*this); | |
| return *this; | |
| } | |
| /// @todo check in doc: whether it documents NOW bad situation "abc".mid(-1, 3) =were?="ab" | |
| String& String::mid(size_t substr_begin, size_t substr_end) const { | |
| String& result=*new String; | |
| size_t self_length=length(); | |
| substr_begin=min(substr_begin, self_length); | |
| substr_end=min(max(substr_end, substr_begin), self_length); | |
| size_t substr_length=substr_end-substr_begin; | |
| if(!substr_length) | |
| return result; | return result; |
| size_t pos=0; | // first: their langs |
| STRING_FOREACH_ROW( | result.langs.append(result.body, langs, substr_begin, substr_length); |
| size_t item_finish=pos+row->item.size; | // next: letters themselves |
| if(item_finish > start) { // started now or already? | result.body=body.mid(substr_begin, substr_length); |
| bool started=result.is_empty(); // started now? | |
| bool finished=finish <= item_finish; // finished now? | // SAPI::log("piece of '%s' from %d to %d is '%s'", |
| size_t offset=started?start-pos:0; | //cstr(), substr_begin, substr_end, result.cstr()); |
| size_t size=finished?finish-pos:row->item.size; | ASSERT_STRING_INVARIANT(result); |
| result.APPEND( | |
| row->item.ptr+offset, size-offset, | |
| row->item.lang, | |
| row->item.origin.file, row->item.origin.line); | |
| if(finished) | |
| goto break2; | |
| } | |
| pos+=row->item.size; | |
| ); | |
| break2: | |
| // SAPI::log(pool(), "piece of '%s' from %d to %d is '%s'", | |
| //cstr(), start, finish, result.cstr()); | |
| return result; | return result; |
| } | } |
| int String::pos(const String& substr, | size_t String::pos(const String::Body substr, size_t this_offset, Language lang) const { |
| int result, Untaint_lang lang) const { | size_t substr_length=substr.length(); |
| size_t self_size=size(); | while(true) { |
| for(; size_t(result)<self_size; result++) { | size_t substr_begin=body.pos(substr, this_offset); |
| int partial; cmp(partial, substr, result, lang); | |
| if( | if(substr_begin==CORD_NOT_FOUND) |
| partial==0 || // full match | return STRING_NOT_FOUND; |
| partial==2) // 'substr' starts 'this'+'result' | |
| return result; | if(langs.check_lang(lang, substr_begin, substr_length)) |
| return substr_begin; | |
| this_offset=substr_begin+substr_length; | |
| } | } |
| return -1; | |
| } | } |
| int String::pos(const char *substr, size_t substr_size, | size_t String::pos(const String& substr, |
| int result, Untaint_lang lang) const { | size_t this_offset, Language lang) const { |
| size_t self_size=size(); | return pos(substr.body, this_offset, lang); |
| for(; size_t(result)<self_size; result++) { | |
| int partial; cmp(partial, substr, substr_size, result, lang); | |
| if( | |
| partial==0 || // full match | |
| partial==2) // 'substr' starts 'this'+'result' | |
| return result; | |
| } | |
| return -1; | |
| } | } |
| void String::split(Array& result, | void String::split(ArrayString& result, |
| size_t* pos_after_ref, | size_t& pos_after, |
| const char *delim, size_t delim_size, | const char* delim, |
| Untaint_lang lang, int limit) const { | Language lang, int limit) const { |
| size_t self_size=size(); | size_t self_length=length(); |
| if(delim_size) { | if(size_t delim_length=strlen(delim)) { |
| size_t pos_after=pos_after_ref?*pos_after_ref:0; | size_t pos_before; |
| int pos_before; | |
| // while we have 'delim'... | // while we have 'delim'... |
| for(; (pos_before=pos(delim, delim_size, pos_after, lang))>=0 && limit; limit--) { | for(; (pos_before=pos(delim, pos_after, lang))!=STRING_NOT_FOUND && limit; limit--) { |
| result+=&mid(pos_after, pos_before); | result+=&mid(pos_after, pos_before); |
| pos_after=pos_before+delim_size; | pos_after=pos_before+delim_length; |
| } | } |
| // last piece | // last piece |
| if(pos_after<self_size && limit) { | if(pos_after<self_length && limit) { |
| result+=&mid(pos_after, self_size); | result+=&mid(pos_after, self_length); |
| pos_after=self_size; | pos_after=self_length; |
| } | } |
| if(pos_after_ref) | |
| *pos_after_ref=pos_after; | |
| } else { // empty delim | } else { // empty delim |
| result+=this; | result+=this; |
| if(pos_after_ref) | pos_after+=self_length; |
| *pos_after_ref+=self_size; | |
| } | } |
| } | } |
| void String::split(Array& result, | void String::split(ArrayString& result, |
| size_t* pos_after_ref, | size_t& pos_after, |
| const String& delim, Untaint_lang lang, | const String& delim, Language lang, |
| int limit) const { | int limit) const { |
| if(!delim.is_empty()) { | if(!delim.is_empty()) { |
| size_t pos_after=pos_after_ref?*pos_after_ref:0; | size_t pos_before; |
| int pos_before; | |
| // while we have 'delim'... | // while we have 'delim'... |
| for(; (pos_before=pos(delim, pos_after, lang))>=0 && limit; limit--) { | for(; (pos_before=pos(delim, pos_after, lang))!=STRING_NOT_FOUND && limit; limit--) { |
| result+=&mid(pos_after, pos_before); | result+=&mid(pos_after, pos_before); |
| pos_after=pos_before+delim.size(); | pos_after=pos_before+delim.length(); |
| } | } |
| // last piece | // last piece |
| if(pos_after<size() && limit) { | if(pos_after<length() && limit) { |
| result+=&mid(pos_after, size()); | result+=&mid(pos_after, length()); |
| pos_after=size(); | pos_after=length(); |
| } | } |
| if(pos_after_ref) | |
| *pos_after_ref=pos_after; | |
| } else { // empty delim | } else { // empty delim |
| result+=this; | result+=this; |
| if(pos_after_ref) | pos_after+=length(); |
| *pos_after_ref+=size(); | |
| } | } |
| } | } |
| static void regex_options(const String *options, int *result, bool& need_pre_post_match){ | static void regex_options(const String* options, int *result, bool& need_pre_post_match){ |
| struct Regex_option { | struct Regex_option { |
| const char *keyL; | const char* keyL; |
| const char *keyU; | const char* keyU; |
| int clear, set; | int clear, set; |
| int *result; | int *result; |
| bool *flag; | bool *flag; |
| } regex_option[]={ | } regex_option[]={ |
| {"i", "I", 0, PCRE_CASELESS, result}, // a=A | {"i", "I", 0, PCRE_CASELESS, result, 0}, // a=A |
| {"s", "S", 0, PCRE_DOTALL, result}, // \n\n$ [default] | {"s", "S", 0, PCRE_DOTALL, result, 0}, // \n\n$ [default] |
| {"x", "U", 0, PCRE_EXTENDED, result}, // whitespace in regex ignored | {"x", "U", 0, PCRE_EXTENDED, result, 0}, // whitespace in regex ignored |
| {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$ | {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result, 0}, // ^aaa\n$^bbb\n$ |
| {"g", "G", 0, true, result+1}, // many rows | {"g", "G", 0, 1, result+1, 0}, // many rows |
| {"'", 0, 0, 0, 0, &need_pre_post_match}, | {"'", 0, 0, 0, 0, &need_pre_post_match}, |
| {0} | {0, 0, 0, 0, 0, 0} |
| }; | }; |
| result[0]=PCRE_EXTRA | PCRE_DOTALL; | result[0]=PCRE_EXTRA | PCRE_DOTALL | PCRE_DOLLAR_ENDONLY; |
| result[1]=0; | result[1]=0; |
| if(options) | if(options && !options->is_empty()) |
| for(Regex_option *o=regex_option; o->keyL; o++) | for(Regex_option *o=regex_option; o->keyL; o++) |
| if(options->pos(o->keyL)>=0 | if(options->pos(o->keyL)!=STRING_NOT_FOUND |
| || (o->keyU && options->pos(o->keyU)>=0)) { | || (o->keyU && options->pos(o->keyU)!=STRING_NOT_FOUND)) { |
| if(o->flag) | if(o->flag) |
| *o->flag=true; | *o->flag=true; |
| else { // result | else { // result |
| Line 484 static void regex_options(const String * | Line 398 static void regex_options(const String * |
| } | } |
| } | } |
| /// @todo make replacement Table stacked | Table* String::match(Charset& source_charset, |
| bool String::match( | const String& regexp, |
| const String *aorigin, | const String* options, |
| const String& regexp, | Row_action row_action, void *info, |
| const String *options, | bool& just_matched) const { |
| Table **table, | |
| Row_action row_action, void *info, | |
| bool *was_global) const { | |
| if(regexp.is_empty()) | if(regexp.is_empty()) |
| throw Exception(0, | throw Exception(0, |
| aorigin, | 0, |
| "regexp is empty"); | "regexp is empty"); |
| const char *pattern=regexp.cstr(); | const char* pattern=regexp.cstr(); |
| const char *errptr; | const char* errptr; |
| int erroffset; | int erroffset; |
| bool need_pre_post_match=false; | bool need_pre_post_match=false; |
| int option_bits[2]; regex_options(options, option_bits, need_pre_post_match); | int option_bits[2]={0}; regex_options(options, option_bits, need_pre_post_match); |
| if(was_global) | bool global=option_bits[1]!=0; |
| *was_global=option_bits[1]!=0; | |
| pcre *code=pcre_compile(pattern, option_bits[0], | pcre *code=pcre_compile(pattern, option_bits[0], |
| &errptr, &erroffset, | &errptr, &erroffset, |
| pool().get_source_charset().pcre_tables); | source_charset.pcre_tables); |
| if(!code) | if(!code) |
| throw Exception(0, | throw Exception(0, |
| &regexp.mid(erroffset, regexp.size()), | &regexp.mid(erroffset, regexp.length()), |
| "regular expression syntax error - %s", errptr); | "regular expression syntax error - %s", errptr); |
| int info_substrings=pcre_info(code, 0, 0); | int subpatterns=pcre_info(code, 0, 0); |
| if(info_substrings<0) { | if(subpatterns<0) { |
| pcre_free(code); | pcre_free(code); |
| throw Exception(0, | throw Exception(0, |
| aorigin, | &regexp, |
| "pcre_info error (%d)", | "pcre_info error (%d)", |
| info_substrings); | subpatterns); |
| } | } |
| const char *subject=cstr(); | const char* subject=cstr(); |
| int length=strlen(subject); | size_t subject_length=strlen(subject); |
| const int ovecsize=(1/*match*/+MAX_STRING_MATCH_TABLE_COLUMNS)*3; | const int oveclength=(1/*match*/+MAX_MATCH_GROUPS)*3; |
| int ovector[ovecsize]; | int ovector[oveclength]; |
| // create table | // create table |
| *table=NEW Table(pool(), *string_match_table_template); | Table::Action_options table_options; |
| Table& table=*new Table(string_match_table_template, table_options); | |
| int exec_option_bits=0; | int exec_option_bits=0; |
| int prestart=0; | int prestart=0; |
| int poststart=0; | int poststart=0; |
| int postfinish=size(); | int postfinish=length(); |
| while(true) { | while(true) { |
| int exec_substrings=pcre_exec(code, 0, | int exec_substrings=pcre_exec(code, 0, |
| subject, length, prestart, | subject, subject_length, prestart, |
| exec_option_bits, ovector, ovecsize); | exec_option_bits, ovector, oveclength); |
| if(exec_substrings==PCRE_ERROR_NOMATCH) { | if(exec_substrings==PCRE_ERROR_NOMATCH) { |
| pcre_free(code); | pcre_free(code); |
| row_action(**table, 0/*last time, no row*/, 0, 0, poststart, postfinish, info); | row_action(table, 0/*last time, no raw*/, 0, 0, poststart, postfinish, info); |
| return option_bits[1]!=0; // global=true+table, not global=false | if(global || subpatterns) |
| return &table; // global or with subpatterns=true+result | |
| else { | |
| just_matched=false; return 0; // not global=no result | |
| } | |
| } | } |
| if(exec_substrings<0) { | if(exec_substrings<0) { |
| pcre_free(code); | pcre_free(code); |
| throw Exception(0, | throw Exception(0, |
| aorigin, | &regexp, |
| "regular expression execute error (%d)", | "regular expression execute error (%d)", |
| exec_substrings); | exec_substrings); |
| } | } |
| int prefinish=ovector[0]; | int prefinish=ovector[0]; |
| poststart=ovector[1]; | poststart=ovector[1]; |
| Array& row=*NEW Array(pool()); | ArrayString* row=new ArrayString; |
| row+=need_pre_post_match?&mid(0, prefinish):0; // .prematch column value | if(need_pre_post_match) { |
| row+=need_pre_post_match?&mid(prefinish, poststart):0; // .match | *row+=&mid(0, prefinish); // .prematch column value |
| row+=need_pre_post_match?&mid(poststart, postfinish):0; // .postmatch | *row+=&mid(prefinish, poststart); // .match |
| *row+=&mid(poststart, postfinish); // .postmatch | |
| } else { | |
| *row+=&Empty; // .prematch column value | |
| *row+=&Empty; // .match | |
| *row+=&Empty; // .postmatch | |
| } | |
| for(int i=1; i<exec_substrings; i++) { | for(int i=1; i<exec_substrings; i++) { |
| // -1:-1 case handled peacefully by mid() itself | // -1:-1 case handled peacefully by mid() itself |
| row+=&mid(ovector[i*2+0], ovector[i*2+1]); // .i column value | *row+=&mid(ovector[i*2+0], ovector[i*2+1]); // .i column value |
| } | } |
| row_action(**table, &row, prestart, prefinish, poststart, postfinish, info); | row_action(table, row, prestart, prefinish, poststart, postfinish, info); |
| if(!option_bits[1] || prestart==poststart) { // not global | going to hang | if(!global || prestart==poststart) { // not global | going to hang |
| pcre_free(code); | pcre_free(code); |
| row_action(**table, 0/*last time, no row*/, 0, 0, poststart, postfinish, info); | row_action(table, 0/*last time, no row*/, 0, 0, poststart, postfinish, info); |
| return true; | return &table; |
| } | } |
| prestart=poststart; | prestart=poststart; |
| Line 582 bool String::match( | Line 502 bool String::match( |
| } | } |
| } | } |
| String& String::change_case(Pool& pool, | String& String::change_case(Charset& source_charset, Change_case_kind kind) const { |
| Change_case_kind kind) const { | String& result=*new String(); |
| const unsigned char *tables=pool.get_source_charset().pcre_tables; | if(is_empty()) |
| String& result=*new(pool) String(pool); | return result; |
| const unsigned char *a; | char* new_cstr=cstrm(); |
| const unsigned char *b; | size_t new_cstr_len=length(); |
| switch(kind) { | if(source_charset.isUTF8()) { |
| case CC_UPPER: | switch(kind) { |
| a=tables+lcc_offset; | case CC_UPPER: |
| b=tables+fcc_offset; | change_case_UTF8((const XMLByte*)new_cstr, new_cstr_len, (XMLByte*)new_cstr, new_cstr_len, UTF8CaseToUpper); |
| break; | break; |
| case CC_LOWER: | case CC_LOWER: |
| a=tables+lcc_offset; | change_case_UTF8((const XMLByte*)new_cstr, new_cstr_len, (XMLByte*)new_cstr, new_cstr_len, UTF8CaseToLower); |
| b=0; | break; |
| break; | default: |
| default: | assert(!"unknown change case kind"); |
| throw Exception(0, | break; // never |
| this, | } |
| "unknown change case kind #%d", | |
| static_cast<int>(kind)); // never | } else { |
| a=b=0; // calm, compiler | const unsigned char *tables=source_charset.pcre_tables; |
| break; // never | |
| } | const unsigned char *a; |
| const unsigned char *b; | |
| switch(kind) { | |
| case CC_UPPER: | |
| a=tables+lcc_offset; | |
| b=tables+fcc_offset; | |
| break; | |
| case CC_LOWER: | |
| a=tables+lcc_offset; | |
| b=0; | |
| break; | |
| default: | |
| assert(!"unknown change case kind"); | |
| a=b=0; // calm, compiler | |
| break; // never | |
| } | |
| STRING_FOREACH_ROW( | |
| char *new_cstr=(char *)pool.malloc(row->item.size, 12); | |
| char *dest=new_cstr; | char *dest=new_cstr; |
| const char *src=row->item.ptr; | unsigned char index; |
| for(int size=row->item.size; size--; src++) { | for(const char* current=new_cstr; (index=(unsigned char)*current); current++) { |
| unsigned char c=a[(unsigned char)*src]; | unsigned char c=a[index]; |
| if(b) | if(b) |
| c=b[c]; | c=b[c]; |
| *dest++=(char)c; | *dest++=(char)c; |
| } | } |
| } | |
| result.APPEND(new_cstr, row->item.size, | result.langs=langs; |
| row->item.lang, | result.body=new_cstr; |
| row->item.origin.file, row->item.origin.line); | |
| ); | |
| return result; | return result; |
| } | } |
| /// @test if in some piece were found no dict words, append it, not it's duplicate | const String& String::replace(const Dictionary& dict) const { |
| String& String::replace(Pool& pool, Dictionary& dict) const { | String& result=*new String(); |
| // return reconstruct(pool).replace_in_reconstructed(pool, dict); | const char* old_cstr=cstr(); |
| String& result=*new(pool) String(pool); | const char* prematch_begin=old_cstr; |
| STRING_FOREACH_ROW( | const char* current=old_cstr; |
| const char *src=row->item.ptr; | while(*current) { |
| size_t src_size=row->item.size; | if(Dictionary::Subst subst=dict.first_that_begins(current)) { |
| char *new_cstr=(char *)pool.malloc((size_t)ceil(src_size*dict.max_ratio()), 14); | // prematch |
| char *dest=new_cstr; | if(size_t prematch_length=current-prematch_begin) { |
| while(src_size) { | result.langs.append(result.body, langs, prematch_begin-old_cstr, prematch_length); |
| // there is a row where first column starts 'src' | result.body.append_strdup_know_length(prematch_begin, prematch_length); |
| if(Table::Item *item=dict.first_that_starts(src, src_size)) { | |
| // get a=>b values | |
| const String& a=*static_cast<Array *>(item)->get_string(0); | |
| const String& b=*static_cast<Array *>(item)->get_string(1); | |
| // skip 'a' in 'src' && reduce work size | |
| src+=a.size(); src_size-=a.size(); | |
| // write 'b' to 'dest' && skip 'b' in 'dest' | |
| b.store_to(dest); dest+=b.size(); | |
| } else { | |
| // write a char to b && reduce work size | |
| *dest++=*src++; src_size--; | |
| } | } |
| } | |
| result.APPEND(new_cstr, dest-new_cstr, row->item.lang, | // match |
| row->item.origin.file, row->item.origin.line); | // skip 'a' in 'current'; move prematch_begin |
| ); | current+=subst.from_length; prematch_begin=current; |
| if(const String* b=subst.to) // are there any b? | |
| result<<*b; | |
| } else // simply advance | |
| current++; | |
| } | |
| // postmatch | |
| if(size_t postmatch_length=current-prematch_begin) { | |
| result.langs.append(result.body, langs, prematch_begin-old_cstr, postmatch_length); | |
| result.body.append_strdup_know_length(prematch_begin, postmatch_length); | |
| } | |
| ASSERT_STRING_INVARIANT(result); | |
| return result; | return result; |
| } | } |
| String& String::join_chains(Pool& pool, char** acstr) const { | static int serialize_body_char(char c, char** cur) { |
| char *lcstr=cstr(); | *((*cur)++)=c; |
| const char *current=lcstr; | return 0; // 0=continue |
| }; | |
| String& result=*new(pool) String(pool); | static int serialize_body_piece(const char* s, char** cur) { |
| STRING_FOREACH_ROW( | size_t length=strlen(s); |
| IFNDEF_NO_STRING_ORIGIN( | memcpy(*cur, s, length); *cur+=length; |
| const char *joined_origin_file=row->item.origin.file; | return 0; // 0=continue |
| const size_t joined_origin_line=row->item.origin.line; | }; |
| ); | static int serialize_lang_piece(char alang, size_t asize, char** cur) { |
| uchar joined_lang=row->item.lang; | // lang |
| const char *joined_ptr=current; | **cur=alang; (*cur)++; |
| // calc size | // length [WARNING: not cast, addresses must be %4=0 on sparc] |
| size_t joined_size=0; | memcpy(*cur, &asize, sizeof(asize)); *cur+=sizeof(asize); |
| STRING_PREPARED_FOREACH_ROW(*this, | |
| if(row->item.lang==joined_lang) | return 0; // 0=continue |
| joined_size+=row->item.size; | } |
| else | String::Cm String::serialize(size_t prolog_length) const { |
| break; // before non-ours | size_t fragments_count=langs.count(); |
| ); | size_t buf_length= |
| current+=joined_size; | prolog_length //1 |
| +sizeof(size_t) //2 | |
| // pointers are after joined piece | +fragments_count*(sizeof(char)+sizeof(size_t)) //3 |
| // & one step back, see STRING_FOREACH_ROW | +body.length() //4 |
| --row; ++countdown; | +1; // for zero terminator used in deserialize |
| String::Cm result(new(PointerFreeGC) char[buf_length], buf_length); | |
| result.APPEND(joined_ptr, joined_size, joined_lang, | |
| joined_origin_file, joined_origin_line); | // 1: prolog |
| ); | char *cur=result.str+prolog_length; |
| // 2: langs.count [WARNING: not cast, addresses must be %4=0 on sparc] | |
| memcpy(cur, &fragments_count, sizeof(fragments_count)); cur+=sizeof(fragments_count); | |
| // 3: lang info | |
| langs.for_each(body, serialize_lang_piece, &cur); | |
| // 4: letters | |
| body.for_each(serialize_body_char, serialize_body_piece, &cur); | |
| // 5: zero terminator | |
| *cur=0; | |
| if(acstr) | |
| *acstr=lcstr; | |
| return result; | return result; |
| } | } |
| bool String::deserialize(size_t prolog_length, void *buf, size_t buf_length) { | |
| if(buf_length<=prolog_length) | |
| return false; | |
| buf_length-=prolog_length; | |
| buf_length-=1; // 5: zero terminator | |
| double String::as_double() const { | // 1: prolog |
| double result; | const char* cur=(const char* )buf+prolog_length; |
| const char *cstr; | |
| char buf[MAX_NUMBER]; | |
| if(head.chunk.rows+1==append_here) { | |
| int size=min(head.chunk.rows[0].item.size, MAX_NUMBER-1); | |
| memcpy(buf, head.chunk.rows[0].item.ptr, size); | |
| buf[size]=0; | |
| cstr=buf; | |
| } else | |
| cstr=this->cstr(); | |
| while(*cstr && isspace(*cstr)) | |
| cstr++; | |
| if(!*cstr) | |
| return 0; | |
| char *error_pos; | |
| // 0xABC | |
| if(cstr[0]=='0') | |
| if(cstr[1]=='x' || cstr[1]=='X') | |
| result=(double)(unsigned long)strtol(cstr, &error_pos, 0); | |
| else | |
| result=(double)strtod(cstr+1/*skip leading 0*/, &error_pos); | |
| else | |
| result=(double)strtod(cstr, &error_pos); | |
| while(char c=*error_pos++) | // 2: langs.count |
| if(!isspace(c)) | size_t fragments_count; |
| throw Exception("number.format", | if(buf_length<sizeof(fragments_count)) // langs.count don't fit? |
| this, | return false; |
| "invalid number (double)"); | // [WARNING: not cast, addresses must be %4=0 on sparc] |
| memcpy(&fragments_count, cur, sizeof(fragments_count)); cur+=sizeof(fragments_count); | |
| buf_length-=sizeof(fragments_count); | |
| if(fragments_count) { | |
| // 3: lang info | |
| size_t total_length=0; | |
| for(size_t f=0; f<fragments_count; f++) { | |
| char lang; | |
| size_t fragment_length; | |
| size_t piece_length=sizeof(lang)+sizeof(fragment_length); | |
| if(buf_length<piece_length) // lang+length | |
| return false; | |
| // lang | |
| lang=*cur++; | |
| // length [WARNING: not cast, addresses must be %4=0 on sparc] | |
| memcpy(&fragment_length, cur, sizeof(fragment_length)); cur+=sizeof(fragment_length); | |
| // uchar needed to prevent propagating 0x80 bit to upper bytes | |
| langs.append(total_length, (String::Language)(uchar)lang, fragment_length); | |
| total_length+=fragment_length; | |
| return result; | buf_length-=piece_length; |
| } | } |
| int String::as_int() const { | |
| int result; | |
| const char *cstr; | |
| char buf[MAX_NUMBER]; | |
| if(head.chunk.rows+1==append_here) { | |
| size_t size=min(head.chunk.rows[0].item.size, MAX_NUMBER-1); | |
| memcpy(buf, head.chunk.rows[0].item.ptr, size); | |
| buf[size]=0; | |
| cstr=buf; | |
| } else | |
| cstr=this->cstr(); | |
| while(*cstr && isspace(*cstr)) | |
| cstr++; | |
| if(!*cstr) | |
| return 0; | |
| char *error_pos; | // 4: letters |
| // 0xABC | if(buf_length!=total_length) |
| if(cstr[0]=='0') | return false; |
| if(cstr[1]=='x' || cstr[1]=='X') | |
| result=(int)(unsigned long)strtol(cstr, &error_pos, 0); | |
| else | |
| result=(int)strtol(cstr+1/*skip leading 0*/, &error_pos, 0); | |
| else | |
| result=(int)strtol(cstr, &error_pos, 0); | |
| while(char c=*error_pos++) | // serialize wrote extra zero byte there, we can rely on that |
| if(!isspace(c)) | body=String::Body(cur, buf_length); |
| throw Exception("number.format", | } |
| this, | |
| "invalid number (int)"); | |
| return result; | ASSERT_STRING_INVARIANT(*this); |
| return true; | |
| } | } |
| inline void ushort2uchars(ushort word, uchar& byte1, uchar& byte2) { | const char* String::Body::v() const { |
| byte1=word&0xFF; | return CORD_to_const_char_star(body); |
| byte2=word>>8; | |
| } | |
| inline ushort uchars2ushort(uchar byte1, uchar byte2) { | |
| return (byte2<<8) | byte1; | |
| } | |
| /* @todo maybe network order worth spending some effort? | |
| don't bothering myself with network byte order, | |
| am not planning to be able to move resulting file across platforms | |
| for now | |
| */ | |
| void String::serialize(size_t prolog_size, void *& buf, size_t& buf_size) const { | |
| buf_size= | |
| prolog_size | |
| +used_rows()*(sizeof(uchar)+sizeof(ushort)) | |
| +size(); | |
| buf=malloc(buf_size,15); | |
| char *cur=(char *)buf+prolog_size; | |
| STRING_FOREACH_ROW( | |
| // lang | |
| memcpy(cur, &row->item.lang, sizeof(uchar)); | |
| cur+=sizeof(uchar); | |
| // size | |
| uchar byte1; uchar byte2; | |
| ushort2uchars(row->item.size, byte1, byte2); | |
| memcpy(cur, &byte1, sizeof(uchar)); cur+=sizeof(uchar); | |
| memcpy(cur, &byte2, sizeof(uchar)); cur+=sizeof(uchar); | |
| // bytes | |
| memcpy(cur, row->item.ptr, row->item.size); | |
| cur+=row->item.size; | |
| ); | |
| } | } |
| bool String::deserialize(size_t prolog_size, void *buf, size_t buf_size, const char *file) { | const char* String::Languages::v() const { |
| if(buf_size<=prolog_size) | if(opt.is_not_just_lang) |
| return false; | return CORD_to_const_char_star(langs); |
| else | |
| return (const char*)&langs; | |
| } | |
| const char* String::v() const { | |
| const uint LIMIT_VIEW=20; | |
| char* buf=(char*)malloc(MAX_STRING); | |
| const char*body_view=body.v(); | |
| const char*langs_view=langs.v(); | |
| snprintf(buf, MAX_STRING, | |
| "%d:%.*s%s} " | |
| "{%d:%s", | |
| langs.count(), LIMIT_VIEW, langs_view, strlen(langs_view)>LIMIT_VIEW?"...":"", | |
| strlen(body_view), body_view | |
| ); | |
| char *cur=(char *)buf+prolog_size; | return buf; |
| buf_size-=prolog_size; | } |
| while(buf_size) { | const String& String::trim(String::Trim_kind kind, const char* chars) const { |
| if(sizeof(uchar)+sizeof(ushort)>buf_size) // lang+size | if(!length()) |
| return false; | return *this; |
| uchar lang=*(uchar *)(cur); | size_t substr_begin, substr_length; |
| ushort size=uchars2ushort( | Body new_body=body.trim(kind, chars, &substr_begin, &substr_length); |
| *(uchar*)(cur+sizeof(uchar)*1), | if(new_body==body) // we received unchanged pointer, do likewise |
| *(uchar*)(cur+sizeof(uchar)*2) | return *this; |
| ); | // new_body differs from body, adjust langs along |
| size_t piece_size=sizeof(uchar)+sizeof(ushort)+size; | String& result=*new String; |
| if(piece_size>buf_size) // buffer overrun, can be on incomplete cache files | if(!new_body) // body.trim produced empty result |
| return false; | return result; |
| // body.trim produced nonempty result | |
| const char *ptr=(const char*)(cur+sizeof(uchar)*3); | // first: their langs |
| APPEND(ptr, size, lang, file, 0); | result.langs.append(result.body, langs, substr_begin, substr_length); |
| // next: letters themselves | |
| result.body=new_body; | |
| cur+=piece_size; | ASSERT_STRING_INVARIANT(result); |
| buf_size-=piece_size; | return result; |
| } | |
| return true; | |
| } | } |