|
|
| version 1.140, 2002/02/20 10:40:08 | version 1.151, 2002/04/10 08:53:55 |
|---|---|
| Line 29 ulong string_piece_appends=0; | Line 29 ulong string_piece_appends=0; |
| String::String(Pool& apool, const char *src, size_t src_size, bool tainted) : | String::String(Pool& apool, const char *src, size_t src_size, bool tainted) : |
| Pooled(apool) { | Pooled(apool) { |
| last_chunk=&head; | last_chunk=&head.chunk; |
| head.count=CR_PREALLOCATED_COUNT; | head.chunk.count=CR_PREALLOCATED_COUNT; |
| append_here=head.rows; | append_here=head.chunk.rows; |
| initial_head_link=0; | |
| link_row=&head.rows[head.count]; | |
| if(src) | if(src) |
| if(tainted) | if(tainted) |
| Line 41 String::String(Pool& apool, const char * | Line 39 String::String(Pool& apool, const char * |
| else | else |
| APPEND_CLEAN(src, src_size, 0, 0); | APPEND_CLEAN(src, src_size, 0, 0); |
| } | } |
| /* | |
| String::String(const String& src) : | |
| Pooled(src.pool()) { | |
| head.count=CR_PREALLOCATED_COUNT; | |
| uint src_used_rows=src.used_rows(); | |
| if(src_used_rows<=head.count) { | |
| // all new rows fit size_to preallocated area | |
| last_chunk=&head; | |
| uint curr_chunk_rows=head.count; | |
| memcpy(head.rows, src.head.rows, sizeof(Chunk::Row)*src_used_rows); | |
| append_here=&head.rows[src_used_rows]; | |
| link_row=&head.rows[curr_chunk_rows]; | |
| } else { | |
| // warning: | |
| // heavily relies on the fact | |
| // "preallocated area is the same for all strings" | |
| // | |
| // info: | |
| // allocating only enough mem to fit src string rows | |
| // next append would allocate a new chunk | |
| // | |
| // new rows don't fit size_to preallocated area: splitting size_to two chunks | |
| // preallocated chunk src to constructing head | |
| memcpy(head.rows, src.head.rows, sizeof(Chunk::Row)*head.count); | |
| // remaining rows size_to new_chunk | |
| uint curr_chunk_rows=src_used_rows-head.count; | |
| last_chunk=static_cast<Chunk *>( | |
| malloc(sizeof(Chunk::count_type)+sizeof(Chunk::Row)*curr_chunk_rows+sizeof(Chunk *), 9)); | |
| last_chunk->count=curr_chunk_rows; | |
| append_here=link_row=&last_chunk->rows[last_chunk->count]; | |
| Chunk *old_chunk=src.head.rows[src.head.count].link; | |
| Chunk::Row *new_rows=last_chunk->rows; | |
| uint rows_left_to_copy=last_chunk->count; | |
| while(true) { | |
| Chunk::count_type old_count=old_chunk->count; | |
| Chunk *next_chunk=old_chunk->rows[old_count].link; | |
| if(next_chunk) { | |
| // not last source chunk | |
| // taking it all | |
| memcpy(new_rows, old_chunk->rows, sizeof(Chunk::Row)*old_count); | |
| new_rows+=old_count; | |
| rows_left_to_copy-=old_count; | |
| old_chunk=next_chunk; | |
| } else { | |
| // the last source chunk | |
| // taking only those rows of chunk that _left_to_copy | |
| memcpy(new_rows, old_chunk->rows, sizeof(Chunk::Row)*rows_left_to_copy); | |
| break; | |
| } | |
| } | |
| } | |
| link_row->link=0; | |
| } | |
| */ | |
| String::String(const String& src) : | String::String(const String& src) : |
| Pooled(src.pool()) { | Pooled(src.pool()) { |
| last_chunk=&head; | last_chunk=&head.chunk; |
| head.count=CR_PREALLOCATED_COUNT; | head.chunk.count=CR_PREALLOCATED_COUNT; |
| append_here=head.rows; | append_here=head.chunk.rows; |
| initial_head_link=0; | |
| link_row=&head.rows[head.count]; | |
| append(src, UL_UNSPECIFIED); | append(src, UL_UNSPECIFIED); |
| } | } |
| Line 115 size_t String::size() const { | Line 54 size_t String::size() const { |
| STRING_FOREACH_ROW( | STRING_FOREACH_ROW( |
| result+=row->item.size; | result+=row->item.size; |
| ); | ); |
| break2: | |
| return result; | return result; |
| } | } |
| Line 125 uint String::used_rows() const { | Line 63 uint String::used_rows() const { |
| STRING_FOREACH_ROW( | STRING_FOREACH_ROW( |
| result++; | result++; |
| ); | ); |
| break2: | |
| return result; | return result; |
| } | } |
| void String::expand() { | void String::expand() { |
| Chunk::count_type new_chunk_count=last_chunk->count+CR_GROW_COUNT; | uint new_chunk_count=last_chunk->count+CR_GROW_COUNT; |
| if(new_chunk_count>max_integral(Chunk::count_type)) | if(new_chunk_count>max_integral(Chunk::count_type)) |
| new_chunk_count=max_integral(Chunk::count_type); | new_chunk_count=max_integral(Chunk::count_type); |
| last_chunk=static_cast<Chunk *>( | Chunk *new_chunk=static_cast<Chunk *>( |
| malloc(sizeof(Chunk::count_type)+sizeof(Chunk::Row)*new_chunk_count+sizeof(Chunk *), 10)); | malloc( |
| last_chunk->count=new_chunk_count; | sizeof(Chunk)// count+interpadding(?)+rows[CR_PREALLOCATED_COUNT]+tailpadding(??) |
| link_row->link=last_chunk; | -sizeof(Chunk::rows_type) // PREALLOCATED rows |
| +sizeof(Chunk::Row)*new_chunk_count // needed rows | |
| +sizeof(Chunk *) // link size | |
| , 10)); | |
| new_chunk->rows[new_chunk->count=new_chunk_count].link=0; | |
| last_chunk->rows[last_chunk->count].link=new_chunk; | |
| last_chunk=new_chunk; | |
| append_here=last_chunk->rows; | append_here=last_chunk->rows; |
| link_row=&last_chunk->rows[last_chunk->count]; | |
| link_row->link=0; | |
| } | } |
| String& String::real_append(STRING_APPEND_PARAMS) { | String& String::real_append(STRING_APPEND_PARAMS) { |
| if(!last_chunk) // growth stopped [we're appended as string to somebody] | if(!last_chunk) // growth stopped [we're appended as string to somebody] |
| throw Exception(0, 0, | throw Exception(0, |
| this, | this, |
| "string growth stopped"); | "string growth stopped (append cstr)"); |
| if(!src) | if(!src) |
| return *this; | return *this; |
| Line 194 String& String::real_append(STRING_APPEN | Line 136 String& String::real_append(STRING_APPEN |
| char String::first_char() const { | char String::first_char() const { |
| if(is_empty()) | if(is_empty()) |
| throw Exception(0, 0, | throw Exception(0, |
| this, | this, |
| "getting first char of empty string"); | "getting first char of empty string"); |
| return *head.rows[0].item.ptr; | return *head.chunk.rows[0].item.ptr; |
| } | } |
| uint String::hash_code() const { | uint String::hash_code() const { |
| Line 206 uint String::hash_code() const { | Line 148 uint String::hash_code() const { |
| STRING_FOREACH_ROW( | STRING_FOREACH_ROW( |
| result=Hash::generic_code(result, row->item.ptr, row->item.size); | result=Hash::generic_code(result, row->item.ptr, row->item.size); |
| ); | ); |
| break2: | |
| return result; | return result; |
| } | } |
| Line 217 int String::cmp(int& partial, const Stri | Line 158 int String::cmp(int& partial, const Stri |
| size_t a_size=size(); | size_t a_size=size(); |
| this_offset=min(this_offset, a_size-1); | this_offset=min(this_offset, a_size-1); |
| const Chunk *a_chunk=&head; | const Chunk *a_chunk=&head.chunk; |
| const Chunk *b_chunk=&src.head; | const Chunk *b_chunk=&src.head.chunk; |
| const Chunk::Row *a_row=a_chunk->rows; | const Chunk::Row *a_row=a_chunk->rows; |
| const Chunk::Row *b_row=b_chunk->rows; | const Chunk::Row *b_row=b_chunk->rows; |
| size_t a_offset=this_offset; | size_t a_offset=this_offset; |
| Line 307 int String::cmp(int& partial, const char | Line 248 int String::cmp(int& partial, const char |
| size_t b_size=src_size?src_size:b_ptr?strlen(b_ptr):0; | size_t b_size=src_size?src_size:b_ptr?strlen(b_ptr):0; |
| this_offset=min(this_offset, a_size-1); | this_offset=min(this_offset, a_size-1); |
| const Chunk *a_chunk=&head; | const Chunk *a_chunk=&head.chunk; |
| const Chunk::Row *a_row=a_chunk->rows; | const Chunk::Row *a_row=a_chunk->rows; |
| size_t a_offset=this_offset; | size_t a_offset=this_offset; |
| size_t b_offset=0; | size_t b_offset=0; |
| Line 378 const Origin& String::origin() const { | Line 319 const Origin& String::origin() const { |
| return empty_origin; | return empty_origin; |
| } | } |
| // determining origin by last appended piece | // determining origin by first piece or last appended piece |
| // because first one frequently constant. | // because any of them can be constant=without origin: |
| // ex: ^load[/file] "document_root" + "/file" | // ex: ^load[/file] "document_root" + "/file" |
| // when last piece is constant, | // when last piece is constant, |
| // ex: parser_root_auto_path{dynamic} / auto.p{const} | // ex: parser_root_auto_path{dynamic} / auto.p{const} |
| // using first piece | // using first piece |
| Origin& last_origin=append_here[-1].item.origin; | Origin& first_origin=head.chunk.rows[0].item.origin; |
| return last_origin.file ? last_origin : head.rows[0].item.origin; | return first_origin.file ? first_origin : append_here[-1].item.origin; |
| } | } |
| #endif | #endif |
| Line 537 bool String::match( | Line 478 bool String::match( |
| bool *was_global) const { | bool *was_global) const { |
| if(regexp.is_empty()) | if(regexp.is_empty()) |
| throw Exception(0, 0, | throw Exception(0, |
| aorigin, | aorigin, |
| "regexp is empty"); | "regexp is empty"); |
| const char *pattern=regexp.cstr(); | const char *pattern=regexp.cstr(); |
| Line 551 bool String::match( | Line 492 bool String::match( |
| pool().get_source_charset().pcre_tables); | pool().get_source_charset().pcre_tables); |
| if(!code) | if(!code) |
| throw Exception(0, 0, | throw Exception(0, |
| &regexp.mid(erroffset, regexp.size()), | &regexp.mid(erroffset, regexp.size()), |
| "regular expression syntax error - %s", errptr); | "regular expression syntax error - %s", errptr); |
| int info_substrings=pcre_info(code, 0, 0); | int info_substrings=pcre_info(code, 0, 0); |
| if(info_substrings<0) { | if(info_substrings<0) { |
| pcre_free(code); | pcre_free(code); |
| throw Exception(0, 0, | throw Exception(0, |
| aorigin, | aorigin, |
| "pcre_info error (%d)", | "pcre_info error (%d)", |
| info_substrings); | info_substrings); |
| Line 598 bool String::match( | Line 539 bool String::match( |
| if(exec_substrings<0) { | if(exec_substrings<0) { |
| pcre_free(code); | pcre_free(code); |
| throw Exception(0, 0, | throw Exception(0, |
| aorigin, | aorigin, |
| "regular expression execute error (%d)", | "regular expression execute error (%d)", |
| exec_substrings); | exec_substrings); |
| Line 647 String& String::change_case(Pool& pool, | Line 588 String& String::change_case(Pool& pool, |
| b=0; | b=0; |
| break; | break; |
| default: | default: |
| throw Exception(0, 0, | throw Exception(0, |
| this, | this, |
| "unknown change case kind #%d", | "unknown change case kind #%d", |
| static_cast<int>(kind)); // never | static_cast<int>(kind)); // never |
| Line 655 String& String::change_case(Pool& pool, | Line 596 String& String::change_case(Pool& pool, |
| break; // never | break; // never |
| } | } |
| const Chunk *chunk=&head; | STRING_FOREACH_ROW( |
| do { | char *new_cstr=(char *)pool.malloc(row->item.size, 12); |
| const Chunk::Row *row=chunk->rows; | char *dest=new_cstr; |
| for(Chunk::count_type i=0; i<chunk->count; i++, row++) { | const char *src=row->item.ptr; |
| if(row==append_here) | for(int size=row->item.size; size--; src++) { |
| goto break2; | unsigned char c=a[(unsigned char)*src]; |
| if(b) | |
| char *new_cstr=(char *)pool.malloc(row->item.size, 12); | c=b[c]; |
| char *dest=new_cstr; | |
| const char *src=row->item.ptr; | |
| for(int size=row->item.size; size--; src++) { | |
| unsigned char c=a[(unsigned char)*src]; | |
| if(b) | |
| c=b[c]; | |
| *dest++=(char)c; | *dest++=(char)c; |
| } | |
| result.APPEND(new_cstr, row->item.size, | |
| row->item.lang, | |
| row->item.origin.file, row->item.origin.line); | |
| } | } |
| chunk=row->link; | |
| } while(chunk); | result.APPEND(new_cstr, row->item.size, |
| break2: | row->item.lang, |
| row->item.origin.file, row->item.origin.line); | |
| ); | |
| return result; | return result; |
| } | } |
| void String::join_chain(Pool& pool, | void String::join_chain(Pool& pool, |
| uint& ai, const Chunk*& achunk, const Chunk::Row*& arow, | const Chunk*& achunk, const Chunk::Row*& arow, uint& acountdown, |
| uchar& joined_lang, const char *& joined_ptr, size_t& joined_size) const { | uchar& joined_lang, const char *& joined_ptr, size_t& joined_size) const { |
| joined_lang=arow->item.lang; | joined_lang=arow->item.lang; |
| // calc size | // calc size |
| joined_size=0; | joined_size=0; |
| { | { |
| uint start_i=ai; | const Chunk* chunk=achunk; |
| const Chunk::Row *start_row=arow; | const Chunk::Row* row=arow; |
| const Chunk *chunk=achunk; | uint countdown=acountdown; |
| do { | STRING_PREPARED_FOREACH_ROW(*this, |
| const Chunk::Row *row=start_row; | if(row->item.lang==joined_lang) |
| for(uint i=start_i; i<chunk->count; i++, row++) { | joined_size+=row->item.size; |
| if(row==append_here) | else |
| goto break21; | |
| if(row->item.lang==joined_lang) | |
| joined_size+=row->item.size; | |
| else | |
| goto break21; | |
| } | |
| if(chunk=row->link) { | |
| start_i=0; | |
| start_row=chunk->rows; | |
| } else | |
| break; | break; |
| } while(true); | ); |
| break21:; | |
| } | } |
| // if one row, return simply itself | // if one row, return simply itself |
| if(joined_size==arow->item.size) { | if(joined_size==arow->item.size) { |
| joined_ptr=arow->item.ptr; | joined_ptr=arow->item.ptr; |
| ai++; arow++; | |
| if(ai==achunk->count) { | |
| if(achunk=arow->link) { | |
| ai=0; | |
| arow=achunk->rows; | |
| } | |
| } | |
| } else { | } else { |
| // join adjacent rows | // join adjacent rows |
| char *ptr=(char *)pool.malloc(joined_size,13); | char *ptr=(char *)pool.malloc(joined_size,13); |
| joined_ptr=ptr; | joined_ptr=ptr; |
| uint start_i=ai; | |
| const Chunk::Row *start_row=arow; | const Chunk* chunk=achunk; |
| const Chunk *chunk=achunk; | const Chunk::Row* row=arow; |
| uint i; | uint countdown=acountdown; |
| const Chunk::Row *row; | STRING_PREPARED_FOREACH_ROW(*this, |
| do { | if(row->item.lang==joined_lang) { |
| row=start_row; | memcpy(ptr, row->item.ptr, row->item.size); ptr+=row->item.size; |
| for(i=start_i; i<chunk->count; i++, row++) { | |
| if(row==append_here) | |
| goto break22; | |
| if(row->item.lang==joined_lang) { | |
| memcpy(ptr, row->item.ptr, row->item.size); | |
| ptr+=row->item.size; | |
| } else | |
| goto break22; | |
| } | |
| if(chunk=row->link) { | |
| start_i=0; | |
| start_row=chunk->rows; | |
| } else | } else |
| break; | break; // before non-ours |
| } while(true); | ); |
| break22:; | |
| // return joined rows | // set pointers after joined piece |
| ai=i; | achunk=chunk; arow=row; acountdown=countdown; |
| arow=row; | // & one step back, see String::reconstruct |
| achunk=chunk; | --arow; ++acountdown; |
| } | } |
| } | } |
| String& String::reconstruct(Pool& pool) const { | /// @test if in some piece were found no dict words, append it, not it's duplicate |
| //_asm int 3; | String& String::replace(Pool& pool, Dictionary& dict) const { |
| // return reconstruct(pool).replace_in_reconstructed(pool, dict); | |
| String& result=*new(pool) String(pool); | String& result=*new(pool) String(pool); |
| const Chunk *chunk=&head; | |
| const Chunk::Row *row=chunk->rows; | |
| for(uint i=0; i<chunk->count; ) { | |
| if(row==append_here) | |
| break; | |
| STRING_FOREACH_ROW( | |
| uchar joined_lang; | uchar joined_lang; |
| const char *joined_ptr; | const char *joined_ptr; |
| size_t joined_size; | size_t joined_size; |
| #ifndef NO_STRING_ORIGIN | IFNDEF_NO_STRING_ORIGIN( |
| const char *joined_origin_file=row->item.origin.file; | const char *joined_origin_file=row->item.origin.file; |
| const size_t joined_origin_line=row->item.origin.line; | const size_t joined_origin_line=row->item.origin.line; |
| #endif | ); |
| join_chain(pool, i, chunk, row, | join_chain(pool, chunk, row, countdown, |
| joined_lang, joined_ptr, joined_size); | joined_lang, joined_ptr, joined_size); |
| result.APPEND(joined_ptr, joined_size, joined_lang, | const char *src=joined_ptr; |
| joined_origin_file, joined_origin_line); | size_t src_size=joined_size; |
| if(!chunk) | |
| break; | |
| } | |
| return result; | |
| }; | |
| String& String::replace_in_reconstructed(Pool& pool, Dictionary& dict) const { | |
| //_asm int 3; | |
| String& result=*new(pool) String(pool); | |
| STRING_FOREACH_ROW( | |
| const char *src=row->item.ptr; | |
| size_t src_size=row->item.size; | |
| char *new_cstr=(char *)pool.malloc((size_t)ceil(src_size*dict.max_ratio()), 14); | char *new_cstr=(char *)pool.malloc((size_t)ceil(src_size*dict.max_ratio()), 14); |
| char *dest=new_cstr; | char *dest=new_cstr; |
| while(src_size) { | while(src_size) { |
| Line 814 String& String::replace_in_reconstructed | Line 696 String& String::replace_in_reconstructed |
| } | } |
| } | } |
| result.APPEND(new_cstr, dest-new_cstr, | result.APPEND(new_cstr, dest-new_cstr, joined_lang, |
| row->item.lang, | joined_origin_file, joined_origin_line); |
| row->item.origin.file, row->item.origin.line); | |
| ); | ); |
| break2: | |
| return result; | return result; |
| } | } |
| String& String::replace(Pool& pool, Dictionary& dict) const { | |
| return reconstruct(pool).replace_in_reconstructed(pool, dict); | |
| } | |
| double String::as_double() const { | double String::as_double() const { |
| double result; | double result; |
| const char *cstr; | const char *cstr; |
| char buf[MAX_NUMBER]; | char buf[MAX_NUMBER]; |
| if(head.rows+1==append_here) { | if(head.chunk.rows+1==append_here) { |
| int size=min(head.rows[0].item.size, MAX_NUMBER-1); | int size=min(head.chunk.rows[0].item.size, MAX_NUMBER-1); |
| memcpy(buf, head.rows[0].item.ptr, size); | memcpy(buf, head.chunk.rows[0].item.ptr, size); |
| buf[size]=0; | buf[size]=0; |
| cstr=buf; | cstr=buf; |
| } else | } else |
| Line 848 double String::as_double() const { | Line 724 double String::as_double() const { |
| result=(double)strtod(cstr, &error_pos); | result=(double)strtod(cstr, &error_pos); |
| if(*error_pos/*not EOS*/) | if(*error_pos/*not EOS*/) |
| throw Exception(0, 0, | throw Exception("number.format", |
| this, | this, |
| "invalid number (double)"); | "invalid number (double)"); |
| Line 858 int String::as_int() const { | Line 734 int String::as_int() const { |
| int result; | int result; |
| const char *cstr; | const char *cstr; |
| char buf[MAX_NUMBER]; | char buf[MAX_NUMBER]; |
| if(head.rows+1==append_here) { | if(head.chunk.rows+1==append_here) { |
| int size=min(head.rows[0].item.size, MAX_NUMBER-1); | int size=min(head.chunk.rows[0].item.size, MAX_NUMBER-1); |
| memcpy(buf, head.rows[0].item.ptr, size); | memcpy(buf, head.chunk.rows[0].item.ptr, size); |
| buf[size]=0; | buf[size]=0; |
| cstr=buf; | cstr=buf; |
| } else | } else |
| Line 876 int String::as_int() const { | Line 752 int String::as_int() const { |
| result=(int)strtol(cstr, &error_pos, 0); | result=(int)strtol(cstr, &error_pos, 0); |
| if(*error_pos/*not EOS*/) | if(*error_pos/*not EOS*/) |
| throw Exception(0, 0, | throw Exception("number.format", |
| this, | this, |
| "invalid number (int)"); | "invalid number (int)"); |
| Line 916 void String::serialize(size_t prolog_siz | Line 792 void String::serialize(size_t prolog_siz |
| memcpy(cur, row->item.ptr, row->item.size); | memcpy(cur, row->item.ptr, row->item.size); |
| cur+=row->item.size; | cur+=row->item.size; |
| ); | ); |
| break2: | |
| ; | |
| } | } |
| void String::deserialize(size_t prolog_size, void *buf, size_t buf_size, const char *file) { | bool String::deserialize(size_t prolog_size, void *buf, size_t buf_size, const char *file) { |
| if(buf_size<=prolog_size) | if(buf_size<=prolog_size) |
| return; | return false; |
| char *cur=(char *)buf+prolog_size; | char *cur=(char *)buf+prolog_size; |
| buf_size-=prolog_size; | buf_size-=prolog_size; |
| while(buf_size) { | while(buf_size) { |
| uchar lang=*(uchar *)(cur); | if(sizeof(uchar)+sizeof(ushort)>buf_size) // lang+size |
| return false; | |
| uchar lang=*(uchar *)(cur); | |
| ushort size=uchars2ushort( | ushort size=uchars2ushort( |
| *(uchar*)(cur+sizeof(uchar)*1), | *(uchar*)(cur+sizeof(uchar)*1), |
| *(uchar*)(cur+sizeof(uchar)*2) | *(uchar*)(cur+sizeof(uchar)*2) |
| ); | ); |
| size_t piece_size=sizeof(uchar)+sizeof(ushort)+size; | |
| if(piece_size>buf_size) // buffer overrun, can be on incomplete cache files | |
| return false; | |
| const char *ptr=(const char*)(cur+sizeof(uchar)*3); | const char *ptr=(const char*)(cur+sizeof(uchar)*3); |
| APPEND(ptr, size, lang, file, 0); | APPEND(ptr, size, lang, file, 0); |
| size_t piece_size=sizeof(uchar)+sizeof(ushort)+size; | |
| cur+=piece_size; | cur+=piece_size; |
| buf_size-=piece_size; | buf_size-=piece_size; |
| } | } |
| return true; | |
| } | } |