| version 1.8, 2001/01/27 12:04:53 | version 1.50, 2001/03/24 19:12:20 |
|---|---|
| Line 1 | Line 1 |
| /* | /** @file |
| $Id$ | Parser: string class. @see untaint.C. |
| Copyright (c) 2001 ArtLebedev Group (http://www.artlebedev.com) | |
| Author: Alexander Petrosyan <paf@design.ru> (http://design.ru/paf) | |
| $Id$ | |
| */ | */ |
| #include <string.h> | #include "pa_config_includes.h" |
| #include "pa_pool.h" | #include "pa_pool.h" |
| #include "pa_string.h" | |
| #include "pa_hash.h" | #include "pa_hash.h" |
| #include "pa_exception.h" | |
| void *String::operator new(size_t size, Pool *apool) { | // String |
| return apool->malloc(size); | |
| } | |
| void String::construct(Pool *apool) { | String::String(Pool& apool, const char *src, bool tainted) : |
| pool=apool; | Pooled(apool) { |
| head.count=curr_chunk_rows=CR_PREALLOCATED_COUNT; | last_chunk=&head; |
| head.count=CR_PREALLOCATED_COUNT; | |
| append_here=head.rows; | append_here=head.rows; |
| head.preallocated_link=0; | head.preallocated_link=0; |
| link_row=&head.rows[curr_chunk_rows]; | link_row=&head.rows[head.count]; |
| fused_rows=fsize=0; | fused_rows=fsize=0; |
| if(src) | |
| if(tainted) | |
| APPEND_TAINTED(src, 0, 0, 0); | |
| else | |
| APPEND(src, 0, 0, 0); | |
| } | } |
| void String::expand() { | void String::expand() { |
| curr_chunk_rows+=curr_chunk_rows*CR_GROW_PERCENT/100; | int new_chunk_count=last_chunk->count+last_chunk->count*CR_GROW_PERCENT/100; |
| Chunk *chunk=static_cast<Chunk *>( | last_chunk=static_cast<Chunk *>( |
| pool->malloc(sizeof(Chunk::Row)*curr_chunk_rows+sizeof(Chunk *))); | malloc(sizeof(int)+sizeof(Chunk::Row)*new_chunk_count+sizeof(Chunk *))); |
| chunk->count=curr_chunk_rows; | last_chunk->count=new_chunk_count; |
| link_row->link=chunk; | link_row->link=last_chunk; |
| append_here=chunk->rows; | append_here=last_chunk->rows; |
| link_row=&chunk->rows[curr_chunk_rows]; | link_row=&last_chunk->rows[last_chunk->count]; |
| link_row->link=0; | link_row->link=0; |
| } | } |
| String::String(String& src) { | String::String(const String& src) : Pooled(src.pool()) { |
| pool=src.pool; | |
| head.count=CR_PREALLOCATED_COUNT; | head.count=CR_PREALLOCATED_COUNT; |
| int src_used_rows=src.used_rows(); | int src_used_rows=src.fused_rows; |
| if(src_used_rows<=head.count) { | if(src_used_rows<=head.count) { |
| // new rows fit into preallocated area | // all new rows fit into preallocated area |
| curr_chunk_rows=head.count; | int curr_chunk_rows=head.count; |
| memcpy(head.rows, src.head.rows, sizeof(Chunk::Row)*src_used_rows); | memcpy(head.rows, src.head.rows, sizeof(Chunk::Row)*src_used_rows); |
| append_here=&head.rows[src_used_rows]; | append_here=&head.rows[src_used_rows]; |
| head.preallocated_link=0; | |
| link_row=&head.rows[curr_chunk_rows]; | link_row=&head.rows[curr_chunk_rows]; |
| } else { | } else { |
| // warning: | // warning: |
| // heavy relies on the fact | // heavily relies on the fact |
| // that preallocated area is the same for all strings | // "preallocated area is the same for all strings" |
| // | // |
| // info: | // info: |
| // allocating only enough mem to fit src string rows | // allocating only enough mem to fit src string rows |
| *Line 56:* String::String(String& src) { | *Line 67:* String::String(String& src) { |
| // preallocated chunk src to constructing head | // preallocated chunk src to constructing head |
| memcpy(head.rows, src.head.rows, sizeof(Chunk::Row)*head.count); | memcpy(head.rows, src.head.rows, sizeof(Chunk::Row)*head.count); |
| // remaining rows into new_chunk | // remaining rows into new_chunk |
| curr_chunk_rows=src_used_rows-head.count; | int curr_chunk_rows=src_used_rows-head.count; |
| Chunk *new_chunk=static_cast<Chunk *>( | Chunk *new_chunk=static_cast<Chunk *>( |
| pool->malloc(sizeof(Chunk::Row)*curr_chunk_rows+sizeof(Chunk *))); | malloc(sizeof(int)+sizeof(Chunk::Row)*curr_chunk_rows+sizeof(Chunk *))); |
| new_chunk->count=curr_chunk_rows; | new_chunk->count=curr_chunk_rows; |
| head.preallocated_link=new_chunk; | head.preallocated_link=new_chunk; |
| append_here=link_row=&new_chunk->rows[curr_chunk_rows]; | append_here=link_row=&new_chunk->rows[new_chunk->count]; |
| Chunk *old_chunk=src.head.preallocated_link; | Chunk *old_chunk=src.head.preallocated_link; |
| Chunk::Row *new_rows=new_chunk->rows; | Chunk::Row *new_rows=new_chunk->rows; |
| int rows_left_to_copy=curr_chunk_rows; | int rows_left_to_copy=new_chunk->count; |
| while(true) { | while(true) { |
| int old_count=old_chunk->count; | int old_count=old_chunk->count; |
| Chunk *next_chunk=old_chunk->rows[old_count].link; | Chunk *next_chunk=old_chunk->rows[old_count].link; |
| *Line 90:* String::String(String& src) { | *Line 101:* String::String(String& src) { |
| fsize=src.fsize; | fsize=src.fsize; |
| } | } |
| String& String::operator += (char *src) { | String& String::append(const String& src, Untaint_lang lang, bool forced) { |
| if(chunk_is_full()) | int src_used_rows=src.fused_rows; |
| expand(); | int dst_free_rows=link_row-append_here; |
| if(src_used_rows<=dst_free_rows) { | |
| // all new rows fit into last chunk | |
| memcpy(append_here, src.head.rows, sizeof(Chunk::Row)*src_used_rows); | |
| set_lang(append_here, lang, forced, src_used_rows); | |
| append_here+=src_used_rows; | |
| } else { | |
| // not all new rows fit into last chunk: shrinking it to used part, | |
| int used_rows=last_chunk->count-dst_free_rows; | |
| //int *countp=append_here | |
| link_row=&last_chunk->rows[last_chunk->count=used_rows]; | |
| // allocating only enough mem to fit src string rows | |
| // next append would allocate a new chunk | |
| last_chunk=static_cast<Chunk *>( | |
| malloc(sizeof(int)+sizeof(Chunk::Row)*src_used_rows+sizeof(Chunk *))); | |
| last_chunk->count=src_used_rows; | |
| link_row->link=last_chunk; | |
| append_here=link_row=&last_chunk->rows[src_used_rows]; | |
| const Chunk *old_chunk=&src.head; | |
| Chunk::Row *new_rows=last_chunk->rows; | |
| int rows_left_to_copy=src_used_rows; | |
| while(true) { | |
| int old_count=old_chunk->count; | |
| Chunk *next_chunk=old_chunk->rows[old_count].link; | |
| if(next_chunk) { | |
| // not last source chunk | |
| // taking it all | |
| memcpy(new_rows, old_chunk->rows, sizeof(Chunk::Row)*old_count); | |
| set_lang(new_rows, lang, forced, old_count); | |
| new_rows+=old_count; | |
| rows_left_to_copy-=old_count; | |
| append_here->item.ptr=src; | old_chunk=next_chunk; |
| fsize+=append_here->item.size=strlen(src); | } else { |
| append_here++; fused_rows++; | // the last source chunk |
| // taking only those rows of chunk that _left_to_copy | |
| memcpy(new_rows, old_chunk->rows, sizeof(Chunk::Row)*rows_left_to_copy); | |
| set_lang(new_rows, lang, forced, rows_left_to_copy); | |
| break; | |
| } | |
| } | |
| link_row->link=0; | |
| } | |
| fused_rows+=src_used_rows; | |
| fsize+=src.fsize; | |
| return *this; | return *this; |
| } | } |
| void String::set_lang(Chunk::Row *row, Untaint_lang lang, bool forced, size_t size) { | |
| if(lang==UL_PASS_APPENDED) | |
| return; | |
| while(size--) { | |
| Untaint_lang& item_lang=(row++)->item.lang; | |
| if(item_lang==UL_YES || forced) // tainted? need untaint language assignment | |
| item_lang=lang; // assign untaint language | |
| } | |
| } | |
| char *String::c_str() { | /*void String::change_lang(Untaint_lang lang) { |
| char *result=static_cast<char *>(pool->malloc(size()+1)); | |
| char *copy_here=result; | |
| Chunk *chunk=&head; | Chunk *chunk=&head; |
| do { | do { |
| Chunk::Row *row=chunk->rows; | Chunk::Row *row=chunk->rows; |
| *Line 112:* char *String::c_str() { | *Line 172:* char *String::c_str() { |
| if(row==append_here) | if(row==append_here) |
| goto break2; | goto break2; |
| memcpy(copy_here, row->item.ptr, row->item.size); | row->item.lang=lang; |
| copy_here+=row->item.size; | |
| row++; | row++; |
| } | } |
| chunk=row->link; | chunk=row->link; |
| } while(chunk); | } while(chunk); |
| break2: | break2: |
| *copy_here=0; | return; |
| return result; | } |
| */ | |
| String& String::real_append(STRING_APPEND_PARAMS) { | |
| if(!src) | |
| return *this; | |
| if(!size) | |
| size=strlen(src); | |
| if(!size) | |
| return *this; | |
| if(chunk_is_full()) | |
| expand(); | |
| append_here->item.ptr=src; | |
| fsize+=append_here->item.size=size; | |
| append_here->item.lang=tainted?UL_YES:UL_NO; | |
| #ifndef NO_STRING_ORIGIN | |
| append_here->item.origin.file=file; | |
| append_here->item.origin.line=line; | |
| #endif | |
| append_here++; fused_rows++; | |
| return *this; | |
| } | } |
| uint String::hash_code() { | uint String::hash_code() const { |
| uint result=0; | uint result=0; |
| Chunk *chunk=&head; | const Chunk *chunk=&head; |
| do { | do { |
| Chunk::Row *row=chunk->rows; | const Chunk::Row *row=chunk->rows; |
| for(int i=0; i<chunk->count; i++) { | for(int i=0; i<chunk->count; i++) { |
| if(row==append_here) | if(row==append_here) |
| goto break2; | goto break2; |
| *Line 142:* break2: | *Line 223:* break2: |
| return result; | return result; |
| } | } |
| bool String::operator == (String& src) { | int String::cmp(const String& src) const { |
| if(size() != src.size()) | const Chunk *a_chunk=&head; |
| return false; | const Chunk *b_chunk=&src.head; |
| const Chunk::Row *a_row=a_chunk->rows; | |
| // FIX: 0 approach! | const Chunk::Row *b_row=b_chunk->rows; |
| // use: in Hash it's "this" that has less chunks | int a_offset=0; |
| if(head.rows[0].item.size==src.head.rows[0].item.size) | int b_offset=0; |
| if(memcmp(head.rows[0].item.ptr, src.head.rows[0].item.ptr, head.rows[0].item.size)==0) | Chunk::Row *a_end=append_here; |
| return true; | Chunk::Row *b_end=src.append_here; |
| return false; | int a_countdown=a_chunk->count; |
| int b_countdown=b_chunk->count; | |
| bool a_break=false; | |
| bool b_break=false; | |
| int result; | |
| while(true) { | |
| a_break=a_row==a_end; | |
| b_break=b_row==b_end; | |
| if(a_break || b_break) | |
| break; | |
| int size_diff= | |
| (a_row->item.size-a_offset)- | |
| (b_row->item.size-b_offset); | |
| if(size_diff==0) { // a has same size as b | |
| result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, a_row->item.size-a_offset); | |
| if(result) | |
| return result; | |
| a_row++; a_countdown--; a_offset=0; | |
| b_row++; b_countdown--; b_offset=0; | |
| } else if (size_diff>0) { // a longer | |
| result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, b_row->item.size-b_offset); | |
| if(result) | |
| return result; | |
| a_offset+=b_row->item.size-b_offset; | |
| b_row++; b_countdown--; b_offset=0; | |
| } else { // b longer | |
| result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, a_row->item.size-a_offset); | |
| if(result) | |
| return result; | |
| b_offset+=a_row->item.size-a_offset; | |
| a_row++; a_countdown--; a_offset=0; | |
| } | |
| if(!a_countdown) { | |
| a_chunk=a_row->link; | |
| a_row=a_chunk->rows; | |
| a_countdown=a_chunk->count; | |
| } | |
| if(!b_countdown) { | |
| b_chunk=b_row->link; | |
| b_row=b_chunk->rows; | |
| b_countdown=b_chunk->count; | |
| } | |
| } | |
| if(a_break==b_break) // ended simultaneously | |
| result=0; | |
| else if(a_break) // first bytes equal, but a ended before b | |
| result=-1; | |
| else | |
| result=+1; | |
| return result; | |
| } | |
| int String::cmp(const char* b_ptr, int& partial, size_t src_size) const { | |
| size_t b_size=src_size?src_size:b_ptr?strlen(b_ptr):0; | |
| partial=0; | |
| const Chunk *a_chunk=&head; | |
| const Chunk::Row *a_row=a_chunk->rows; | |
| int a_offset=0; | |
| int b_offset=0; | |
| Chunk::Row *a_end=append_here; | |
| int a_countdown=a_chunk->count; | |
| bool a_break=false; | |
| bool b_break=false; | |
| while(true) { | |
| int size_diff= | |
| (a_row->item.size-a_offset)- | |
| (b_size-b_offset); | |
| if(size_diff==0) { // a has same size as b | |
| if(int result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset, a_row->item.size-a_offset)!=0) | |
| return result; | |
| a_row++; a_countdown--; a_offset=0; | |
| b_break=true; | |
| } else if (size_diff>0) { // a longer | |
| if(int result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset, b_size-b_offset)!=0) | |
| return result; | |
| a_offset+=b_size-b_offset; | |
| b_break=true; | |
| } else { // b longer | |
| if(int result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset, a_row->item.size-a_offset)!=0) | |
| return result; | |
| b_offset+=a_row->item.size-a_offset; | |
| a_row++; a_countdown--; a_offset=0; | |
| } | |
| a_break=a_row==a_end; | |
| if(a_break || b_break) | |
| break; | |
| if(!a_countdown) { | |
| a_chunk=a_row->link; | |
| a_row=a_chunk->rows; | |
| a_countdown=a_chunk->count; | |
| } | |
| } | |
| if(a_break==b_break) // ended simultaneously | |
| return 0; | |
| else if(a_break) // first bytes equal, but a ended before b | |
| return partial=-1; | |
| else | |
| return partial=+1; | |
| } | |
| #ifndef NO_STRING_ORIGIN | |
| const Origin& String::origin() const { | |
| if(!fused_rows) | |
| THROW(0, 0, | |
| 0, | |
| "String::origin() of empty string called"); | |
| // determining origin by last appended piece | |
| // because first one frequently constant. | |
| // ex: ^load[/file] "document_root" + "/file" | |
| return append_here[-1].item.origin; | |
| } | } |
| #endif |