Annotation of parser3/src/main/pa_string.C, revision 1.40
1.4 paf 1: /*
1.36 paf 2: Parser
3: Copyright (c) 2001 ArtLebedev Group (http://www.artlebedev.com)
1.37 paf 4: Author: Alexander Petrosyan <paf@design.ru> (http://design.ru/paf)
1.36 paf 5:
1.40 ! paf 6: $Id: pa_string.C,v 1.39 2001/03/13 14:28:51 paf Exp $
1.4 paf 7: */
8:
1.1 paf 9: #include <string.h>
10:
1.13 paf 11: #include "pa_pool.h"
1.12 paf 12: #include "pa_string.h"
1.5 paf 13: #include "pa_hash.h"
1.22 paf 14: #include "pa_exception.h"
1.1 paf 15:
1.18 paf 16: // String
17:
1.15 paf 18: String::String(Pool& apool) :
1.17 paf 19: Pooled(apool) {
1.28 paf 20: last_chunk=&head;
21: head.count=CR_PREALLOCATED_COUNT;
1.5 paf 22: append_here=head.rows;
1.2 paf 23: head.preallocated_link=0;
1.28 paf 24: link_row=&head.rows[head.count];
1.8 paf 25: fused_rows=fsize=0;
1.1 paf 26: }
27:
28: void String::expand() {
1.28 paf 29: int new_chunk_count=last_chunk->count+last_chunk->count*CR_GROW_PERCENT/100;
30: last_chunk=static_cast<Chunk *>(
1.30 paf 31: malloc(sizeof(int)+sizeof(Chunk::Row)*new_chunk_count+sizeof(Chunk *)));
1.28 paf 32: last_chunk->count=new_chunk_count;
33: link_row->link=last_chunk;
34: append_here=last_chunk->rows;
35: link_row=&last_chunk->rows[last_chunk->count];
1.8 paf 36: link_row->link=0;
1.1 paf 37: }
38:
1.40 ! paf 39: String::String(const String& src) : Pooled(src.pool()) {
1.8 paf 40: head.count=CR_PREALLOCATED_COUNT;
41:
42: int src_used_rows=src.used_rows();
43: if(src_used_rows<=head.count) {
1.10 paf 44: // all new rows fit into preallocated area
1.28 paf 45: int curr_chunk_rows=head.count;
1.8 paf 46: memcpy(head.rows, src.head.rows, sizeof(Chunk::Row)*src_used_rows);
47: append_here=&head.rows[src_used_rows];
48: link_row=&head.rows[curr_chunk_rows];
49: } else {
50: // warning:
1.10 paf 51: // heavily relies on the fact
52: // "preallocated area is the same for all strings"
1.8 paf 53: //
54: // info:
55: // allocating only enough mem to fit src string rows
56: // next append would allocate a new chunk
57: //
58: // new rows don't fit into preallocated area: splitting into two chunks
59: // preallocated chunk src to constructing head
60: memcpy(head.rows, src.head.rows, sizeof(Chunk::Row)*head.count);
61: // remaining rows into new_chunk
1.28 paf 62: int curr_chunk_rows=src_used_rows-head.count;
1.8 paf 63: Chunk *new_chunk=static_cast<Chunk *>(
1.30 paf 64: malloc(sizeof(int)+sizeof(Chunk::Row)*curr_chunk_rows+sizeof(Chunk *)));
1.8 paf 65: new_chunk->count=curr_chunk_rows;
66: head.preallocated_link=new_chunk;
1.28 paf 67: append_here=link_row=&new_chunk->rows[new_chunk->count];
1.8 paf 68:
69: Chunk *old_chunk=src.head.preallocated_link;
70: Chunk::Row *new_rows=new_chunk->rows;
1.28 paf 71: int rows_left_to_copy=new_chunk->count;
1.8 paf 72: while(true) {
73: int old_count=old_chunk->count;
74: Chunk *next_chunk=old_chunk->rows[old_count].link;
75: if(next_chunk) {
76: // not last source chunk
77: // taking it all
78: memcpy(new_rows, old_chunk->rows, sizeof(Chunk::Row)*old_count);
79: new_rows+=old_count;
80: rows_left_to_copy-=old_count;
81:
82: old_chunk=next_chunk;
83: } else {
84: // the last source chunk
85: // taking only those rows of chunk that _left_to_copy
86: memcpy(new_rows, old_chunk->rows, sizeof(Chunk::Row)*rows_left_to_copy);
87: break;
88: }
89: }
1.5 paf 90: }
1.8 paf 91: link_row->link=0;
92: fused_rows=src_used_rows;
93: fsize=src.fsize;
1.5 paf 94: }
1.28 paf 95:
1.34 paf 96: String& String::append(const String& src, Untaint_lang lang) {
1.28 paf 97: int src_used_rows=src.used_rows();
98: int dst_free_rows=link_row-append_here;
99:
100: if(src_used_rows<=dst_free_rows) {
101: // all new rows fit into last chunk
102: memcpy(append_here, src.head.rows, sizeof(Chunk::Row)*src_used_rows);
1.34 paf 103: set_lang(append_here, lang, src_used_rows);
1.28 paf 104: append_here+=src_used_rows;
105: } else {
1.31 paf 106: // not all new rows fit into last chunk: shrinking it to used part,
1.28 paf 107: int used_rows=last_chunk->count-dst_free_rows;
108: //int *countp=append_here
109: link_row=&last_chunk->rows[last_chunk->count=used_rows];
110: // allocating only enough mem to fit src string rows
111: // next append would allocate a new chunk
112: last_chunk=static_cast<Chunk *>(
1.30 paf 113: malloc(sizeof(int)+sizeof(Chunk::Row)*src_used_rows+sizeof(Chunk *)));
1.28 paf 114: last_chunk->count=src_used_rows;
115: link_row->link=last_chunk;
116: append_here=link_row=&last_chunk->rows[src_used_rows];
117:
1.31 paf 118: const Chunk *old_chunk=&src.head;
1.28 paf 119: Chunk::Row *new_rows=last_chunk->rows;
120: int rows_left_to_copy=src_used_rows;
121: while(true) {
122: int old_count=old_chunk->count;
123: Chunk *next_chunk=old_chunk->rows[old_count].link;
124: if(next_chunk) {
125: // not last source chunk
126: // taking it all
127: memcpy(new_rows, old_chunk->rows, sizeof(Chunk::Row)*old_count);
1.34 paf 128: set_lang(new_rows, lang, old_count);
1.28 paf 129: new_rows+=old_count;
130: rows_left_to_copy-=old_count;
131:
132: old_chunk=next_chunk;
133: } else {
134: // the last source chunk
135: // taking only those rows of chunk that _left_to_copy
136: memcpy(new_rows, old_chunk->rows, sizeof(Chunk::Row)*rows_left_to_copy);
1.34 paf 137: set_lang(new_rows, lang, rows_left_to_copy);
1.28 paf 138: break;
139: }
140: }
1.29 paf 141: link_row->link=0;
1.28 paf 142: }
143: fused_rows+=src_used_rows;
144: fsize+=src.fsize;
145:
146: return *this;
1.23 paf 147: }
1.34 paf 148: void String::set_lang(Chunk::Row *row, Untaint_lang lang, size_t size) {
1.40 ! paf 149: if(lang==PASS_APPEND)
1.34 paf 150: return;
151:
152: while(size--) {
153: Untaint_lang& item_lang=(row++)->item.lang;
154: if(item_lang==YES) // tainted? need untaint language assignment
155: item_lang=lang; // assign untaint language
156: }
1.40 ! paf 157: }
! 158:
! 159: void String::change_lang(Untaint_lang lang) {
! 160: Chunk *chunk=&head;
! 161: do {
! 162: Chunk::Row *row=chunk->rows;
! 163: for(int i=0; i<chunk->count; i++) {
! 164: if(row==append_here)
! 165: goto break2;
! 166:
! 167: row->item.lang=lang;
! 168: row++;
! 169: }
! 170: chunk=row->link;
! 171: } while(chunk);
! 172: break2:
! 173: return;
1.34 paf 174: }
1.5 paf 175:
1.13 paf 176: String& String::real_append(STRING_APPEND_PARAMS) {
1.9 paf 177: if(!src)
178: return *this;
1.26 paf 179: if(!size)
180: size=strlen(src);
181: if(!size)
1.9 paf 182: return *this;
183:
1.1 paf 184: if(chunk_is_full())
185: expand();
186:
187: append_here->item.ptr=src;
1.26 paf 188: fsize+=append_here->item.size=size;
1.38 paf 189: append_here->item.lang=tainted?YES:NO;
1.13 paf 190: #ifndef NO_STRING_ORIGIN
1.14 paf 191: append_here->item.origin.file=file;
192: append_here->item.origin.line=line;
1.13 paf 193: #endif
1.8 paf 194: append_here++; fused_rows++;
1.1 paf 195:
196: return *this;
197: }
198:
1.16 paf 199: uint String::hash_code() const {
1.7 paf 200: uint result=0;
1.5 paf 201:
1.16 paf 202: const Chunk *chunk=&head;
1.5 paf 203: do {
1.16 paf 204: const Chunk::Row *row=chunk->rows;
1.5 paf 205: for(int i=0; i<chunk->count; i++) {
206: if(row==append_here)
207: goto break2;
208:
1.6 paf 209: result=Hash::generic_code(result, row->item.ptr, row->item.size);
1.5 paf 210: row++;
211: }
212: chunk=row->link;
213: } while(chunk);
214: break2:
215: return result;
216: }
217:
1.32 paf 218: int String::cmp(const String& src) const {
1.16 paf 219: const Chunk *a_chunk=&head;
220: const Chunk *b_chunk=&src.head;
221: const Chunk::Row *a_row=a_chunk->rows;
222: const Chunk::Row *b_row=b_chunk->rows;
1.9 paf 223: int a_offset=0;
224: int b_offset=0;
225: Chunk::Row *a_end=append_here;
226: Chunk::Row *b_end=src.append_here;
1.11 paf 227: int a_countdown=a_chunk->count;
228: int b_countdown=b_chunk->count;
1.9 paf 229: bool a_break=false;
230: bool b_break=false;
1.32 paf 231: int result;
1.9 paf 232: while(true) {
1.33 paf 233: a_break=a_row==a_end;
234: b_break=b_row==b_end;
235: if(a_break || b_break)
236: break;
237:
1.9 paf 238: int size_diff=
239: (a_row->item.size-a_offset)-
240: (b_row->item.size-b_offset);
241:
242: if(size_diff==0) { // a has same size as b
1.32 paf 243: result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, a_row->item.size-a_offset);
244: if(result)
245: return result;
1.11 paf 246: a_row++; a_countdown--; a_offset=0;
247: b_row++; b_countdown--; b_offset=0;
1.9 paf 248: } else if (size_diff>0) { // a longer
1.32 paf 249: result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, b_row->item.size-b_offset);
250: if(result)
251: return result;
1.9 paf 252: a_offset+=b_row->item.size-b_offset;
1.11 paf 253: b_row++; b_countdown--; b_offset=0;
1.9 paf 254: } else { // b longer
1.32 paf 255: result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, a_row->item.size-a_offset);
256: if(result)
257: return result;
1.9 paf 258: b_offset+=a_row->item.size-a_offset;
1.11 paf 259: a_row++; a_countdown--; a_offset=0;
1.9 paf 260: }
261:
1.11 paf 262: if(!a_countdown) {
1.9 paf 263: a_chunk=a_row->link;
264: a_row=a_chunk->rows;
1.11 paf 265: a_countdown=a_chunk->count;
1.9 paf 266: }
1.11 paf 267: if(!b_countdown) {
1.9 paf 268: b_chunk=b_row->link;
269: b_row=b_chunk->rows;
1.11 paf 270: b_countdown=b_chunk->count;
1.27 paf 271: }
272: }
1.32 paf 273: if(a_break==b_break) // ended simultaneously
274: result=0;
275: else if(a_break) // first bytes equal, but a ended before b
276: result=-1;
277: else
278: result=+1;
279: return result;
1.27 paf 280: }
281:
1.39 paf 282: bool String::operator == (const char* b_ptr) const {
1.27 paf 283: size_t b_size=b_ptr?strlen(b_ptr):0;
284: if(size() != b_size)
285: return false;
286:
287: const Chunk *a_chunk=&head;
288: const Chunk::Row *a_row=a_chunk->rows;
289: int a_offset=0;
290: int b_offset=0;
291: Chunk::Row *a_end=append_here;
292: int a_countdown=a_chunk->count;
293: bool a_break=false;
294: bool b_break=false;
295: while(true) {
296: int size_diff=
297: (a_row->item.size-a_offset)-
298: (b_size-b_offset);
299:
300: if(size_diff==0) { // a has same size as b
301: if(memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset, a_row->item.size-a_offset)!=0)
302: return false;
303: a_row++; a_countdown--; a_offset=0;
304: b_break=true;
305: } else if (size_diff>0) { // a longer
306: if(memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset, b_size-b_offset)!=0)
307: return false;
308: a_offset+=b_size-b_offset;
309: b_break=true;
310: } else { // b longer
311: if(memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset, a_row->item.size-a_offset)!=0)
312: return false;
313: b_offset+=a_row->item.size-a_offset;
314: a_row++; a_countdown--; a_offset=0;
315: }
316:
317: a_break=a_row==a_end;
318: if(a_break || b_break)
319: break;
320:
321: if(!a_countdown) {
322: a_chunk=a_row->link;
323: a_row=a_chunk->rows;
324: a_countdown=a_chunk->count;
1.9 paf 325: }
326: }
327: return a_break==b_break;
1.5 paf 328: }
E-mail: