Annotation of parser3/src/main/pa_string.C, revision 1.172.2.2
1.45 paf 1: /** @file
1.55 paf 2: Parser: string class. @see untaint.C.
1.46 paf 3:
1.172 paf 4: Copyright (c) 2001, 2003 ArtLebedev Group (http://www.artlebedev.com)
1.138 paf 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.164 paf 6: */
1.46 paf 7:
1.172.2.2! paf 8: static const char* IDENT_STRING_C="$Date: 2003/01/22 15:39:08 $";
1.4 paf 9:
1.70 paf 10: #include "pcre.h"
11:
1.13 paf 12: #include "pa_pool.h"
1.12 paf 13: #include "pa_string.h"
1.172.2.1 paf 14: //#include "pa_hash.h"
1.22 paf 15: #include "pa_exception.h"
1.172.2.2! paf 16: //#include "pa_common.h"
1.60 paf 17: #include "pa_array.h"
1.172.2.2! paf 18: //#include "pa_globals.h"
1.61 paf 19: #include "pa_table.h"
1.101 parser 20: #include "pa_dictionary.h"
1.132 paf 21: #include "pa_charset.h"
1.60 paf 22:
1.172.2.2! paf 23: // helpers
1.139 paf 24:
1.172.2.2! paf 25: /// String::match uses this as replace & global search table columns
! 26: const int MAX_STRING_MATCH_TABLE_COLUMNS=100;
1.139 paf 27:
1.172.2.2! paf 28: class String_match_table_template_columns: public Array<smart_ptr<String>> {
! 29: char cn_cstr[MAX_STRING_MATCH_TABLE_COLUMNS][3/*strlen("100")*/+1/*terminating 0*/];
! 30: String cn[MAX_STRING_MATCH_TABLE_COLUMNS];
! 31: public:
! 32: string_match_table_template_columns_class() {
! 33: *this+=smart_ptr<String>(new String("prematch"));
! 34: *this+=smart_ptr<String>(new String("match"));
! 35: *this+=smart_ptr<String>(new String("postmatch"));
! 36: for(int i=0; i<MAX_STRING_MATCH_TABLE_COLUMNS; i++) {
! 37: sprintf(cn_cstr[i], "%d", 1+i);
! 38: cn[i].APPEND_CLEAN(cn_cstr[i]);
! 39: *this+=cn[i];
! 40: }
! 41: }
! 42: } string_match_table_template_columns;
1.41 paf 43:
1.172.2.2! paf 44: Table string_match_table_template(0, string_match_table_template_columns);
! 45:
! 46: // methods
! 47:
! 48: String::String(const char *src, size_t src_size, bool tainted): Array(1), fsize(0) {
1.41 paf 49: if(src)
1.75 paf 50: if(tainted)
51: APPEND_TAINTED(src, src_size, 0, 0);
1.41 paf 52: else
1.75 paf 53: APPEND_CLEAN(src, src_size, 0, 0);
1.1 paf 54: }
1.140 paf 55:
1.172.2.2! paf 56: String::String(const String& src): Array(src.count()) {
1.169 paf 57: append(src, UL_PASS_APPENDED);
1.120 paf 58: }
1.28 paf 59:
1.13 paf 60: String& String::real_append(STRING_APPEND_PARAMS) {
1.9 paf 61: if(!src)
62: return *this;
1.26 paf 63: if(!size)
64: size=strlen(src);
65: if(!size)
1.9 paf 66: return *this;
1.122 paf 67:
1.172.2.2! paf 68: if(is_full())
! 69: expand(fdelta);
1.1 paf 70:
1.172.2.2! paf 71: String_fragment *fragment=felements[fused++];
! 72: fragment->item.ptr=src;
! 73: fragment->item.size=size;
! 74: fragment->item.lang=lang;
1.13 paf 75: #ifndef NO_STRING_ORIGIN
1.172.2.2! paf 76: fragment->item.origin.file=file;
! 77: fragment->item.origin.line=line;
1.13 paf 78: #endif
1.1 paf 79:
80: return *this;
81: }
82:
1.16 paf 83: uint String::hash_code() const {
1.7 paf 84: uint result=0;
1.123 paf 85: STRING_FOREACH_ROW(
1.6 paf 86: result=Hash::generic_code(result, row->item.ptr, row->item.size);
1.123 paf 87: );
1.5 paf 88: return result;
89: }
90:
1.60 paf 91: /// @todo move 'lang' skipping to pos
92: int String::cmp(int& partial, const String& src,
93: size_t this_offset, Untaint_lang lang) const {
1.59 paf 94: partial=-1;
1.125 paf 95: size_t a_size=size();
96: this_offset=min(this_offset, a_size-1);
1.55 paf 97:
1.151 paf 98: const Chunk *a_chunk=&head.chunk;
99: const Chunk *b_chunk=&src.head.chunk;
1.16 paf 100: const Chunk::Row *a_row=a_chunk->rows;
101: const Chunk::Row *b_row=b_chunk->rows;
1.55 paf 102: size_t a_offset=this_offset;
103: size_t b_offset=0;
1.9 paf 104: Chunk::Row *a_end=append_here;
105: Chunk::Row *b_end=src.append_here;
1.116 paf 106: uint a_countdown=a_chunk->count;
107: uint b_countdown=b_chunk->count;
108: int result;
1.60 paf 109: size_t pos=0;
1.33 paf 110:
1.125 paf 111: bool a_break=a_size==0;
1.140 paf 112: bool b_break=src.is_empty();
1.83 parser 113: if(!(a_break || b_break)) while(true) {
1.55 paf 114: if(pos+a_row->item.size > this_offset) {
1.136 paf 115: if(lang!=UL_UNSPECIFIED && a_row->item.lang>lang)
1.60 paf 116: return -1; // wrong lang -- bail out
117:
1.55 paf 118: int size_diff=
119: (a_row->item.size-a_offset)-
120: (b_row->item.size-b_offset);
121:
122: if(size_diff==0) { // a has same size as b
1.60 paf 123: result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset,
124: a_row->item.size-a_offset);
1.55 paf 125: if(result)
126: return result;
1.60 paf 127: pos+=a_row->item.size;
1.55 paf 128: a_row++; a_countdown--; a_offset=0;
129: b_row++; b_countdown--; b_offset=0;
130: } else if (size_diff>0) { // a longer
1.60 paf 131: result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset,
132: b_row->item.size-b_offset);
1.55 paf 133: if(result)
134: return result;
135: a_offset+=b_row->item.size-b_offset;
136: b_row++; b_countdown--; b_offset=0;
137: } else { // b longer
1.60 paf 138: result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset,
139: a_row->item.size-a_offset);
1.55 paf 140: if(result)
141: return result;
142: b_offset+=a_row->item.size-a_offset;
1.60 paf 143: pos+=a_row->item.size;
1.55 paf 144: a_row++; a_countdown--; a_offset=0;
145: }
1.83 parser 146: if(b_break=b_row==b_end) {
147: a_break=a_row==a_end;
148: break;
149: }
1.55 paf 150: if(!b_countdown) {
151: b_chunk=b_row->link;
152: b_row=b_chunk->rows;
153: b_countdown=b_chunk->count;
154: }
155: } else {
1.60 paf 156: a_offset-=a_row->item.size;
157: pos+=a_row->item.size;
158: a_row++; a_countdown--;
1.9 paf 159: }
160:
1.83 parser 161: if(a_break=a_row==a_end) {
162: b_break=b_row==b_end;
163: break;
164: }
1.11 paf 165: if(!a_countdown) {
1.9 paf 166: a_chunk=a_row->link;
167: a_row=a_chunk->rows;
1.11 paf 168: a_countdown=a_chunk->count;
1.9 paf 169: }
1.27 paf 170: }
1.55 paf 171: if(a_break==b_break) { // ended simultaneously
172: partial=0; return 0;
173: } else if(a_break) { // first bytes equal, but a ended before b
174: partial=1; return -1;
175: } else {
176: partial=2; return +1;
177: }
1.27 paf 178: }
179:
1.60 paf 180: /// @todo move 'lang' skipping to pos
1.59 paf 181: int String::cmp(int& partial, const char* b_ptr, size_t src_size,
1.60 paf 182: size_t this_offset, Untaint_lang lang) const {
1.59 paf 183: partial=-1;
1.125 paf 184: size_t a_size=size();
1.50 paf 185: size_t b_size=src_size?src_size:b_ptr?strlen(b_ptr):0;
1.125 paf 186: this_offset=min(this_offset, a_size-1);
1.27 paf 187:
1.151 paf 188: const Chunk *a_chunk=&head.chunk;
1.27 paf 189: const Chunk::Row *a_row=a_chunk->rows;
1.59 paf 190: size_t a_offset=this_offset;
1.55 paf 191: size_t b_offset=0;
1.27 paf 192: Chunk::Row *a_end=append_here;
1.116 paf 193: uint a_countdown=a_chunk->count;
1.60 paf 194: size_t pos=0;
1.52 paf 195:
1.125 paf 196: bool a_break=a_size==0;
1.83 parser 197: bool b_break=b_size==0;
198: if(!(a_break || b_break)) while(true) {
1.59 paf 199: if(pos+a_row->item.size > this_offset) {
1.136 paf 200: if(lang!=UL_UNSPECIFIED && a_row->item.lang>lang)
1.60 paf 201: return -1; // wrong lang -- bail out
202:
1.59 paf 203: int size_diff=
204: (a_row->item.size-a_offset)-
205: (b_size-b_offset);
206:
207: if(size_diff==0) { // a has same size as b
1.116 paf 208: if(int result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset,
1.59 paf 209: a_row->item.size-a_offset)!=0)
210: return result;
1.60 paf 211: pos+=a_row->item.size;
1.59 paf 212: a_row++; a_countdown--; a_offset=0;
213: b_break=true;
214: } else if (size_diff>0) { // a longer
1.116 paf 215: if(int result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset,
1.59 paf 216: b_size-b_offset)!=0)
217: return result;
218: a_offset+=b_size-b_offset;
219: b_break=true;
220: } else { // b longer
1.116 paf 221: if(int result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset,
1.59 paf 222: a_row->item.size-a_offset)!=0)
223: return result;
224: b_offset+=a_row->item.size-a_offset;
1.60 paf 225: pos+=a_row->item.size;
1.59 paf 226: a_row++; a_countdown--; a_offset=0;
227: }
228: } else {
1.60 paf 229: a_offset-=a_row->item.size;
230: pos+=a_row->item.size;
231: a_row++; a_countdown--;
1.27 paf 232: }
233:
1.86 parser 234: a_break=a_row==a_end;
235: if(a_break || b_break)
1.83 parser 236: break;
1.27 paf 237: if(!a_countdown) {
238: a_chunk=a_row->link;
239: a_row=a_chunk->rows;
240: a_countdown=a_chunk->count;
1.9 paf 241: }
242: }
1.55 paf 243: if(a_break==b_break) { // ended simultaneously
244: partial=0; return 0;
245: } else if(a_break) { // first bytes equal, but a ended before b
246: partial=1; return -1;
247: } else {
248: partial=2; return +1;
249: }
1.5 paf 250: }
1.46 paf 251:
252: #ifndef NO_STRING_ORIGIN
253: const Origin& String::origin() const {
1.140 paf 254: if(is_empty()) {
1.96 parser 255: static const Origin empty_origin={"empty string"};
256: return empty_origin;
257: }
1.46 paf 258:
1.147 paf 259: // determining origin by first piece or last appended piece
260: // because any of them can be constant=without origin:
1.50 paf 261: // ex: ^load[/file] "document_root" + "/file"
1.80 paf 262: // when last peice is constant,
263: // ex: parser_root_auto_path{dynamic} / auto.p{const}
264: // using first piece
1.151 paf 265: Origin& first_origin=head.chunk.rows[0].item.origin;
1.147 paf 266: return first_origin.file ? first_origin : append_here[-1].item.origin;
1.46 paf 267: }
268: #endif
1.53 paf 269:
1.69 paf 270: String& String::mid(size_t start, size_t finish) const {
1.172.2.1 paf 271: String& result=*new String();
1.107 parser 272:
1.166 paf 273: start=min(start, size());
1.167 paf 274: finish=max(start, finish);
1.60 paf 275: if(start==finish)
1.107 parser 276: return result;
1.53 paf 277:
278: size_t pos=0;
1.123 paf 279: STRING_FOREACH_ROW(
280: size_t item_finish=pos+row->item.size;
281: if(item_finish > start) { // started now or already?
1.140 paf 282: bool started=result.is_empty(); // started now?
1.123 paf 283: bool finished=finish <= item_finish; // finished now?
284: size_t offset=started?start-pos:0;
285: size_t size=finished?finish-pos:row->item.size;
286: result.APPEND(
287: row->item.ptr+offset, size-offset,
288: row->item.lang,
289: row->item.origin.file, row->item.origin.line);
290: if(finished)
1.53 paf 291: goto break2;
292: }
1.123 paf 293: pos+=row->item.size;
294: );
1.53 paf 295: break2:
1.60 paf 296: // SAPI::log(pool(), "piece of '%s' from %d to %d is '%s'",
297: //cstr(), start, finish, result.cstr());
1.53 paf 298: return result;
1.54 paf 299: }
300:
1.60 paf 301: int String::pos(const String& substr,
1.116 paf 302: int result, Untaint_lang lang) const {
1.125 paf 303: size_t self_size=size();
1.131 paf 304: for(; size_t(result)<self_size; result++) {
1.60 paf 305: int partial; cmp(partial, substr, result, lang);
1.58 paf 306: if(
307: partial==0 || // full match
308: partial==2) // 'substr' starts 'this'+'result'
309: return result;
310: }
311:
312: return -1;
313: }
314:
1.60 paf 315: int String::pos(const char *substr, size_t substr_size,
1.116 paf 316: int result, Untaint_lang lang) const {
1.125 paf 317: size_t self_size=size();
1.131 paf 318: for(; size_t(result)<self_size; result++) {
1.60 paf 319: int partial; cmp(partial, substr, substr_size, result, lang);
1.55 paf 320: if(
321: partial==0 || // full match
322: partial==2) // 'substr' starts 'this'+'result'
323: return result;
324: }
325:
326: return -1;
1.60 paf 327: }
328:
329: void String::split(Array& result,
330: size_t* pos_after_ref,
331: const char *delim, size_t delim_size,
332: Untaint_lang lang, int limit) const {
1.125 paf 333: size_t self_size=size();
1.60 paf 334: if(delim_size) {
335: size_t pos_after=pos_after_ref?*pos_after_ref:0;
336: int pos_before;
337: // while we have 'delim'...
338: for(; (pos_before=pos(delim, delim_size, pos_after, lang))>=0 && limit; limit--) {
1.69 paf 339: result+=&mid(pos_after, pos_before);
1.60 paf 340: pos_after=pos_before+delim_size;
341: }
342: // last piece
1.124 paf 343: if(pos_after<self_size && limit) {
344: result+=&mid(pos_after, self_size);
345: pos_after=self_size;
1.60 paf 346: }
347: if(pos_after_ref)
348: *pos_after_ref=pos_after;
349: } else { // empty delim
350: result+=this;
351: if(pos_after_ref)
1.124 paf 352: *pos_after_ref+=self_size;
1.60 paf 353: }
354: }
355:
356: void String::split(Array& result,
357: size_t* pos_after_ref,
358: const String& delim, Untaint_lang lang,
359: int limit) const {
1.140 paf 360: if(!delim.is_empty()) {
1.60 paf 361: size_t pos_after=pos_after_ref?*pos_after_ref:0;
362: int pos_before;
363: // while we have 'delim'...
364: for(; (pos_before=pos(delim, pos_after, lang))>=0 && limit; limit--) {
1.69 paf 365: result+=&mid(pos_after, pos_before);
1.60 paf 366: pos_after=pos_before+delim.size();
367: }
368: // last piece
369: if(pos_after<size() && limit) {
1.69 paf 370: result+=&mid(pos_after, size());
1.60 paf 371: pos_after=size();
372: }
373: if(pos_after_ref)
374: *pos_after_ref=pos_after;
375: } else { // empty delim
376: result+=this;
377: if(pos_after_ref)
378: *pos_after_ref+=size();
379: }
1.61 paf 380: }
381:
1.154 paf 382: static void regex_options(const String *options, int *result, bool& need_pre_post_match){
1.63 paf 383: struct Regex_option {
1.153 paf 384: const char *keyL;
385: const char *keyU;
1.63 paf 386: int clear, set;
387: int *result;
1.154 paf 388: bool *flag;
1.63 paf 389: } regex_option[]={
1.153 paf 390: {"i", "I", 0, PCRE_CASELESS, result}, // a=A
391: {"s", "S", 0, PCRE_DOTALL, result}, // \n\n$ [default]
392: {"x", "U", 0, PCRE_EXTENDED, result}, // whitespace in regex ignored
393: {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$
394: {"g", "G", 0, true, result+1}, // many rows
1.154 paf 395: {"'", 0, 0, 0, 0, &need_pre_post_match},
396: {0}
1.63 paf 397: };
1.171 paf 398: result[0]=PCRE_EXTRA | PCRE_DOTALL | PCRE_DOLLAR_ENDONLY;
1.63 paf 399: result[1]=0;
400:
401: if(options)
1.153 paf 402: for(Regex_option *o=regex_option; o->keyL; o++)
1.154 paf 403: if(options->pos(o->keyL)>=0
404: || (o->keyU && options->pos(o->keyU)>=0)) {
405: if(o->flag)
406: *o->flag=true;
407: else { // result
408: *o->result &= ~o->clear;
409: *o->result |= o->set;
410: }
1.63 paf 411: }
412: }
413:
1.155 paf 414: /// @todo make replacement Table stacked
1.172.2.1 paf 415: bool String::match(Charset& source_charset,
1.77 paf 416: const String *aorigin,
1.62 paf 417: const String& regexp,
1.63 paf 418: const String *options,
1.64 paf 419: Table **table,
1.95 parser 420: Row_action row_action, void *info,
421: bool *was_global) const {
1.64 paf 422:
1.140 paf 423: if(regexp.is_empty())
1.149 paf 424: throw Exception(0,
1.73 paf 425: aorigin,
426: "regexp is empty");
1.154 paf 427:
1.118 paf 428: const char *pattern=regexp.cstr();
1.62 paf 429: const char *errptr;
430: int erroffset;
1.154 paf 431: bool need_pre_post_match=false;
432: int option_bits[2]; regex_options(options, option_bits, need_pre_post_match);
1.95 parser 433: if(was_global)
434: *was_global=option_bits[1]!=0;
1.63 paf 435: pcre *code=pcre_compile(pattern, option_bits[0],
1.62 paf 436: &errptr, &erroffset,
1.172.2.1 paf 437: source_charset.pcre_tables);
1.62 paf 438:
1.67 paf 439: if(!code)
1.149 paf 440: throw Exception(0,
1.69 paf 441: ®exp.mid(erroffset, regexp.size()),
1.74 paf 442: "regular expression syntax error - %s", errptr);
1.62 paf 443:
1.63 paf 444: int info_substrings=pcre_info(code, 0, 0);
445: if(info_substrings<0) {
1.100 parser 446: pcre_free(code);
1.149 paf 447: throw Exception(0,
1.73 paf 448: aorigin,
1.76 paf 449: "pcre_info error (%d)",
1.73 paf 450: info_substrings);
1.63 paf 451: }
452:
1.158 paf 453: const char *subject=cstr();
1.62 paf 454: int length=strlen(subject);
1.155 paf 455: const int ovecsize=(1/*match*/+MAX_STRING_MATCH_TABLE_COLUMNS)*3;
456: int ovector[ovecsize];
457:
458: // create table
1.172.2.1 paf 459: *table=new Table(*string_match_table_template);
1.63 paf 460:
1.64 paf 461: int exec_option_bits=0;
1.154 paf 462: int prestart=0;
463: int poststart=0;
464: int postfinish=size();
1.63 paf 465: while(true) {
466: int exec_substrings=pcre_exec(code, 0,
1.154 paf 467: subject, length, prestart,
1.64 paf 468: exec_option_bits, ovector, ovecsize);
1.63 paf 469:
470: if(exec_substrings==PCRE_ERROR_NOMATCH) {
1.100 parser 471: pcre_free(code);
1.154 paf 472: row_action(**table, 0/*last time, no row*/, 0, 0, poststart, postfinish, info);
1.63 paf 473: return option_bits[1]!=0; // global=true+table, not global=false
474: }
475:
476: if(exec_substrings<0) {
1.100 parser 477: pcre_free(code);
1.149 paf 478: throw Exception(0,
1.63 paf 479: aorigin,
1.76 paf 480: "regular expression execute error (%d)",
1.63 paf 481: exec_substrings);
482: }
483:
1.154 paf 484: int prefinish=ovector[0];
485: poststart=ovector[1];
1.172.2.1 paf 486: Array& row=*new Array();
1.154 paf 487: row+=need_pre_post_match?&mid(0, prefinish):0; // .prematch column value
488: row+=need_pre_post_match?&mid(prefinish, poststart):0; // .match
489: row+=need_pre_post_match?&mid(poststart, postfinish):0; // .postmatch
1.63 paf 490:
491: for(int i=1; i<exec_substrings; i++) {
1.69 paf 492: // -1:-1 case handled peacefully by mid() itself
493: row+=&mid(ovector[i*2+0], ovector[i*2+1]); // .i column value
1.63 paf 494: }
495:
1.154 paf 496: row_action(**table, &row, prestart, prefinish, poststart, postfinish, info);
1.63 paf 497:
1.154 paf 498: if(!option_bits[1] || prestart==poststart) { // not global | going to hang
1.100 parser 499: pcre_free(code);
1.154 paf 500: row_action(**table, 0/*last time, no row*/, 0, 0, poststart, postfinish, info);
1.63 paf 501: return true;
502: }
1.154 paf 503: prestart=poststart;
1.63 paf 504:
505: /*
506: if(option_bits[0] & PCRE_MULTILINE)
1.64 paf 507: exec_option_bits|=PCRE_NOTBOL; // start of subject+startoffset not BOL
1.63 paf 508: */
509: }
1.82 parser 510: }
511:
1.172.2.1 paf 512: String& String::change_case(Charset& source_charset, Change_case_kind kind) const {
513: const unsigned char *tables=source_charset.pcre_tables;
514: String& result=*new String();
1.82 parser 515:
516: const unsigned char *a;
517: const unsigned char *b;
518: switch(kind) {
519: case CC_UPPER:
520: a=tables+lcc_offset;
521: b=tables+fcc_offset;
522: break;
523: case CC_LOWER:
524: a=tables+lcc_offset;
525: b=0;
526: break;
527: default:
1.149 paf 528: throw Exception(0,
1.82 parser 529: this,
530: "unknown change case kind #%d",
531: static_cast<int>(kind)); // never
532: a=b=0; // calm, compiler
533: break; // never
534: }
535:
1.143 paf 536: STRING_FOREACH_ROW(
1.172.2.1 paf 537: char *new_cstr=(char *)pa_malloc(row->item.size);
1.143 paf 538: char *dest=new_cstr;
539: const char *src=row->item.ptr;
540: for(int size=row->item.size; size--; src++) {
541: unsigned char c=a[(unsigned char)*src];
542: if(b)
543: c=b[c];
1.82 parser 544:
1.143 paf 545: *dest++=(char)c;
1.82 parser 546: }
1.143 paf 547:
548: result.APPEND(new_cstr, row->item.size,
549: row->item.lang,
550: row->item.origin.file, row->item.origin.line);
551: );
1.89 parser 552:
1.101 parser 553: return result;
554: }
555:
1.150 paf 556: /// @test if in some piece were found no dict words, append it, not it's duplicate
1.172.2.1 paf 557: String& String::replace(Dictionary& dict) const {
1.170 paf 558: char *lcstr=cstr();
559: const char *current=lcstr;
560:
1.172.2.1 paf 561: String& result=*new String();
1.143 paf 562: STRING_FOREACH_ROW(
1.170 paf 563: IFNDEF_NO_STRING_ORIGIN(
564: const char *joined_origin_file=row->item.origin.file;
565: const size_t joined_origin_line=row->item.origin.line;
566: );
567: uchar joined_lang=row->item.lang;
568: const char *joined_ptr=current;
569: // calc size
570: size_t joined_size=0;
571: STRING_PREPARED_FOREACH_ROW(*this,
572: if(row->item.lang==joined_lang)
573: joined_size+=row->item.size;
574: else
575: break; // before non-ours
576: );
577: current+=joined_size;
578:
579: // pointers are after joined piece
580: // & one step back, see STRING_FOREACH_ROW
581: --row; ++countdown;
582:
1.172.2.1 paf 583: char *new_cstr=(char *)pa_malloc((size_t)ceil(joined_size*dict.max_ratio()));
1.123 paf 584: char *dest=new_cstr;
1.170 paf 585: while(joined_size) {
586: // there is a row where first column starts 'joined_ptr'
587: if(Table::Item *item=dict.first_that_starts(joined_ptr, joined_size)) {
1.123 paf 588: // get a=>b values
589: const String& a=*static_cast<Array *>(item)->get_string(0);
590: const String& b=*static_cast<Array *>(item)->get_string(1);
1.170 paf 591: // skip 'a' in 'joined_ptr' && reduce work size
592: joined_ptr+=a.size(); joined_size-=a.size();
1.123 paf 593: // write 'b' to 'dest' && skip 'b' in 'dest'
594: b.store_to(dest); dest+=b.size();
595: } else {
596: // write a char to b && reduce work size
1.170 paf 597: *dest++=*joined_ptr++; joined_size--;
1.101 parser 598: }
599: }
600:
1.170 paf 601: result.APPEND(new_cstr, dest-new_cstr, joined_lang,
602: joined_origin_file, joined_origin_line);
1.156 paf 603: );
1.170 paf 604:
1.156 paf 605: return result;
606: }
607:
1.172.2.1 paf 608: String& String::join_chains(char** acstr) const {
1.156 paf 609: char *lcstr=cstr();
610: const char *current=lcstr;
611:
1.172.2.1 paf 612: String& result=*new String();
1.156 paf 613: STRING_FOREACH_ROW(
614: IFNDEF_NO_STRING_ORIGIN(
615: const char *joined_origin_file=row->item.origin.file;
616: const size_t joined_origin_line=row->item.origin.line;
617: );
618: uchar joined_lang=row->item.lang;
619: const char *joined_ptr=current;
620: // calc size
621: size_t joined_size=0;
622: STRING_PREPARED_FOREACH_ROW(*this,
623: if(row->item.lang==joined_lang)
624: joined_size+=row->item.size;
625: else
626: break; // before non-ours
627: );
628: current+=joined_size;
629:
630: // pointers are after joined piece
631: // & one step back, see STRING_FOREACH_ROW
632: --row; ++countdown;
633:
634: result.APPEND(joined_ptr, joined_size, joined_lang,
1.150 paf 635: joined_origin_file, joined_origin_line);
1.123 paf 636: );
1.156 paf 637:
638: if(acstr)
639: *acstr=lcstr;
1.89 parser 640: return result;
641: }
642:
1.90 parser 643: double String::as_double() const {
1.89 parser 644: double result;
1.114 paf 645: const char *cstr;
646: char buf[MAX_NUMBER];
1.151 paf 647: if(head.chunk.rows+1==append_here) {
648: int size=min(head.chunk.rows[0].item.size, MAX_NUMBER-1);
649: memcpy(buf, head.chunk.rows[0].item.ptr, size);
1.114 paf 650: buf[size]=0;
651: cstr=buf;
652: } else
653: cstr=this->cstr();
1.161 paf 654: while(*cstr && isspace(*cstr))
655: cstr++;
656: if(!*cstr)
1.162 paf 657: return 0;
1.161 paf 658:
1.102 parser 659: char *error_pos;
1.89 parser 660: // 0xABC
1.99 parser 661: if(cstr[0]=='0')
662: if(cstr[1]=='x' || cstr[1]=='X')
663: result=(double)(unsigned long)strtol(cstr, &error_pos, 0);
664: else
1.102 parser 665: result=(double)strtod(cstr+1/*skip leading 0*/, &error_pos);
1.89 parser 666: else
1.99 parser 667: result=(double)strtod(cstr, &error_pos);
1.89 parser 668:
1.159 paf 669: while(char c=*error_pos++)
670: if(!isspace(c))
671: throw Exception("number.format",
672: this,
673: "invalid number (double)");
1.89 parser 674:
675: return result;
676: }
1.90 parser 677: int String::as_int() const {
1.89 parser 678: int result;
1.114 paf 679: const char *cstr;
680: char buf[MAX_NUMBER];
1.151 paf 681: if(head.chunk.rows+1==append_here) {
1.163 paf 682: size_t size=min(head.chunk.rows[0].item.size, MAX_NUMBER-1);
1.151 paf 683: memcpy(buf, head.chunk.rows[0].item.ptr, size);
1.114 paf 684: buf[size]=0;
685: cstr=buf;
686: } else
687: cstr=this->cstr();
1.161 paf 688: while(*cstr && isspace(*cstr))
689: cstr++;
690: if(!*cstr)
1.162 paf 691: return 0;
1.161 paf 692:
1.102 parser 693: char *error_pos;
1.89 parser 694: // 0xABC
1.99 parser 695: if(cstr[0]=='0')
696: if(cstr[1]=='x' || cstr[1]=='X')
697: result=(int)(unsigned long)strtol(cstr, &error_pos, 0);
698: else
1.102 parser 699: result=(int)strtol(cstr+1/*skip leading 0*/, &error_pos, 0);
1.89 parser 700: else
701: result=(int)strtol(cstr, &error_pos, 0);
702:
1.159 paf 703: while(char c=*error_pos++)
704: if(!isspace(c))
705: throw Exception("number.format",
706: this,
707: "invalid number (int)");
1.82 parser 708:
709: return result;
1.61 paf 710: }
1.113 parser 711:
1.128 paf 712: inline void ushort2uchars(ushort word, uchar& byte1, uchar& byte2) {
713: byte1=word&0xFF;
714: byte2=word>>8;
715: }
716: inline ushort uchars2ushort(uchar byte1, uchar byte2) {
717: return (byte2<<8) | byte1;
718: }
1.113 parser 719: /* @todo maybe network order worth spending some effort?
720: don't bothering myself with network byte order,
721: am not planning to be able to move resulting file across platforms
722: for now
723: */
724: void String::serialize(size_t prolog_size, void *& buf, size_t& buf_size) const {
725: buf_size=
726: prolog_size
1.126 paf 727: +used_rows()*(sizeof(uchar)+sizeof(ushort))
1.113 parser 728: +size();
1.172.2.1 paf 729: buf=pa_malloc(buf_size);
1.113 parser 730: char *cur=(char *)buf+prolog_size;
731:
1.123 paf 732: STRING_FOREACH_ROW(
733: // lang
1.126 paf 734: memcpy(cur, &row->item.lang, sizeof(uchar));
735: cur+=sizeof(uchar);
1.123 paf 736: // size
1.128 paf 737: uchar byte1; uchar byte2;
738: ushort2uchars(row->item.size, byte1, byte2);
739: memcpy(cur, &byte1, sizeof(uchar)); cur+=sizeof(uchar);
740: memcpy(cur, &byte2, sizeof(uchar)); cur+=sizeof(uchar);
1.123 paf 741: // bytes
742: memcpy(cur, row->item.ptr, row->item.size);
743: cur+=row->item.size;
744: );
1.113 parser 745: }
1.148 paf 746: bool String::deserialize(size_t prolog_size, void *buf, size_t buf_size, const char *file) {
1.135 paf 747: if(buf_size<=prolog_size)
1.148 paf 748: return false;
1.135 paf 749:
1.126 paf 750: char *cur=(char *)buf+prolog_size;
1.113 parser 751: buf_size-=prolog_size;
752:
753: while(buf_size) {
1.148 paf 754: if(sizeof(uchar)+sizeof(ushort)>buf_size) // lang+size
755: return false;
756:
757: uchar lang=*(uchar *)(cur);
1.128 paf 758: ushort size=uchars2ushort(
759: *(uchar*)(cur+sizeof(uchar)*1),
760: *(uchar*)(cur+sizeof(uchar)*2)
761: );
762:
1.148 paf 763: size_t piece_size=sizeof(uchar)+sizeof(ushort)+size;
764: if(piece_size>buf_size) // buffer overrun, can be on incomplete cache files
765: return false;
766:
1.128 paf 767: const char *ptr=(const char*)(cur+sizeof(uchar)*3);
1.126 paf 768: APPEND(ptr, size, lang, file, 0);
1.113 parser 769:
770: cur+=piece_size;
771: buf_size-=piece_size;
772: }
1.148 paf 773: return true;
1.113 parser 774: }
E-mail: