Annotation of parser3/src/main/pa_string.C, revision 1.172.2.21.2.2
1.45 paf 1: /** @file
1.55 paf 2: Parser: string class. @see untaint.C.
1.46 paf 3:
1.172.2.11 paf 4: Copyright (c) 2001-2003 ArtLebedev Group (http://www.artlebedev.com)
1.138 paf 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.164 paf 6: */
1.46 paf 7:
1.172.2.21.2. (paf 8:): static const char* IDENT_STRING_C="$Date: 2003/03/18 15:14:19 $";
1.4 paf 9:
1.70 paf 10: #include "pcre.h"
11:
1.12 paf 12: #include "pa_string.h"
1.22 paf 13: #include "pa_exception.h"
1.61 paf 14: #include "pa_table.h"
1.101 parser 15: #include "pa_dictionary.h"
1.132 paf 16: #include "pa_charset.h"
1.60 paf 17:
1.172.2.2 paf 18: // helpers
1.139 paf 19:
1.172.2.2 paf 20: /// String::match uses this as replace & global search table columns
1.139 paf 21:
1.172.2.4 paf 22: const int MAX_MATCH_GROUPS=100;
23:
// Column-name list used for the String::match result table.
// The constructor fills it with "prematch", "match", "postmatch" and then
// one numbered column per capture group: "1" .. MAX_MATCH_GROUPS.
1.172.2.14 paf 24: class String_match_table_template_columns: public ArrayString {
1.172.2.2 paf 25: public:
1.172.2.4 paf 26: String_match_table_template_columns() {
// the three fixed columns come first
1.172.2.21.2. (paf 27:): *this+=new String("prematch");
28:): *this+=new String("match");
29:): *this+=new String("postmatch");
// then the numbered group columns; names are 1-based, hence 1+i
1.172.2.4 paf 30: for(int i=0; i<MAX_MATCH_GROUPS; i++) {
// the buffer is sized for the longest name the loop can produce
1.172.2.21.2. (paf 31:): char *cname=new char[3/*strlen("100")*/+1/*terminating 0*/];
// sprintf returns the number of characters written, which is passed on as the name length
32:): *this+=new String(cname, sprintf(cname, "%d", 1+i));
1.172.2.2 paf 33: }
1.172.2.21 paf 34: }
1.172.2.4 paf 35: };
36:
// File-level template table carrying the match column names;
// String::match constructs its result table as a copy of this one.
37: Table string_match_table_template(
1.172.2.21.2. (paf 38:): Exception::undefined_source,
39:): new String_match_table_template_columns);
1.172.2.2 paf 40:
41: // methods
42:
1.172.2.21.2. (paf 43:): String::String(const char* src, size_t src_size, bool tainted): body(CORD_EMPTY) {
1.41 paf 44: if(src)
1.75 paf 45: if(tainted)
46: APPEND_TAINTED(src, src_size, 0, 0);
1.41 paf 47: else
1.75 paf 48: APPEND_CLEAN(src, src_size, 0, 0);
1.1 paf 49: }
1.140 paf 50:
// Copy constructor: initializes body and fragment_info from the source string's members.
1.172.2.21.2. (paf 51:): String::String(const String& src): body(src.body), fragment_info(src.fragment_info) {}
1.28 paf 52:
1.13 paf 53: String& String::real_append(STRING_APPEND_PARAMS) {
1.9 paf 54: if(!src)
55: return *this;
1.26 paf 56: if(!size)
57: size=strlen(src);
58: if(!size)
1.9 paf 59: return *this;
1.122 paf 60:
1.172.2.21.2. (paf 61:): body=CORD_cat_char_star(body, src, size);
62:): fragment_info+=Fragment_info(lang, size);
1.1 paf 63:
64: return *this;
65: }
66:
1.172.2.21.2. (paf 67:): static int CORD_batched_iter_fn_generic_hash_code(const char * s, void * client_data) {
68:): uint& result=*static_cast<uint*>(client_data);
69:): generic_hash_code(result, s);
70:): };
1.16 paf 71: uint String::hash_code() const {
1.7 paf 72: uint result=0;
1.172.2.21.2. (paf 73:): CORD_iter5(body, 0, 0, CORD_batched_iter_fn_generic_hash_code, &result);
1.5 paf 74: return result;
75: }
76:
1.60 paf 77: /// @todo move 'lang' skipping to pos
// Compares this string, starting at byte 'this_offset', with 'src', fragment by fragment.
//
// 'partial' reports how the comparison ended:
//   -1  bytes differ, or a fragment of 'this' failed the language check
//    0  both strings ended at the same time
//    1  'this' ended before 'src' did
//    2  'src' ended first, that is 'src' is a prefix of 'this'+'this_offset'
//
// Returns the memcmp() result of the first differing piece, -1 on the language
// bail-out, and 0 / -1 / +1 for partial 0 / 1 / 2.
// A fragment of 'this' is rejected when 'lang' is specified and the fragment's
// language value is greater than it.
78: int String::cmp(int& partial, const String& src,
79: size_t this_offset, Untaint_lang lang) const {
1.59 paf 80: partial=-1;
1.125 paf 81: size_t a_size=size();
// NOTE(review): for an empty string a_size-1 wraps around (size_t), so no clamping happens -- confirm this is intended
82: this_offset=min(this_offset, a_size-1);
1.55 paf 83:
// a_* walk the fragments of 'this', b_* those of 'src';
// the offsets are positions inside the current fragment
1.172.2.4 paf 84: const String_fragment *a_current=felements;
85: const String_fragment *b_current=src.felements;
1.55 paf 86: size_t a_offset=this_offset;
87: size_t b_offset=0;
1.172.2.4 paf 88: String_fragment *a_end=felements+fused;
89: String_fragment *b_end=src.felements+src.fused;
1.116 paf 90: int result;
// 'pos' is the absolute start position of the current fragment of 'this'
1.60 paf 91: size_t pos=0;
1.33 paf 92:
1.172.2.4 paf 93: bool a_break;
94: bool b_break;
1.172.2.17 paf 95: while(true) {
96: a_break=(a_current==a_end);
97: b_break=(b_current==b_end);
98: if(a_break || b_break)
99: break;
100:
// does the current fragment reach past the requested start offset?
1.172.2.4 paf 101: if(pos+a_current->size > this_offset) {
1.172.2.6 paf 102: if(lang!=UL_UNSPECIFIED && a_current->lang>(String_UL)lang)
1.60 paf 103: return -1; // wrong lang -- bail out
104:
// compare the bytes left in both current fragments; the shorter one bounds memcmp
1.55 paf 105: int size_diff=
1.172.2.4 paf 106: (a_current->size-a_offset)-
107: (b_current->size-b_offset);
1.55 paf 108:
109: if(size_diff==0) { // a has same size as b
1.172.2.4 paf 110: result=memcmp(a_current->ptr+a_offset, b_current->ptr+b_offset,
111: a_current->size-a_offset);
1.55 paf 112: if(result)
113: return result;
1.172.2.4 paf 114: pos+=a_current->size;
115: a_current++; a_offset=0;
116: b_current++; b_offset=0;
117: } else if(size_diff>0) { // a longer
118: result=memcmp(a_current->ptr+a_offset, b_current->ptr+b_offset,
119: b_current->size-b_offset);
1.55 paf 120: if(result)
121: return result;
// stay in the same fragment of 'this', only move on inside it
1.172.2.4 paf 122: a_offset+=b_current->size-b_offset;
123: b_current++; b_offset=0;
1.55 paf 124: } else { // b longer
1.172.2.4 paf 125: result=memcmp(a_current->ptr+a_offset, b_current->ptr+b_offset,
126: a_current->size-a_offset);
1.55 paf 127: if(result)
128: return result;
1.172.2.4 paf 129: b_offset+=a_current->size-a_offset;
130: pos+=a_current->size;
131: a_current++; a_offset=0;
1.55 paf 132: }
133: } else {
// the whole fragment lies before the start offset: skip it
1.172.2.4 paf 134: a_offset-=a_current->size;
135: pos+=a_current->size;
136: a_current++;
1.9 paf 137: }
1.27 paf 138: }
1.55 paf 139: if(a_break==b_break) { // ended simultaneously
140: partial=0; return 0;
141: } else if(a_break) { // first bytes equal, but a ended before b
142: partial=1; return -1;
143: } else {
144: partial=2; return +1;
145: }
1.27 paf 146: }
147:
1.60 paf 148: /// @todo move 'lang' skipping to pos
1.59 paf 149: int String::cmp(int& partial, const char* b_ptr, size_t src_size,
1.60 paf 150: size_t this_offset, Untaint_lang lang) const {
1.59 paf 151: partial=-1;
1.125 paf 152: size_t a_size=size();
1.50 paf 153: size_t b_size=src_size?src_size:b_ptr?strlen(b_ptr):0;
1.125 paf 154: this_offset=min(this_offset, a_size-1);
1.27 paf 155:
1.172.2.4 paf 156: const String_fragment *a_current=felements;
1.59 paf 157: size_t a_offset=this_offset;
1.55 paf 158: size_t b_offset=0;
1.172.2.4 paf 159: String_fragment *a_end=felements+fused;
1.60 paf 160: size_t pos=0;
1.52 paf 161:
1.172.2.4 paf 162: bool a_break;
1.83 parser 163: bool b_break=b_size==0;
1.172.2.17 paf 164: while(true) {
165: a_break=(a_current==a_end);
166: if(a_break || b_break)
167: break;
1.172.2.4 paf 168: if(pos+a_current->size > this_offset) {
1.172.2.6 paf 169: if(lang!=UL_UNSPECIFIED && a_current->lang>(String_UL)lang)
1.60 paf 170: return -1; // wrong lang -- bail out
171:
1.59 paf 172: int size_diff=
1.172.2.4 paf 173: (a_current->size-a_offset)-
1.59 paf 174: (b_size-b_offset);
175:
176: if(size_diff==0) { // a has same size as b
1.172.2.4 paf 177: if(int result=memcmp(a_current->ptr+a_offset, b_ptr+b_offset,
178: a_current->size-a_offset)!=0)
1.59 paf 179: return result;
1.172.2.4 paf 180: pos+=a_current->size;
181: a_current++; a_offset=0;
1.59 paf 182: b_break=true;
183: } else if (size_diff>0) { // a longer
1.172.2.4 paf 184: if(int result=memcmp(a_current->ptr+a_offset, b_ptr+b_offset,
1.59 paf 185: b_size-b_offset)!=0)
186: return result;
187: a_offset+=b_size-b_offset;
188: b_break=true;
189: } else { // b longer
1.172.2.4 paf 190: if(int result=memcmp(a_current->ptr+a_offset, b_ptr+b_offset,
191: a_current->size-a_offset)!=0)
1.59 paf 192: return result;
1.172.2.4 paf 193: b_offset+=a_current->size-a_offset;
194: pos+=a_current->size;
195: a_current++; a_offset=0;
1.59 paf 196: }
197: } else {
1.172.2.4 paf 198: a_offset-=a_current->size;
199: pos+=a_current->size;
200: a_current++;
1.9 paf 201: }
202: }
1.55 paf 203: if(a_break==b_break) { // ended simultaneously
204: partial=0; return 0;
205: } else if(a_break) { // first bytes equal, but a ended before b
206: partial=1; return -1;
207: } else {
208: partial=2; return +1;
209: }
1.5 paf 210: }
1.46 paf 211:
212: #ifndef NO_STRING_ORIGIN
// Reports where this string came from, judged by its fragments' origins.
// An empty string gets a fixed "empty string" origin record.
1.172.2.4 paf 213: const String_fragment::Origin& String::origin() const {
1.140 paf 214: if(is_empty()) {
1.172.2.4 paf 215: static const String_fragment::Origin empty_origin={"empty string"};
1.96 parser 216: return empty_origin;
217: }
1.46 paf 218:
1.147 paf 219: // determining origin by first piece or last appended piece
220: // because any of them can be constant=without origin:
1.50 paf 221: // ex: ^load[/file] "document_root" + "/file"
1.80 paf 222: // when last piece is constant,
223: // ex: parser_root_auto_path{dynamic} / auto.p{const}
224: // using first piece
1.172.2.4 paf 225: String_fragment::Origin& first_origin=felements[0].origin;
// the first fragment wins when it has a file recorded; otherwise the last used fragment is taken
226: return first_origin.file ? first_origin : felements[fused-1].origin;
1.46 paf 227: }
228: #endif
1.53 paf 229:
// Returns the piece of this string between byte positions 'start' and 'finish'
// ('finish' itself is not included), built from the fragments that overlap that
// range; every copied piece keeps its fragment's language and origin.
// 'start' is clamped to the string size and 'finish' is raised to 'start' when
// smaller, so out-of-range or reversed arguments give an empty result.
// NOTE(review): 'result' is declared as a reference yet used with '->' below --
// this revision looks mid-refactoring; confirm the intended type.
1.172.2.21.2. (paf 230:): const String& String::mid(size_t start, size_t finish) const {
231:): const String& result(new String());
1.107 parser 232:
1.166 paf 233: start=min(start, size());
1.167 paf 234: finish=max(start, finish);
1.60 paf 235: if(start==finish)
1.107 parser 236: return result;
1.53 paf 237:
// 'pos' is the absolute start position of the fragment being visited
238: size_t pos=0;
1.172.2.4 paf 239: STRING_FOREACH_FRAGMENT(
240: size_t item_finish=pos+fragment->size;
1.123 paf 241: if(item_finish > start) { // started now or already?
// nothing appended yet means this is the first fragment of the range
1.172.2.4 paf 242: bool started=result->is_empty(); // started now?
1.123 paf 243: bool finished=finish <= item_finish; // finished now?
// 'offset' and 'size' are positions inside this fragment: copy [offset, size)
244: size_t offset=started?start-pos:0;
1.172.2.4 paf 245: size_t size=finished?finish-pos:fragment->size;
246: result->APPEND(
247: fragment->ptr+offset, size-offset,
248: fragment->lang,
249: fragment->origin.file, fragment->origin.line);
1.123 paf 250: if(finished)
1.53 paf 251: goto break2;
252: }
1.172.2.4 paf 253: pos+=fragment->size;
1.123 paf 254: );
1.53 paf 255: break2:
1.60 paf 256: // SAPI::log(pool(), "piece of '%s' from %d to %d is '%s'",
257: //cstr(), start, finish, result.cstr());
1.53 paf 258: return result;
1.54 paf 259: }
260:
1.60 paf 261: int String::pos(const String& substr,
1.116 paf 262: int result, Untaint_lang lang) const {
1.125 paf 263: size_t self_size=size();
1.131 paf 264: for(; size_t(result)<self_size; result++) {
1.60 paf 265: int partial; cmp(partial, substr, result, lang);
1.58 paf 266: if(
267: partial==0 || // full match
268: partial==2) // 'substr' starts 'this'+'result'
269: return result;
270: }
271:
272: return -1;
273: }
274:
1.172.2.11 paf 275: int String::pos(const char* substr, size_t substr_size,
1.116 paf 276: int result, Untaint_lang lang) const {
1.125 paf 277: size_t self_size=size();
1.131 paf 278: for(; size_t(result)<self_size; result++) {
1.60 paf 279: int partial; cmp(partial, substr, substr_size, result, lang);
1.55 paf 280: if(
281: partial==0 || // full match
282: partial==2) // 'substr' starts 'this'+'result'
283: return result;
284: }
285:
286: return -1;
1.60 paf 287: }
288:
// Splits this string by the delimiter buffer, appending the pieces to 'result'.
//
// result         receives the pieces
// pos_after_ref  optional in/out position: splitting starts there, and on return
//                it holds the position after the last piece taken
// delim, delim_size  delimiter bytes; a zero size means "do not split"
// lang           passed on to pos() when searching for the delimiter
// limit          decremented per delimiter found; nothing more is added once it reaches 0
//
// NOTE(review): the three lines after the signature are a to-do note that is not
// inside a comment, and the empty-delimiter branch uses 'String* (this)' --
// this revision looks mid-refactoring; confirm before building.
1.172.2.14 paf 289: void String::split(ArrayString& result,
1.60 paf 290: size_t* pos_after_ref,
1.172.2.11 paf 291: const char* delim, size_t delim_size,
1.172.2.10 paf 292: Untaint_lang lang, int limit) {
1.172.2.21.2. (paf 293:): todo:we can ignore lang in cmp and pos really,
294:): but would split properly!
295:):
1.125 paf 296: size_t self_size=size();
1.60 paf 297: if(delim_size) {
298: size_t pos_after=pos_after_ref?*pos_after_ref:0;
299: int pos_before;
300: // while we have 'delim'...
301: for(; (pos_before=pos(delim, delim_size, pos_after, lang))>=0 && limit; limit--) {
// the piece between the previous delimiter and this one
1.172.2.4 paf 302: result+=mid(pos_after, pos_before);
1.60 paf 303: pos_after=pos_before+delim_size;
304: }
305: // last piece
1.124 paf 306: if(pos_after<self_size && limit) {
1.172.2.4 paf 307: result+=mid(pos_after, self_size);
1.124 paf 308: pos_after=self_size;
1.60 paf 309: }
310: if(pos_after_ref)
311: *pos_after_ref=pos_after;
312: } else { // empty delim
// the string itself becomes the only piece
1.172.2.21.2. (paf 313:): result+=String* (this);
1.60 paf 314: if(pos_after_ref)
1.124 paf 315: *pos_after_ref+=self_size;
1.60 paf 316: }
317: }
318:
// Splits this string by the delimiter string, appending the pieces to 'result'.
//
// result         receives the pieces
// pos_after_ref  optional in/out position: splitting starts there, and on return
//                it holds the position after the last piece taken
// delim          delimiter; an empty one means "do not split"
// lang           passed on to pos() when searching for the delimiter
// limit          decremented per delimiter found; nothing more is added once it reaches 0
//
// NOTE(review): the empty-delimiter branch uses 'String* (this)' --
// this revision looks mid-refactoring; confirm before building.
1.172.2.14 paf 319: void String::split(ArrayString& result,
1.60 paf 320: size_t* pos_after_ref,
321: const String& delim, Untaint_lang lang,
1.172.2.10 paf 322: int limit) {
1.140 paf 323: if(!delim.is_empty()) {
1.60 paf 324: size_t pos_after=pos_after_ref?*pos_after_ref:0;
325: int pos_before;
326: // while we have 'delim'...
327: for(; (pos_before=pos(delim, pos_after, lang))>=0 && limit; limit--) {
// the piece between the previous delimiter and this one
1.172.2.4 paf 328: result+=mid(pos_after, pos_before);
1.60 paf 329: pos_after=pos_before+delim.size();
330: }
331: // last piece
332: if(pos_after<size() && limit) {
1.172.2.4 paf 333: result+=mid(pos_after, size());
1.60 paf 334: pos_after=size();
335: }
336: if(pos_after_ref)
337: *pos_after_ref=pos_after;
338: } else { // empty delim
// the string itself becomes the only piece
1.172.2.21.2. (paf 339:): result+=String* (this);
1.60 paf 340: if(pos_after_ref)
341: *pos_after_ref+=size();
342: }
1.61 paf 343: }
344:
// Translates the option letters found in 'options' into match settings.
//
// options               option letters, looked up with pos(), so their order does not matter
// result                two ints: result[0] receives the PCRE compile option bits,
//                       result[1] becomes non-zero when "g"/"G" (many rows) is given
// need_pre_post_match   set to true when the "'" option is given; never reset here
//
// Each table row names a lower/upper-case key, the bits to clear and to set,
// and either the int to apply them to or a bool flag to raise.
// NOTE(review): 'options' is a reference, yet it is tested as a pointer and used
// with '->' below -- this revision looks mid-refactoring.
1.172.2.21.2. (paf 345:): static void regex_options(const String& options, int *result, bool& need_pre_post_match){
1.63 paf 346: struct Regex_option {
1.172.2.11 paf 347: const char* keyL;
348: const char* keyU;
1.63 paf 349: int clear, set;
350: int *result;
1.154 paf 351: bool *flag;
1.63 paf 352: } regex_option[]={
1.153 paf 353: {"i", "I", 0, PCRE_CASELESS, result}, // a=A
354: {"s", "S", 0, PCRE_DOTALL, result}, // \n\n$ [default]
// NOTE(review): every other row pairs a letter with its own upper case; "U" here looks like a typo for "X" -- confirm
355: {"x", "U", 0, PCRE_EXTENDED, result}, // whitespace in regex ignored
356: {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$
357: {"g", "G", 0, true, result+1}, // many rows
1.154 paf 358: {"'", 0, 0, 0, 0, &need_pre_post_match},
// a row with a null keyL ends the table
359: {0}
1.63 paf 360: };
// defaults that apply before any option letter is looked at
1.171 paf 361: result[0]=PCRE_EXTRA | PCRE_DOTALL | PCRE_DOLLAR_ENDONLY;
1.63 paf 362: result[1]=0;
363:
364: if(options)
1.153 paf 365: for(Regex_option *o=regex_option; o->keyL; o++)
1.154 paf 366: if(options->pos(o->keyL)>=0
367: || (o->keyU && options->pos(o->keyU)>=0)) {
368: if(o->flag)
369: *o->flag=true;
370: else { // result
// clear first, then set, so a row may replace one mode with another
371: *o->result &= ~o->clear;
372: *o->result |= o->set;
373: }
1.63 paf 374: }
375: }
376:
// Runs a regular expression over this string and reports the matches.
//
// source_charset  its pcre_tables are handed to pcre_compile
// aorigin         used as the problem source of the exceptions thrown here
// regexp          pattern text; an empty one is rejected with an exception
// options         option letters, decoded by regex_options
// row_action      called once per match with the freshly built row, and one
//                 final time with Array0 in place of a row
// info            passed through to row_action untouched
// just_matched    set to false on the "no match, not global, no subpatterns"
//                 exit; this function never sets it to true
//
// Returns the result table, or a null table pointer on the exit just described.
// Throws Exception for an empty pattern, a pattern that does not compile,
// a pcre_info failure and a pcre_exec failure other than "no match".
// NOTE(review): 'pattern' and 'subject' are plain char pointers, yet '.get()'
// is called on them below -- this revision looks mid-refactoring.
1.172.2.21.2. (paf 377:): Table* String::match(Charset& source_charset,
378:): const String& aorigin,
1.172.2.4 paf 379: const String& regexp,
1.172.2.21.2. (paf 380:): const String& options,
1.172.2.4 paf 381: Row_action row_action, void *info,
1.172.2.20 paf 382: bool& just_matched) const {
1.140 paf 383: if(regexp.is_empty())
1.149 paf 384: throw Exception(0,
1.73 paf 385: aorigin,
386: "regexp is empty");
1.154 paf 387:
1.172.2.21.2. (paf 388:): const char* pattern=regexp.cstr();
1.172.2.11 paf 389: const char* errptr;
1.62 paf 390: int erroffset;
1.154 paf 391: bool need_pre_post_match=false;
// option_bits[0] = PCRE compile options, option_bits[1] = "global" flag
392: int option_bits[2]; regex_options(options, option_bits, need_pre_post_match);
1.172.2.20 paf 393: bool global=option_bits[1]!=0;
1.172.2.4 paf 394: pcre *code=pcre_compile(pattern.get(), option_bits[0],
1.62 paf 395: &errptr, &erroffset,
1.172.2.1 paf 396: source_charset.pcre_tables);
1.62 paf 397:
// on a syntax error the rest of the pattern, from the error offset on, is reported as the problem source
1.67 paf 398: if(!code)
1.149 paf 399: throw Exception(0,
1.172.2.4 paf 400: regexp.mid(erroffset, regexp.size()),
1.74 paf 401: "regular expression syntax error - %s", errptr);
1.62 paf 402:
// a negative value from pcre_info is treated as an error, anything else is kept as the subpattern count
1.172.2.20 paf 403: int subpatterns=pcre_info(code, 0, 0);
404: if(subpatterns<0) {
1.100 parser 405: pcre_free(code);
1.149 paf 406: throw Exception(0,
1.73 paf 407: aorigin,
1.76 paf 408: "pcre_info error (%d)",
1.172.2.20 paf 409: subpatterns);
1.63 paf 410: }
411:
1.172.2.21.2. (paf 412:): const char* subject=cstr();
1.172.2.4 paf 413: int length=size();
// three ints per reported substring: the whole match plus up to MAX_MATCH_GROUPS groups
414: const int ovecsize=(1/*match*/+MAX_MATCH_GROUPS)*3;
1.155 paf 415: int ovector[ovecsize];
416:
417: // create table
1.172.2.21.2. (paf 418:): Table* table(new Table(string_match_table_template));
1.63 paf 419:
1.64 paf 420: int exec_option_bits=0;
// prestart: where the next search begins; poststart: end of the last match;
// postfinish: end of the subject
1.154 paf 421: int prestart=0;
422: int poststart=0;
423: int postfinish=size();
1.63 paf 424: while(true) {
425: int exec_substrings=pcre_exec(code, 0,
1.172.2.4 paf 426: subject.get(), length, prestart,
1.64 paf 427: exec_option_bits, ovector, ovecsize);
1.63 paf 428:
429: if(exec_substrings==PCRE_ERROR_NOMATCH) {
1.100 parser 430: pcre_free(code);
1.172.2.21.2. (paf 431:): row_action(table, Array0/*last time, no row*/, 0, 0, poststart, postfinish, info);
1.172.2.20 paf 432: if(global || subpatterns)
433: return table; // global or with subpatterns=true+result
434: else {
1.172.2.21.2. (paf 435:): just_matched=false; return Table*(0); // not global=no result
1.172.2.20 paf 436: }
1.63 paf 437: }
438:
439: if(exec_substrings<0) {
1.100 parser 440: pcre_free(code);
1.149 paf 441: throw Exception(0,
1.63 paf 442: aorigin,
1.76 paf 443: "regular expression execute error (%d)",
1.63 paf 444: exec_substrings);
445: }
446:
// ovector[0]..ovector[1] delimit the whole match
1.154 paf 447: int prefinish=ovector[0];
448: poststart=ovector[1];
1.172.2.14 paf 449: object_ptr<ArrayString> row(new ArrayString);
// the first three cells are always added, so that group cells land in the numbered columns
1.172.2.4 paf 450: if(need_pre_post_match) {
451: *row+=mid(0, prefinish); // .prematch column value
452: *row+=mid(prefinish, poststart); // .match
453: *row+=mid(poststart, postfinish); // .postmatch
454: } else {
1.172.2.21.2. (paf 455:): *row+=0; // .prematch column value
456:): *row+=0; // .match
457:): *row+=0; // .postmatch
1.172.2.4 paf 458: }
1.63 paf 459:
460: for(int i=1; i<exec_substrings; i++) {
1.69 paf 461: // -1:-1 case handled peacefully by mid() itself
1.172.2.4 paf 462: *row+=mid(ovector[i*2+0], ovector[i*2+1]); // .i column value
1.63 paf 463: }
464:
1.172.2.20 paf 465: row_action(table, row, prestart, prefinish, poststart, postfinish, info);
1.63 paf 466:
// a match that did not move the position would be found again forever, so it ends the search too
1.172.2.20 paf 467: if(!global || prestart==poststart) { // not global | going to hang
1.100 parser 468: pcre_free(code);
1.172.2.21.2. (paf 469:): row_action(table, Array0/*last time, no row*/, 0, 0, poststart, postfinish, info);
1.172.2.20 paf 470: return table;
1.63 paf 471: }
// continue right after the match just reported
1.154 paf 472: prestart=poststart;
1.63 paf 473:
474: /*
475: if(option_bits[0] & PCRE_MULTILINE)
1.64 paf 476: exec_option_bits|=PCRE_NOTBOL; // start of subject+startoffset not BOL
1.63 paf 477: */
478: }
1.82 parser 479: }
480:
// Returns a copy of this string with every byte mapped through the case
// tables of 'source_charset'; fragment boundaries, languages and origins are kept.
//
// Every byte goes through table 'a', and then through table 'b' when there is one:
//   CC_LOWER  a = table at lcc_offset, no b
//   CC_UPPER  a = table at lcc_offset, b = table at fcc_offset
// (presumably PCRE's lower-casing and case-flipping tables -- confirm against pcre internals).
// Any other kind throws an Exception.
// NOTE(review): the signature has lost its opening parenthesis and 'result' is a
// reference used with '->' -- this revision looks mid-refactoring.
1.172.2.21.2. (paf 481:): const String& String::change_caseCharset& source_charset, Change_case_kind kind) {
482:): const String& result(new String());
1.172.2.4 paf 483:
1.172.2.1 paf 484: const unsigned char *tables=source_charset.pcre_tables;
1.82 parser 485:
486: const unsigned char *a;
487: const unsigned char *b;
488: switch(kind) {
489: case CC_UPPER:
490: a=tables+lcc_offset;
491: b=tables+fcc_offset;
492: break;
493: case CC_LOWER:
494: a=tables+lcc_offset;
495: b=0;
496: break;
497: default:
1.149 paf 498: throw Exception(0,
1.172.2.21.2. (paf 499:): String* (this),
1.82 parser 500: "unknown change case kind #%d",
501: static_cast<int>(kind)); // never
502: a=b=0; // calm, compiler
503: break; // never
504: }
505:
1.172.2.4 paf 506: STRING_FOREACH_FRAGMENT(
// each fragment is converted into a fresh buffer of the same size
1.172.2.21.2. (paf 507:): char *new_cstr=new char[fragment->size];
1.143 paf 508: char *dest=new_cstr;
1.172.2.11 paf 509: const char* end=fragment->ptr+fragment->size;
510: for(const char* current=fragment->ptr; current<end; current++) {
// the cast keeps bytes above 127 from becoming negative table indexes
1.172.2.4 paf 511: unsigned char c=a[(unsigned char)*current];
1.143 paf 512: if(b)
513: c=b[c];
1.82 parser 514:
1.143 paf 515: *dest++=(char)c;
1.82 parser 516: }
1.143 paf 517:
1.172.2.4 paf 518: result->APPEND(new_cstr, fragment->size,
519: fragment->lang,
520: fragment->origin.file, fragment->origin.line);
1.143 paf 521: );
1.89 parser 522:
1.101 parser 523: return result;
524: }
525:
1.150 paf 526: /// @test if in some piece were found no dict words, append it, not it's duplicate
// Returns a copy of this string with dictionary substitutions applied.
//
// Neighbouring fragments of the same language are first treated as one joined
// piece of the flat text; the piece is then scanned left to right: wherever a
// dictionary row matches at the current position its second column is written
// instead of its first, otherwise one byte is copied as is.
// Each joined piece is appended with its language and the origin of its first fragment.
// NOTE(review): the signature has lost its opening parenthesis, '.get()' is called
// on a char pointer and references are used with '->' -- this revision looks
// mid-refactoring.
1.172.2.21.2. (paf 527:): const String& String::replaceconst Dictionary& dict) const {
528:): const String& result(new String());
529:): const char* lcstr(cstr());
// 'current' walks the flat text in step with the fragments
1.172.2.11 paf 530: const char* current=lcstr.get();
1.170 paf 531:
1.172.2.4 paf 532: STRING_FOREACH_FRAGMENT(
1.170 paf 533: IFNDEF_NO_STRING_ORIGIN(
1.172.2.11 paf 534: const char* joined_origin_file=fragment->origin.file;
1.172.2.4 paf 535: const size_t joined_origin_line=fragment->origin.line;
1.170 paf 536: );
1.172.2.18 paf 537: String_UL joined_lang=fragment->lang;
1.172.2.11 paf 538: const char* joined_ptr=current;
1.170 paf 539: // calc size
540: size_t joined_size=0;
1.172.2.4 paf 541: STRING_PREPARED_FOREACH_FRAGMENT(
542: if(fragment->lang==joined_lang)
543: joined_size+=fragment->size;
1.170 paf 544: else
545: break; // before non-ours
546: );
547: current+=joined_size;
548:
549: // pointers are after joined piece
1.172.2.4 paf 550: // & one step back, see STRING_PREPARED_FOREACH_FRAGMENT
551: --fragment;
1.170 paf 552:
// the output buffer is sized by the dictionary's max_ratio() times the input size
1.172.2.21.2. (paf 553:): char *new_cstr=new char[(size_t)ceil(joined_size*dict.max_ratio())];
1.123 paf 554: char *dest=new_cstr;
1.170 paf 555: while(joined_size) {
1.172.2.4 paf 556: // there is a fragment where first column starts 'joined_ptr'
557: if(Table::element_type row=dict.first_that_starts(joined_ptr, joined_size)) {
1.123 paf 558: // get a=>b values
1.172.2.21.2. (paf 559:): const String& a=row->get(0);
560:): const String& b=row->get(1);
1.170 paf 561: // skip 'a' in 'joined_ptr' && reduce work size
1.172.2.4 paf 562: joined_ptr+=a->size(); joined_size-=a->size();
1.123 paf 563: // write 'b' to 'dest' && skip 'b' in 'dest'
1.172.2.5 paf 564: b->store_to(dest, String::UL_AS_IS); dest+=b->size();
1.123 paf 565: } else {
566: // write a char to b && reduce work size
1.170 paf 567: *dest++=*joined_ptr++; joined_size--;
1.101 parser 568: }
569: }
570:
// only the bytes actually written are appended
1.172.2.4 paf 571: result->APPEND(new_cstr, dest-new_cstr, joined_lang,
1.170 paf 572: joined_origin_file, joined_origin_line);
1.156 paf 573: );
1.170 paf 574:
1.156 paf 575: return result;
576: }
577:
// Returns a copy of this string in which neighbouring fragments of the same
// language are merged into one fragment.
//
// The whole text is first stored into one new buffer; each run of same-language
// fragments is then appended as a single piece pointing into that buffer,
// with the origin of the run's first fragment.
// NOTE(review): 'result' is a reference used with '->' -- this revision looks
// mid-refactoring.
1.172.2.21.2. (paf 578:): const String& String::join_chains() const {
579:): const String& result(new String());
1.172.2.4 paf 580:
1.172.2.21.2. (paf 581:): char *pooled_buf=new char[cstr_bufsize()];
1.172.2.5 paf 582: store_to(pooled_buf, String::UL_AS_IS);
// 'current' walks the flat buffer in step with the fragments
1.172.2.11 paf 583: const char* current=pooled_buf;
1.156 paf 584:
1.172.2.4 paf 585: STRING_FOREACH_FRAGMENT(
1.156 paf 586: IFNDEF_NO_STRING_ORIGIN(
1.172.2.11 paf 587: const char* joined_origin_file=fragment->origin.file;
1.172.2.4 paf 588: const size_t joined_origin_line=fragment->origin.line;
1.156 paf 589: );
1.172.2.18 paf 590: String_UL joined_lang=fragment->lang;
1.172.2.11 paf 591: const char* joined_ptr=current;
1.156 paf 592: // calc size
593: size_t joined_size=0;
1.172.2.4 paf 594: STRING_PREPARED_FOREACH_FRAGMENT(
595: if(fragment->lang==joined_lang)
596: joined_size+=fragment->size;
1.156 paf 597: else
598: break; // before non-ours
599: );
600: current+=joined_size;
601:
602: // pointers are after joined piece
1.172.2.4 paf 603: // & one step back, see STRING_PREPARED_FOREACH_FRAGMENT
604: --fragment;
1.156 paf 605:
1.172.2.4 paf 606: result->APPEND(joined_ptr, joined_size, joined_lang,
1.150 paf 607: joined_origin_file, joined_origin_line);
1.123 paf 608: );
1.156 paf 609:
1.89 parser 610: return result;
611: }
612:
// Converts the string to a double.
//
// The text is copied into a stack buffer of MAX_STRING bytes; a longer string
// throws a "number.format" Exception. Leading whitespace is skipped; an empty or
// all-whitespace string gives 0. A "0x"/"0X" prefix is parsed by strtol() as an
// integer, anything else by strtod(). Trailing whitespace is allowed; any other
// trailing character throws a "number.format" Exception.
// NOTE(review): isspace() is given a plain char, which is undefined for negative
// values (bytes above 127 where char is signed) -- consider casting to unsigned char.
// NOTE(review): "%u" is given size(); if that is a size_t wider than unsigned int
// the format does not match -- confirm.
1.172.2.10 paf 613: double String::as_double() {
1.89 parser 614: double result;
1.172.2.4 paf 615: char buf[MAX_STRING];
// one byte of the buffer is needed for the terminating 0
616: if(size()>MAX_STRING-1)
617: throw Exception("number.format",
1.172.2.21.2. (paf 618:): String* (this),
1.172.2.4 paf 619: "invalid number too long a string (%u>%u)", size(), MAX_STRING-1);
1.172.2.5 paf 620: char *eol=store_to(buf, String::UL_AS_IS); *eol=0;
1.172.2.11 paf 621: const char* cstr=buf;
1.172.2.4 paf 622:
// skip leading whitespace
1.161 paf 623: while(*cstr && isspace(*cstr))
624: cstr++;
625: if(!*cstr)
1.162 paf 626: return 0;
1.161 paf 627:
1.102 parser 628: char *error_pos;
1.89 parser 629: // 0xABC
1.99 parser 630: if(cstr[0]=='0')
631: if(cstr[1]=='x' || cstr[1]=='X')
632: result=(double)(unsigned long)strtol(cstr, &error_pos, 0);
633: else
1.102 parser 634: result=(double)strtod(cstr+1/*skip leading 0*/, &error_pos);
1.89 parser 635: else
1.99 parser 636: result=(double)strtod(cstr, &error_pos);
1.89 parser 637:
// whatever the parser left unread must be whitespace only
1.159 paf 638: while(char c=*error_pos++)
639: if(!isspace(c))
640: throw Exception("number.format",
1.172.2.21.2. (paf 641:): String* (this),
1.159 paf 642: "invalid number (double)");
1.89 parser 643:
644: return result;
645: }
// Converts the string to an int.
//
// The text is copied into a stack buffer of MAX_STRING bytes; a longer string
// throws a "number.format" Exception. Leading whitespace is skipped; an empty or
// all-whitespace string gives 0. All parsing is done by strtol() with base 0.
// Trailing whitespace is allowed; any other trailing character throws a
// "number.format" Exception.
// NOTE(review): only one leading '0' is skipped before the base-0 strtol() call,
// so text such as "0010" still reaches strtol() with a leading zero and would be
// read as octal -- confirm whether that is intended.
// NOTE(review): isspace() is given a plain char, which is undefined for negative
// values (bytes above 127 where char is signed) -- consider casting to unsigned char.
// NOTE(review): "%u" is given size(); if that is a size_t wider than unsigned int
// the format does not match -- confirm.
1.172.2.10 paf 646: int String::as_int() {
1.89 parser 647: int result;
1.172.2.4 paf 648: char buf[MAX_STRING];
// one byte of the buffer is needed for the terminating 0
649: if(size()>MAX_STRING-1)
650: throw Exception("number.format",
1.172.2.21.2. (paf 651:): String* (this),
1.172.2.4 paf 652: "invalid number too long a string (%u>%u)", size(), MAX_STRING-1);
1.172.2.5 paf 653: char *eol=store_to(buf, String::UL_AS_IS); *eol=0;
1.172.2.11 paf 654: const char* cstr=buf;
1.172.2.4 paf 655:
// skip leading whitespace
1.161 paf 656: while(*cstr && isspace(*cstr))
657: cstr++;
658: if(!*cstr)
1.162 paf 659: return 0;
1.161 paf 660:
1.102 parser 661: char *error_pos;
1.89 parser 662: // 0xABC
1.99 parser 663: if(cstr[0]=='0')
664: if(cstr[1]=='x' || cstr[1]=='X')
665: result=(int)(unsigned long)strtol(cstr, &error_pos, 0);
666: else
1.102 parser 667: result=(int)strtol(cstr+1/*skip leading 0*/, &error_pos, 0);
1.89 parser 668: else
669: result=(int)strtol(cstr, &error_pos, 0);
670:
// whatever the parser left unread must be whitespace only
1.159 paf 671: while(char c=*error_pos++)
672: if(!isspace(c))
673: throw Exception("number.format",
1.172.2.21.2. (paf 674:): String* (this),
1.159 paf 675: "invalid number (int)");
1.82 parser 676:
677: return result;
1.61 paf 678: }
1.113 parser 679:
/// Stores the low 32 bits of @a word into bytes[0..3], least significant byte first.
inline void uint2uchars(uint word, uchar *bytes) {
	bytes[0]=(uchar)(word&0xFF);
	bytes[1]=(uchar)((word>>8)&0xFF);
	bytes[2]=(uchar)((word>>16)&0xFF);
	bytes[3]=(uchar)((word>>24)&0xFF);
}
/// Reassembles the value stored by uint2uchars from bytes[0..3].
inline uint uchars2uint(uchar *bytes) {
	// every byte is widened to uint before shifting: left as is it would be
	// promoted to a signed int, and bytes[3]>=0x80 shifted by 24 overflows that
	return (uint)bytes[3]<<24
		| (uint)bytes[2]<<16
		| (uint)bytes[1]<<8
		| (uint)bytes[0];
}
692:
1.172.2.21.2. (paf 693:): void String::serializesize_t prolog_size, char *& buf, size_t& buf_size) const {
1.113 parser 694: buf_size=
695: prolog_size
1.172.2.18 paf 696: +fused*(sizeof(String_UL)+sizeof(size_t))
1.113 parser 697: +size();
1.172.2.21.2. (paf 698:): buf=new char[buf_size];
1.172.2.4 paf 699: char *cur=buf+prolog_size;
1.113 parser 700:
1.172.2.4 paf 701: STRING_FOREACH_FRAGMENT(
1.123 paf 702: // lang
1.172.2.4 paf 703: memcpy(cur, &fragment->lang, sizeof(fragment->lang));
704: cur+=sizeof(fragment->lang);
1.123 paf 705: // size
1.172.2.4 paf 706: // bug on some sparc platform [you can't work with integers on odd pointers]
707: // forces us to use byte array instead
708: uchar bytes[4];
709: uint2uchars(fragment->size, bytes);
710: memcpy(cur, &bytes, sizeof(bytes)); cur+=sizeof(bytes);
1.123 paf 711: // bytes
1.172.2.4 paf 712: memcpy(cur, fragment->ptr, fragment->size);
713: cur+=fragment->size;
1.123 paf 714: );
1.113 parser 715: }
1.172.2.11 paf 716: bool String::deserialize(size_t prolog_size, void *buf, size_t buf_size, const char* file) {
1.135 paf 717: if(buf_size<=prolog_size)
1.148 paf 718: return false;
1.135 paf 719:
1.126 paf 720: char *cur=(char *)buf+prolog_size;
1.113 parser 721: buf_size-=prolog_size;
722:
723: while(buf_size) {
1.172.2.18 paf 724: if(sizeof(String_UL)+sizeof(size_t)>buf_size) // lang+size
1.148 paf 725: return false;
726:
1.172.2.6 paf 727: String_UL lang=*(String_UL *)(cur);
1.172.2.4 paf 728: size_t size=uchars2uint((uchar *)cur);
1.128 paf 729:
1.172.2.18 paf 730: size_t piece_size=sizeof(String_UL)+sizeof(size_t)+size;
1.148 paf 731: if(piece_size>buf_size) // buffer overrun, can be on incomplete cache files
732: return false;
733:
1.172.2.18 paf 734: const char* ptr=(const char*)(cur+sizeof(String_UL)+sizeof(size_t));
1.126 paf 735: APPEND(ptr, size, lang, file, 0);
1.113 parser 736:
737: cur+=piece_size;
738: buf_size-=piece_size;
739: }
1.148 paf 740: return true;
1.113 parser 741: }
E-mail: