Annotation of parser3/src/main/pa_string.C, revision 1.172.2.21.2.1
1.45 paf 1: /** @file
1.55 paf 2: Parser: string class. @see untaint.C.
1.46 paf 3:
1.172.2.11 paf 4: Copyright (c) 2001-2003 ArtLebedev Group (http://www.artlebedev.com)
1.138 paf 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.164 paf 6: */
1.46 paf 7:
// version-control date stamp of this file, kept in a static string
1.172.2.21.2. (paf 8:): static const char* IDENT_STRING_C="$Date: 2003/03/18 11:54:56 $";
1.4 paf 9:
1.70 paf 10: #include "pcre.h"
11:
1.12 paf 12: #include "pa_string.h"
1.22 paf 13: #include "pa_exception.h"
1.61 paf 14: #include "pa_table.h"
1.101 parser 15: #include "pa_dictionary.h"
1.132 paf 16: #include "pa_charset.h"
1.60 paf 17:
1.172.2.2 paf 18: // helpers
1.139 paf 19:
1.172.2.2 paf 20: /// String::match uses this as replace & global search table columns
1.139 paf 21:
1.172.2.4 paf 22: const int MAX_MATCH_GROUPS=100;
23:
1.172.2.14 paf 24: class String_match_table_template_columns: public ArrayString {
1.172.2.2 paf 25: public:
1.172.2.4 paf 26: String_match_table_template_columns() {
1.172.2.21.2. (paf 27:): *this+=new String("prematch");
28:): *this+=new String("match");
29:): *this+=new String("postmatch");
1.172.2.4 paf 30: for(int i=0; i<MAX_MATCH_GROUPS; i++) {
1.172.2.21.2. (paf 31:): char *cname=new char[3/*strlen("100")*/+1/*terminating 0*/];
32:): *this+=new String(cname, sprintf(cname, "%d", 1+i));
1.172.2.2 paf 33: }
1.172.2.21 paf 34: }
1.172.2.4 paf 35: };
36:
/// Table carrying the match columns; String::match copy-constructs its result table from it
37: Table string_match_table_template(
1.172.2.21.2. (paf 38:): Exception::undefined_source,
39:): new String_match_table_template_columns);
1.172.2.2 paf 40:
41: // methods
42:
1.172.2.11 paf 43: String::String(const char* src, size_t src_size, bool tainted): Array<String_fragment>(1), fsize(0) {
1.41 paf 44: if(src)
1.75 paf 45: if(tainted)
46: APPEND_TAINTED(src, src_size, 0, 0);
1.41 paf 47: else
1.75 paf 48: APPEND_CLEAN(src, src_size, 0, 0);
1.1 paf 49: }
1.140 paf 50:
1.172.2.4 paf 51: String::String(const String& src): Array<String_fragment>(src.count()) {
1.169 paf 52: append(src, UL_PASS_APPENDED);
1.120 paf 53: }
1.28 paf 54:
1.13 paf 55: String& String::real_append(STRING_APPEND_PARAMS) {
1.9 paf 56: if(!src)
57: return *this;
1.26 paf 58: if(!size)
59: size=strlen(src);
60: if(!size)
1.9 paf 61: return *this;
1.122 paf 62:
1.172.2.2 paf 63: if(is_full())
64: expand(fdelta);
1.1 paf 65:
1.172.2.4 paf 66: String_fragment *fragment=&felements[fused++];
67: fragment->ptr=src;
68: fragment->size=size;
69: fragment->lang=lang;
1.13 paf 70: #ifndef NO_STRING_ORIGIN
1.172.2.4 paf 71: fragment->origin.file=file;
72: fragment->origin.line=line;
1.13 paf 73: #endif
1.172.2.16 paf 74:
75: fsize+=size;
1.1 paf 76:
77: return *this;
78: }
79:
1.16 paf 80: uint String::hash_code() const {
1.7 paf 81: uint result=0;
1.172.2.4 paf 82: STRING_FOREACH_FRAGMENT(
83: result=generic_hash_code(result, fragment->ptr, fragment->size);
1.123 paf 84: );
1.5 paf 85: return result;
86: }
87:
1.60 paf 88: /// @todo move 'lang' skipping to pos
89: int String::cmp(int& partial, const String& src,
90: size_t this_offset, Untaint_lang lang) const {
1.59 paf 91: partial=-1;
1.125 paf 92: size_t a_size=size();
93: this_offset=min(this_offset, a_size-1);
1.55 paf 94:
1.172.2.4 paf 95: const String_fragment *a_current=felements;
96: const String_fragment *b_current=src.felements;
1.55 paf 97: size_t a_offset=this_offset;
98: size_t b_offset=0;
1.172.2.4 paf 99: String_fragment *a_end=felements+fused;
100: String_fragment *b_end=src.felements+src.fused;
1.116 paf 101: int result;
1.60 paf 102: size_t pos=0;
1.33 paf 103:
1.172.2.4 paf 104: bool a_break;
105: bool b_break;
1.172.2.17 paf 106: while(true) {
107: a_break=(a_current==a_end);
108: b_break=(b_current==b_end);
109: if(a_break || b_break)
110: break;
111:
1.172.2.4 paf 112: if(pos+a_current->size > this_offset) {
1.172.2.6 paf 113: if(lang!=UL_UNSPECIFIED && a_current->lang>(String_UL)lang)
1.60 paf 114: return -1; // wrong lang -- bail out
115:
1.55 paf 116: int size_diff=
1.172.2.4 paf 117: (a_current->size-a_offset)-
118: (b_current->size-b_offset);
1.55 paf 119:
120: if(size_diff==0) { // a has same size as b
1.172.2.4 paf 121: result=memcmp(a_current->ptr+a_offset, b_current->ptr+b_offset,
122: a_current->size-a_offset);
1.55 paf 123: if(result)
124: return result;
1.172.2.4 paf 125: pos+=a_current->size;
126: a_current++; a_offset=0;
127: b_current++; b_offset=0;
128: } else if(size_diff>0) { // a longer
129: result=memcmp(a_current->ptr+a_offset, b_current->ptr+b_offset,
130: b_current->size-b_offset);
1.55 paf 131: if(result)
132: return result;
1.172.2.4 paf 133: a_offset+=b_current->size-b_offset;
134: b_current++; b_offset=0;
1.55 paf 135: } else { // b longer
1.172.2.4 paf 136: result=memcmp(a_current->ptr+a_offset, b_current->ptr+b_offset,
137: a_current->size-a_offset);
1.55 paf 138: if(result)
139: return result;
1.172.2.4 paf 140: b_offset+=a_current->size-a_offset;
141: pos+=a_current->size;
142: a_current++; a_offset=0;
1.55 paf 143: }
144: } else {
1.172.2.4 paf 145: a_offset-=a_current->size;
146: pos+=a_current->size;
147: a_current++;
1.9 paf 148: }
1.27 paf 149: }
1.55 paf 150: if(a_break==b_break) { // ended simultaneously
151: partial=0; return 0;
152: } else if(a_break) { // first bytes equal, but a ended before b
153: partial=1; return -1;
154: } else {
155: partial=2; return +1;
156: }
1.27 paf 157: }
158:
1.60 paf 159: /// @todo move 'lang' skipping to pos
1.59 paf 160: int String::cmp(int& partial, const char* b_ptr, size_t src_size,
1.60 paf 161: size_t this_offset, Untaint_lang lang) const {
1.59 paf 162: partial=-1;
1.125 paf 163: size_t a_size=size();
1.50 paf 164: size_t b_size=src_size?src_size:b_ptr?strlen(b_ptr):0;
1.125 paf 165: this_offset=min(this_offset, a_size-1);
1.27 paf 166:
1.172.2.4 paf 167: const String_fragment *a_current=felements;
1.59 paf 168: size_t a_offset=this_offset;
1.55 paf 169: size_t b_offset=0;
1.172.2.4 paf 170: String_fragment *a_end=felements+fused;
1.60 paf 171: size_t pos=0;
1.52 paf 172:
1.172.2.4 paf 173: bool a_break;
1.83 parser 174: bool b_break=b_size==0;
1.172.2.17 paf 175: while(true) {
176: a_break=(a_current==a_end);
177: if(a_break || b_break)
178: break;
1.172.2.4 paf 179: if(pos+a_current->size > this_offset) {
1.172.2.6 paf 180: if(lang!=UL_UNSPECIFIED && a_current->lang>(String_UL)lang)
1.60 paf 181: return -1; // wrong lang -- bail out
182:
1.59 paf 183: int size_diff=
1.172.2.4 paf 184: (a_current->size-a_offset)-
1.59 paf 185: (b_size-b_offset);
186:
187: if(size_diff==0) { // a has same size as b
1.172.2.4 paf 188: if(int result=memcmp(a_current->ptr+a_offset, b_ptr+b_offset,
189: a_current->size-a_offset)!=0)
1.59 paf 190: return result;
1.172.2.4 paf 191: pos+=a_current->size;
192: a_current++; a_offset=0;
1.59 paf 193: b_break=true;
194: } else if (size_diff>0) { // a longer
1.172.2.4 paf 195: if(int result=memcmp(a_current->ptr+a_offset, b_ptr+b_offset,
1.59 paf 196: b_size-b_offset)!=0)
197: return result;
198: a_offset+=b_size-b_offset;
199: b_break=true;
200: } else { // b longer
1.172.2.4 paf 201: if(int result=memcmp(a_current->ptr+a_offset, b_ptr+b_offset,
202: a_current->size-a_offset)!=0)
1.59 paf 203: return result;
1.172.2.4 paf 204: b_offset+=a_current->size-a_offset;
205: pos+=a_current->size;
206: a_current++; a_offset=0;
1.59 paf 207: }
208: } else {
1.172.2.4 paf 209: a_offset-=a_current->size;
210: pos+=a_current->size;
211: a_current++;
1.9 paf 212: }
213: }
1.55 paf 214: if(a_break==b_break) { // ended simultaneously
215: partial=0; return 0;
216: } else if(a_break) { // first bytes equal, but a ended before b
217: partial=1; return -1;
218: } else {
219: partial=2; return +1;
220: }
1.5 paf 221: }
1.46 paf 222:
223: #ifndef NO_STRING_ORIGIN
1.172.2.4 paf 224: const String_fragment::Origin& String::origin() const {
1.140 paf 225: if(is_empty()) {
1.172.2.4 paf 226: static const String_fragment::Origin empty_origin={"empty string"};
1.96 parser 227: return empty_origin;
228: }
1.46 paf 229:
1.147 paf 230: // determining origin by first piece or last appended piece
231: // because any of them can be constant=without origin:
1.50 paf 232: // ex: ^load[/file] "document_root" + "/file"
1.80 paf 233: // when last peice is constant,
234: // ex: parser_root_auto_path{dynamic} / auto.p{const}
235: // using first piece
1.172.2.4 paf 236: String_fragment::Origin& first_origin=felements[0].origin;
237: return first_origin.file ? first_origin : felements[fused-1].origin;
1.46 paf 238: }
239: #endif
1.53 paf 240:
1.172.2.21.2. (paf 241:): const String& String::mid(size_t start, size_t finish) const {
242:): const String& result(new String());
1.107 parser 243:
1.166 paf 244: start=min(start, size());
1.167 paf 245: finish=max(start, finish);
1.60 paf 246: if(start==finish)
1.107 parser 247: return result;
1.53 paf 248:
249: size_t pos=0;
1.172.2.4 paf 250: STRING_FOREACH_FRAGMENT(
251: size_t item_finish=pos+fragment->size;
1.123 paf 252: if(item_finish > start) { // started now or already?
1.172.2.4 paf 253: bool started=result->is_empty(); // started now?
1.123 paf 254: bool finished=finish <= item_finish; // finished now?
255: size_t offset=started?start-pos:0;
1.172.2.4 paf 256: size_t size=finished?finish-pos:fragment->size;
257: result->APPEND(
258: fragment->ptr+offset, size-offset,
259: fragment->lang,
260: fragment->origin.file, fragment->origin.line);
1.123 paf 261: if(finished)
1.53 paf 262: goto break2;
263: }
1.172.2.4 paf 264: pos+=fragment->size;
1.123 paf 265: );
1.53 paf 266: break2:
1.60 paf 267: // SAPI::log(pool(), "piece of '%s' from %d to %d is '%s'",
268: //cstr(), start, finish, result.cstr());
1.53 paf 269: return result;
1.54 paf 270: }
271:
1.60 paf 272: int String::pos(const String& substr,
1.116 paf 273: int result, Untaint_lang lang) const {
1.125 paf 274: size_t self_size=size();
1.131 paf 275: for(; size_t(result)<self_size; result++) {
1.60 paf 276: int partial; cmp(partial, substr, result, lang);
1.58 paf 277: if(
278: partial==0 || // full match
279: partial==2) // 'substr' starts 'this'+'result'
280: return result;
281: }
282:
283: return -1;
284: }
285:
1.172.2.11 paf 286: int String::pos(const char* substr, size_t substr_size,
1.116 paf 287: int result, Untaint_lang lang) const {
1.125 paf 288: size_t self_size=size();
1.131 paf 289: for(; size_t(result)<self_size; result++) {
1.60 paf 290: int partial; cmp(partial, substr, substr_size, result, lang);
1.55 paf 291: if(
292: partial==0 || // full match
293: partial==2) // 'substr' starts 'this'+'result'
294: return result;
295: }
296:
297: return -1;
1.60 paf 298: }
299:
1.172.2.14 paf 300: void String::split(ArrayString& result,
1.60 paf 301: size_t* pos_after_ref,
1.172.2.11 paf 302: const char* delim, size_t delim_size,
1.172.2.10 paf 303: Untaint_lang lang, int limit) {
1.125 paf 304: size_t self_size=size();
1.60 paf 305: if(delim_size) {
306: size_t pos_after=pos_after_ref?*pos_after_ref:0;
307: int pos_before;
308: // while we have 'delim'...
309: for(; (pos_before=pos(delim, delim_size, pos_after, lang))>=0 && limit; limit--) {
1.172.2.4 paf 310: result+=mid(pos_after, pos_before);
1.60 paf 311: pos_after=pos_before+delim_size;
312: }
313: // last piece
1.124 paf 314: if(pos_after<self_size && limit) {
1.172.2.4 paf 315: result+=mid(pos_after, self_size);
1.124 paf 316: pos_after=self_size;
1.60 paf 317: }
318: if(pos_after_ref)
319: *pos_after_ref=pos_after;
320: } else { // empty delim
1.172.2.21.2. (paf 321:): result+=String* (this);
1.60 paf 322: if(pos_after_ref)
1.124 paf 323: *pos_after_ref+=self_size;
1.60 paf 324: }
325: }
326:
1.172.2.14 paf 327: void String::split(ArrayString& result,
1.60 paf 328: size_t* pos_after_ref,
329: const String& delim, Untaint_lang lang,
1.172.2.10 paf 330: int limit) {
1.140 paf 331: if(!delim.is_empty()) {
1.60 paf 332: size_t pos_after=pos_after_ref?*pos_after_ref:0;
333: int pos_before;
334: // while we have 'delim'...
335: for(; (pos_before=pos(delim, pos_after, lang))>=0 && limit; limit--) {
1.172.2.4 paf 336: result+=mid(pos_after, pos_before);
1.60 paf 337: pos_after=pos_before+delim.size();
338: }
339: // last piece
340: if(pos_after<size() && limit) {
1.172.2.4 paf 341: result+=mid(pos_after, size());
1.60 paf 342: pos_after=size();
343: }
344: if(pos_after_ref)
345: *pos_after_ref=pos_after;
346: } else { // empty delim
1.172.2.21.2. (paf 347:): result+=String* (this);
1.60 paf 348: if(pos_after_ref)
349: *pos_after_ref+=size();
350: }
1.61 paf 351: }
352:
1.172.2.21.2. (paf 353:): static void regex_options(const String& options, int *result, bool& need_pre_post_match){
1.63 paf 354: struct Regex_option {
1.172.2.11 paf 355: const char* keyL;
356: const char* keyU;
1.63 paf 357: int clear, set;
358: int *result;
1.154 paf 359: bool *flag;
1.63 paf 360: } regex_option[]={
1.153 paf 361: {"i", "I", 0, PCRE_CASELESS, result}, // a=A
362: {"s", "S", 0, PCRE_DOTALL, result}, // \n\n$ [default]
363: {"x", "U", 0, PCRE_EXTENDED, result}, // whitespace in regex ignored
364: {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$
365: {"g", "G", 0, true, result+1}, // many rows
1.154 paf 366: {"'", 0, 0, 0, 0, &need_pre_post_match},
367: {0}
1.63 paf 368: };
1.171 paf 369: result[0]=PCRE_EXTRA | PCRE_DOTALL | PCRE_DOLLAR_ENDONLY;
1.63 paf 370: result[1]=0;
371:
372: if(options)
1.153 paf 373: for(Regex_option *o=regex_option; o->keyL; o++)
1.154 paf 374: if(options->pos(o->keyL)>=0
375: || (o->keyU && options->pos(o->keyU)>=0)) {
376: if(o->flag)
377: *o->flag=true;
378: else { // result
379: *o->result &= ~o->clear;
380: *o->result |= o->set;
381: }
1.63 paf 382: }
383: }
384:
// Matches this string against 'regexp' using PCRE and reports matches through 'row_action'.
//
// - throws when 'regexp' is empty, when it fails to compile, when pcre_info or pcre_exec report an error;
//   compiled code is freed with pcre_free before the throws that follow successful compilation
// - 'options' letters are decoded by regex_options; option_bits[1]!=0 means global search
// - for every match a row is built: prematch/match/postmatch [zeros unless need_pre_post_match],
//   then one value per reported substring, and row_action is called with it
// - when there are no more matches [or after the first one in non-global mode, or after an empty
//   match at 'prestart'] row_action is called one last time with 'Array0' and the table is returned
// - on no match in non-global mode without subpatterns: just_matched is set to false, null is returned;
//   just_matched is not written on any other path -- presumably the caller presets it, verify
//
// NOTE(review): 'pattern' and 'subject' are declared 'const char*' but used with '.get()';
//   'row' is an object_ptr while row_action also receives 'Array0', which is not defined in this view.
//   These look like leftovers of an unfinished pointer-type conversion -- confirm against the headers.
1.172.2.21.2. (paf 385:): Table* String::match(Charset& source_charset,
386:): const String& aorigin,
1.172.2.4 paf 387: const String& regexp,
1.172.2.21.2. (paf 388:): const String& options,
1.172.2.4 paf 389: Row_action row_action, void *info,
1.172.2.20 paf 390: bool& just_matched) const {
1.140 paf 391: if(regexp.is_empty())
1.149 paf 392: throw Exception(0,
1.73 paf 393: aorigin,
394: "regexp is empty");
1.154 paf 395:
1.172.2.21.2. (paf 396:): const char* pattern=regexp.cstr();
1.172.2.11 paf 397: const char* errptr;
1.62 paf 398: int erroffset;
1.154 paf 399: bool need_pre_post_match=false;
400: int option_bits[2]; regex_options(options, option_bits, need_pre_post_match);
1.172.2.20 paf 401: bool global=option_bits[1]!=0;
1.172.2.4 paf 402: pcre *code=pcre_compile(pattern.get(), option_bits[0],
1.62 paf 403: &errptr, &erroffset,
1.172.2.1 paf 404: source_charset.pcre_tables);
1.62 paf 405:
// compile failed: the rest of the pattern, starting from the error offset, is passed to Exception
1.67 paf 406: if(!code)
1.149 paf 407: throw Exception(0,
1.172.2.4 paf 408: regexp.mid(erroffset, regexp.size()),
1.74 paf 409: "regular expression syntax error - %s", errptr);
1.62 paf 410:
1.172.2.20 paf 411: int subpatterns=pcre_info(code, 0, 0);
1.172.2.20 paf 412: if(subpatterns<0) {
1.100 parser 413: pcre_free(code);
1.149 paf 414: throw Exception(0,
1.73 paf 415: aorigin,
1.76 paf 416: "pcre_info error (%d)",
1.172.2.20 paf 417: subpatterns);
1.63 paf 418: }
419:
1.172.2.21.2. (paf 420:): const char* subject=cstr();
1.172.2.4 paf 421: int length=size();
// three ints per reported substring: whole match plus MAX_MATCH_GROUPS groups
422: const int ovecsize=(1/*match*/+MAX_MATCH_GROUPS)*3;
1.155 paf 423: int ovector[ovecsize];
424:
425: // create table
1.172.2.21.2. (paf 426:): Table* table(new Table(string_match_table_template));
1.63 paf 427:
1.64 paf 428: int exec_option_bits=0;
// prestart: where next search starts; poststart/postfinish: bounds of the text after last match
1.154 paf 429: int prestart=0;
430: int poststart=0;
431: int postfinish=size();
1.63 paf 432: while(true) {
433: int exec_substrings=pcre_exec(code, 0,
1.172.2.4 paf 434: subject.get(), length, prestart,
1.64 paf 435: exec_option_bits, ovector, ovecsize);
1.63 paf 436:
437: if(exec_substrings==PCRE_ERROR_NOMATCH) {
1.100 parser 438: pcre_free(code);
1.172.2.21.2. (paf 439:): row_action(table, Array0/*last time, no raw*/, 0, 0, poststart, postfinish, info);
1.172.2.20 paf 440: if(global || subpatterns)
441: return table; // global or with subpatterns=true+result
442: else {
1.172.2.21.2. (paf 443:): just_matched=false; return Table*(0); // not global=no result
1.172.2.20 paf 444: }
1.63 paf 445: }
446:
447: if(exec_substrings<0) {
1.100 parser 448: pcre_free(code);
1.149 paf 449: throw Exception(0,
1.63 paf 450: aorigin,
1.76 paf 451: "regular expression execute error (%d)",
1.63 paf 452: exec_substrings);
453: }
454:
// ovector[0], ovector[1]: start and end offsets of the whole match
1.154 paf 455: int prefinish=ovector[0];
456: poststart=ovector[1];
1.172.2.14 paf 457: object_ptr<ArrayString> row(new ArrayString);
1.172.2.4 paf 458: if(need_pre_post_match) {
459: *row+=mid(0, prefinish); // .prematch column value
460: *row+=mid(prefinish, poststart); // .match
461: *row+=mid(poststart, postfinish); // .postmatch
462: } else {
1.172.2.21.2. (paf 463:): *row+=0; // .prematch column value
464:): *row+=0; // .match
465:): *row+=0; // .postmatch
1.172.2.4 paf 466: }
1.63 paf 467:
468: for(int i=1; i<exec_substrings; i++) {
1.69 paf 469: // -1:-1 case handled peacefully by mid() itself
1.172.2.4 paf 470: *row+=mid(ovector[i*2+0], ovector[i*2+1]); // .i column value
1.63 paf 471: }
472:
1.172.2.20 paf 473: row_action(table, row, prestart, prefinish, poststart, postfinish, info);
1.63 paf 474:
// prestart==poststart: match ended where the search started, next search would find it again
1.172.2.20 paf 475: if(!global || prestart==poststart) { // not global | going to hang
1.100 parser 476: pcre_free(code);
1.172.2.21.2. (paf 477:): row_action(table, Array0/*last time, no row*/, 0, 0, poststart, postfinish, info);
1.172.2.20 paf 478: return table;
1.63 paf 479: }
1.154 paf 480: prestart=poststart;
1.63 paf 481:
482: /*
483: if(option_bits[0] & PCRE_MULTILINE)
1.64 paf 484: exec_option_bits|=PCRE_NOTBOL; // start of subject+startoffset not BOL
1.63 paf 485: */
486: }
1.82 parser 487: }
488:
1.172.2.21.2. (paf 489:): const String& String::change_caseCharset& source_charset, Change_case_kind kind) {
490:): const String& result(new String());
1.172.2.4 paf 491:
1.172.2.1 paf 492: const unsigned char *tables=source_charset.pcre_tables;
1.82 parser 493:
494: const unsigned char *a;
495: const unsigned char *b;
496: switch(kind) {
497: case CC_UPPER:
498: a=tables+lcc_offset;
499: b=tables+fcc_offset;
500: break;
501: case CC_LOWER:
502: a=tables+lcc_offset;
503: b=0;
504: break;
505: default:
1.149 paf 506: throw Exception(0,
1.172.2.21.2. (paf 507:): String* (this),
1.82 parser 508: "unknown change case kind #%d",
509: static_cast<int>(kind)); // never
510: a=b=0; // calm, compiler
511: break; // never
512: }
513:
1.172.2.4 paf 514: STRING_FOREACH_FRAGMENT(
1.172.2.21.2. (paf 515:): char *new_cstr=new char[fragment->size];
1.143 paf 516: char *dest=new_cstr;
1.172.2.11 paf 517: const char* end=fragment->ptr+fragment->size;
518: for(const char* current=fragment->ptr; current<end; current++) {
1.172.2.4 paf 519: unsigned char c=a[(unsigned char)*current];
1.143 paf 520: if(b)
521: c=b[c];
1.82 parser 522:
1.143 paf 523: *dest++=(char)c;
1.82 parser 524: }
1.143 paf 525:
1.172.2.4 paf 526: result->APPEND(new_cstr, fragment->size,
527: fragment->lang,
528: fragment->origin.file, fragment->origin.line);
1.143 paf 529: );
1.89 parser 530:
1.101 parser 531: return result;
532: }
533:
// Returns a new string where dictionary words are replaced.
// Works on runs of consecutive fragments having one language: for every run a buffer of
// ceil(run size * dict.max_ratio()) bytes is allocated, the run is scanned byte by byte,
// and wherever dict.first_that_starts finds a row, column 0 is skipped in the input
// and column 1 is stored to the output; other bytes are copied as they are.
// Each run is appended with the language and origin of its first fragment.
//
// NOTE(review): the signature misses '(' after 'replace'; 'result' is a reference bound to a pointer
//   and used with '->'; 'lcstr' is 'const char*' but used with '.get()'; 'a' and 'b' are references
//   used with '->'. Looks like an unfinished pointer-type conversion -- confirm intended types.
1.150 paf 534: /// @test if in some piece were found no dict words, append it, not it's duplicate
1.172.2.21.2. (paf 535:): const String& String::replaceconst Dictionary& dict) const {
536:): const String& result(new String());
537:): const char* lcstr(cstr());
// 'current' walks the flat copy of the string in step with the fragments
1.172.2.11 paf 538: const char* current=lcstr.get();
1.170 paf 539:
1.172.2.4 paf 540: STRING_FOREACH_FRAGMENT(
1.170 paf 541: IFNDEF_NO_STRING_ORIGIN(
1.172.2.11 paf 542: const char* joined_origin_file=fragment->origin.file;
1.172.2.4 paf 543: const size_t joined_origin_line=fragment->origin.line;
1.170 paf 544: );
1.172.2.18 paf 545: String_UL joined_lang=fragment->lang;
1.172.2.11 paf 546: const char* joined_ptr=current;
1.170 paf 547: // calc size
548: size_t joined_size=0;
1.172.2.4 paf 549: STRING_PREPARED_FOREACH_FRAGMENT(
550: if(fragment->lang==joined_lang)
551: joined_size+=fragment->size;
1.170 paf 552: else
553: break; // before non-ours
554: );
555: current+=joined_size;
556:
557: // pointers are after joined piece
1.172.2.4 paf 558: // & one step back, see STRING_PREPARED_FOREACH_FRAGMENT
559: --fragment;
1.170 paf 560:
1.172.2.21.2. (paf 561:): char *new_cstr=new char[(size_t)ceil(joined_size*dict.max_ratio())];
1.123 paf 562: char *dest=new_cstr;
1.170 paf 563: while(joined_size) {
1.172.2.4 paf 564: // there is a fragment where first column starts 'joined_ptr'
565: if(Table::element_type row=dict.first_that_starts(joined_ptr, joined_size)) {
1.123 paf 566: // get a=>b values
1.172.2.21.2. (paf 567:): const String& a=row->get(0);
568:): const String& b=row->get(1);
1.170 paf 569: // skip 'a' in 'joined_ptr' && reduce work size
1.172.2.4 paf 570: joined_ptr+=a->size(); joined_size-=a->size();
1.123 paf 571: // write 'b' to 'dest' && skip 'b' in 'dest'
1.172.2.5 paf 572: b->store_to(dest, String::UL_AS_IS); dest+=b->size();
1.123 paf 573: } else {
574: // write a char to b && reduce work size
1.170 paf 575: *dest++=*joined_ptr++; joined_size--;
1.101 parser 576: }
577: }
578:
// only the bytes actually written [dest-new_cstr] are appended
1.172.2.4 paf 579: result->APPEND(new_cstr, dest-new_cstr, joined_lang,
1.170 paf 580: joined_origin_file, joined_origin_line);
1.156 paf 581: );
1.170 paf 582:
1.156 paf 583: return result;
584: }
585:
1.172.2.21.2. (paf 586:): const String& String::join_chains() const {
587:): const String& result(new String());
1.172.2.4 paf 588:
1.172.2.21.2. (paf 589:): char *pooled_buf=new char[cstr_bufsize()];
1.172.2.5 paf 590: store_to(pooled_buf, String::UL_AS_IS);
1.172.2.11 paf 591: const char* current=pooled_buf;
1.156 paf 592:
1.172.2.4 paf 593: STRING_FOREACH_FRAGMENT(
1.156 paf 594: IFNDEF_NO_STRING_ORIGIN(
1.172.2.11 paf 595: const char* joined_origin_file=fragment->origin.file;
1.172.2.4 paf 596: const size_t joined_origin_line=fragment->origin.line;
1.156 paf 597: );
1.172.2.18 paf 598: String_UL joined_lang=fragment->lang;
1.172.2.11 paf 599: const char* joined_ptr=current;
1.156 paf 600: // calc size
601: size_t joined_size=0;
1.172.2.4 paf 602: STRING_PREPARED_FOREACH_FRAGMENT(
603: if(fragment->lang==joined_lang)
604: joined_size+=fragment->size;
1.156 paf 605: else
606: break; // before non-ours
607: );
608: current+=joined_size;
609:
610: // pointers are after joined piece
1.172.2.4 paf 611: // & one step back, see STRING_PREPARED_FOREACH_FRAGMENT
612: --fragment;
1.156 paf 613:
1.172.2.4 paf 614: result->APPEND(joined_ptr, joined_size, joined_lang,
1.150 paf 615: joined_origin_file, joined_origin_line);
1.123 paf 616: );
1.156 paf 617:
1.89 parser 618: return result;
619: }
620:
1.172.2.10 paf 621: double String::as_double() {
1.89 parser 622: double result;
1.172.2.4 paf 623: char buf[MAX_STRING];
624: if(size()>MAX_STRING-1)
625: throw Exception("number.format",
1.172.2.21.2. (paf 626:): String* (this),
1.172.2.4 paf 627: "invalid number too long a string (%u>%u)", size(), MAX_STRING-1);
1.172.2.5 paf 628: char *eol=store_to(buf, String::UL_AS_IS); *eol=0;
1.172.2.11 paf 629: const char* cstr=buf;
1.172.2.4 paf 630:
1.161 paf 631: while(*cstr && isspace(*cstr))
632: cstr++;
633: if(!*cstr)
1.162 paf 634: return 0;
1.161 paf 635:
1.102 parser 636: char *error_pos;
1.89 parser 637: // 0xABC
1.99 parser 638: if(cstr[0]=='0')
639: if(cstr[1]=='x' || cstr[1]=='X')
640: result=(double)(unsigned long)strtol(cstr, &error_pos, 0);
641: else
1.102 parser 642: result=(double)strtod(cstr+1/*skip leading 0*/, &error_pos);
1.89 parser 643: else
1.99 parser 644: result=(double)strtod(cstr, &error_pos);
1.89 parser 645:
1.159 paf 646: while(char c=*error_pos++)
647: if(!isspace(c))
648: throw Exception("number.format",
1.172.2.21.2. (paf 649:): String* (this),
1.159 paf 650: "invalid number (double)");
1.89 parser 651:
652: return result;
653: }
1.172.2.10 paf 654: int String::as_int() {
1.89 parser 655: int result;
1.172.2.4 paf 656: char buf[MAX_STRING];
657: if(size()>MAX_STRING-1)
658: throw Exception("number.format",
1.172.2.21.2. (paf 659:): String* (this),
1.172.2.4 paf 660: "invalid number too long a string (%u>%u)", size(), MAX_STRING-1);
1.172.2.5 paf 661: char *eol=store_to(buf, String::UL_AS_IS); *eol=0;
1.172.2.11 paf 662: const char* cstr=buf;
1.172.2.4 paf 663:
1.161 paf 664: while(*cstr && isspace(*cstr))
665: cstr++;
666: if(!*cstr)
1.162 paf 667: return 0;
1.161 paf 668:
1.102 parser 669: char *error_pos;
1.89 parser 670: // 0xABC
1.99 parser 671: if(cstr[0]=='0')
672: if(cstr[1]=='x' || cstr[1]=='X')
673: result=(int)(unsigned long)strtol(cstr, &error_pos, 0);
674: else
1.102 parser 675: result=(int)strtol(cstr+1/*skip leading 0*/, &error_pos, 0);
1.89 parser 676: else
677: result=(int)strtol(cstr, &error_pos, 0);
678:
1.159 paf 679: while(char c=*error_pos++)
680: if(!isspace(c))
681: throw Exception("number.format",
1.172.2.21.2. (paf 682:): String* (this),
1.159 paf 683: "invalid number (int)");
1.82 parser 684:
685: return result;
1.61 paf 686: }
1.113 parser 687:
1.172.2.4 paf 688: inline void uint2uchars(uint word, uchar *bytes) {
689: bytes[0]=word&0xFF;
690: bytes[1]=(word>>8)&0xFF;
691: bytes[2]=(word>>16)&0xFF;
692: bytes[3]=(word>>24)&0xFF;
693: }
694: inline uint uchars2uint(uchar *bytes) {
695: return bytes[3]<<24
696: | bytes[2]<<16
697: | bytes[1]<<8
698: | bytes[0];
699: }
700:
1.172.2.21.2. (paf 701:): void String::serializesize_t prolog_size, char *& buf, size_t& buf_size) const {
1.113 parser 702: buf_size=
703: prolog_size
1.172.2.18 paf 704: +fused*(sizeof(String_UL)+sizeof(size_t))
1.113 parser 705: +size();
1.172.2.21.2. (paf 706:): buf=new char[buf_size];
1.172.2.4 paf 707: char *cur=buf+prolog_size;
1.113 parser 708:
1.172.2.4 paf 709: STRING_FOREACH_FRAGMENT(
1.123 paf 710: // lang
1.172.2.4 paf 711: memcpy(cur, &fragment->lang, sizeof(fragment->lang));
712: cur+=sizeof(fragment->lang);
1.123 paf 713: // size
1.172.2.4 paf 714: // bug on some sparc platform [you can't work with integers on odd pointers]
715: // forces us to use byte array instead
716: uchar bytes[4];
717: uint2uchars(fragment->size, bytes);
718: memcpy(cur, &bytes, sizeof(bytes)); cur+=sizeof(bytes);
1.123 paf 719: // bytes
1.172.2.4 paf 720: memcpy(cur, fragment->ptr, fragment->size);
721: cur+=fragment->size;
1.123 paf 722: );
1.113 parser 723: }
1.172.2.11 paf 724: bool String::deserialize(size_t prolog_size, void *buf, size_t buf_size, const char* file) {
1.135 paf 725: if(buf_size<=prolog_size)
1.148 paf 726: return false;
1.135 paf 727:
1.126 paf 728: char *cur=(char *)buf+prolog_size;
1.113 parser 729: buf_size-=prolog_size;
730:
731: while(buf_size) {
1.172.2.18 paf 732: if(sizeof(String_UL)+sizeof(size_t)>buf_size) // lang+size
1.148 paf 733: return false;
734:
1.172.2.6 paf 735: String_UL lang=*(String_UL *)(cur);
1.172.2.4 paf 736: size_t size=uchars2uint((uchar *)cur);
1.128 paf 737:
1.172.2.18 paf 738: size_t piece_size=sizeof(String_UL)+sizeof(size_t)+size;
1.148 paf 739: if(piece_size>buf_size) // buffer overrun, can be on incomplete cache files
740: return false;
741:
1.172.2.18 paf 742: const char* ptr=(const char*)(cur+sizeof(String_UL)+sizeof(size_t));
1.126 paf 743: APPEND(ptr, size, lang, file, 0);
1.113 parser 744:
745: cur+=piece_size;
746: buf_size-=piece_size;
747: }
1.148 paf 748: return true;
1.113 parser 749: }
E-mail: