Annotation of parser3/src/main/pa_string.C, revision 1.172.2.7
1.45 paf 1: /** @file
1.55 paf 2: Parser: string class. @see untaint.C.
1.46 paf 3:
1.172 paf 4: Copyright (c) 2001, 2003 ArtLebedev Group (http://www.artlebedev.com)
1.138 paf 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.164 paf 6: */
1.46 paf 7:
1.172.2.7! paf 8: static const char* IDENT_STRING_C="$Date: 2003/01/27 15:07:48 $";
1.4 paf 9:
1.70 paf 10: #include "pcre.h"
11:
1.12 paf 12: #include "pa_string.h"
1.22 paf 13: #include "pa_exception.h"
1.60 paf 14: #include "pa_array.h"
1.61 paf 15: #include "pa_table.h"
1.101 parser 16: #include "pa_dictionary.h"
1.132 paf 17: #include "pa_charset.h"
1.60 paf 18:
1.172.2.2 paf 19: // helpers
1.139 paf 20:
1.172.2.2 paf 21: /// String::match uses this as replace & global search table columns
1.139 paf 22:
1.172.2.4 paf 23: const int MAX_MATCH_GROUPS=100;
24:
25: class String_match_table_template_columns: public Array<ConstStringPtr> {
26: Pool pool;
1.172.2.2 paf 27: public:
1.172.2.4 paf 28: String_match_table_template_columns() {
29: *this+=ConstStringPtr(new String("prematch"));
30: *this+=ConstStringPtr(new String("match"));
31: *this+=ConstStringPtr(new String("postmatch"));
32: for(int i=0; i<MAX_MATCH_GROUPS; i++) {
33: char *cname=new(pool) char[3/*strlen("100")*/+1/*terminating 0*/];
34: ConstStringPtr sname(new String(cname, sprintf(cname, "%d", 1+i)));
35: *this+=sname;
1.172.2.2 paf 36: }
37: }
1.172.2.4 paf 38: };
39:
40: Table string_match_table_template(
1.172.2.7! paf 41: ConstStringPtrZero,
1.172.2.4 paf 42: Table::columns_type(new String_match_table_template_columns));
1.172.2.2 paf 43:
44: // methods
45:
1.172.2.4 paf 46: String::String(const char *src, size_t src_size, bool tainted): Array<String_fragment>(1), fsize(0) {
1.41 paf 47: if(src)
1.75 paf 48: if(tainted)
49: APPEND_TAINTED(src, src_size, 0, 0);
1.41 paf 50: else
1.75 paf 51: APPEND_CLEAN(src, src_size, 0, 0);
1.1 paf 52: }
1.140 paf 53:
1.172.2.4 paf 54: String::String(const String& src): Array<String_fragment>(src.count()) {
1.169 paf 55: append(src, UL_PASS_APPENDED);
1.120 paf 56: }
1.28 paf 57:
1.13 paf 58: String& String::real_append(STRING_APPEND_PARAMS) {
1.9 paf 59: if(!src)
60: return *this;
1.26 paf 61: if(!size)
62: size=strlen(src);
63: if(!size)
1.9 paf 64: return *this;
1.122 paf 65:
1.172.2.2 paf 66: if(is_full())
67: expand(fdelta);
1.1 paf 68:
1.172.2.4 paf 69: String_fragment *fragment=&felements[fused++];
70: fragment->ptr=src;
71: fragment->size=size;
72: fragment->lang=lang;
1.13 paf 73: #ifndef NO_STRING_ORIGIN
1.172.2.4 paf 74: fragment->origin.file=file;
75: fragment->origin.line=line;
1.13 paf 76: #endif
1.1 paf 77:
78: return *this;
79: }
80:
1.16 paf 81: uint String::hash_code() const {
1.7 paf 82: uint result=0;
1.172.2.4 paf 83: STRING_FOREACH_FRAGMENT(
84: result=generic_hash_code(result, fragment->ptr, fragment->size);
1.123 paf 85: );
1.5 paf 86: return result;
87: }
88:
1.60 paf 89: /// @todo move 'lang' skipping to pos
90: int String::cmp(int& partial, const String& src,
91: size_t this_offset, Untaint_lang lang) const {
1.59 paf 92: partial=-1;
1.125 paf 93: size_t a_size=size();
94: this_offset=min(this_offset, a_size-1);
1.55 paf 95:
1.172.2.4 paf 96: const String_fragment *a_current=felements;
97: const String_fragment *b_current=src.felements;
1.55 paf 98: size_t a_offset=this_offset;
99: size_t b_offset=0;
1.172.2.4 paf 100: String_fragment *a_end=felements+fused;
101: String_fragment *b_end=src.felements+src.fused;
1.116 paf 102: int result;
1.60 paf 103: size_t pos=0;
1.33 paf 104:
1.172.2.4 paf 105: bool a_break;
106: bool b_break;
107: while(!(
108: (a_break=(a_current==a_end))
109: || (b_break=(b_current==b_end)))) {
110: if(pos+a_current->size > this_offset) {
1.172.2.6 paf 111: if(lang!=UL_UNSPECIFIED && a_current->lang>(String_UL)lang)
1.60 paf 112: return -1; // wrong lang -- bail out
113:
1.55 paf 114: int size_diff=
1.172.2.4 paf 115: (a_current->size-a_offset)-
116: (b_current->size-b_offset);
1.55 paf 117:
118: if(size_diff==0) { // a has same size as b
1.172.2.4 paf 119: result=memcmp(a_current->ptr+a_offset, b_current->ptr+b_offset,
120: a_current->size-a_offset);
1.55 paf 121: if(result)
122: return result;
1.172.2.4 paf 123: pos+=a_current->size;
124: a_current++; a_offset=0;
125: b_current++; b_offset=0;
126: } else if(size_diff>0) { // a longer
127: result=memcmp(a_current->ptr+a_offset, b_current->ptr+b_offset,
128: b_current->size-b_offset);
1.55 paf 129: if(result)
130: return result;
1.172.2.4 paf 131: a_offset+=b_current->size-b_offset;
132: b_current++; b_offset=0;
1.55 paf 133: } else { // b longer
1.172.2.4 paf 134: result=memcmp(a_current->ptr+a_offset, b_current->ptr+b_offset,
135: a_current->size-a_offset);
1.55 paf 136: if(result)
137: return result;
1.172.2.4 paf 138: b_offset+=a_current->size-a_offset;
139: pos+=a_current->size;
140: a_current++; a_offset=0;
1.55 paf 141: }
142: } else {
1.172.2.4 paf 143: a_offset-=a_current->size;
144: pos+=a_current->size;
145: a_current++;
1.9 paf 146: }
1.27 paf 147: }
1.55 paf 148: if(a_break==b_break) { // ended simultaneously
149: partial=0; return 0;
150: } else if(a_break) { // first bytes equal, but a ended before b
151: partial=1; return -1;
152: } else {
153: partial=2; return +1;
154: }
1.27 paf 155: }
156:
1.60 paf 157: /// @todo move 'lang' skipping to pos
1.59 paf 158: int String::cmp(int& partial, const char* b_ptr, size_t src_size,
1.60 paf 159: size_t this_offset, Untaint_lang lang) const {
1.59 paf 160: partial=-1;
1.125 paf 161: size_t a_size=size();
1.50 paf 162: size_t b_size=src_size?src_size:b_ptr?strlen(b_ptr):0;
1.125 paf 163: this_offset=min(this_offset, a_size-1);
1.27 paf 164:
1.172.2.4 paf 165: const String_fragment *a_current=felements;
1.59 paf 166: size_t a_offset=this_offset;
1.55 paf 167: size_t b_offset=0;
1.172.2.4 paf 168: String_fragment *a_end=felements+fused;
1.60 paf 169: size_t pos=0;
1.52 paf 170:
1.172.2.4 paf 171: bool a_break;
1.83 parser 172: bool b_break=b_size==0;
1.172.2.4 paf 173: while(!(
174: (a_break=(a_current==a_end))
175: || b_break)) {
176: if(pos+a_current->size > this_offset) {
1.172.2.6 paf 177: if(lang!=UL_UNSPECIFIED && a_current->lang>(String_UL)lang)
1.60 paf 178: return -1; // wrong lang -- bail out
179:
1.59 paf 180: int size_diff=
1.172.2.4 paf 181: (a_current->size-a_offset)-
1.59 paf 182: (b_size-b_offset);
183:
184: if(size_diff==0) { // a has same size as b
1.172.2.4 paf 185: if(int result=memcmp(a_current->ptr+a_offset, b_ptr+b_offset,
186: a_current->size-a_offset)!=0)
1.59 paf 187: return result;
1.172.2.4 paf 188: pos+=a_current->size;
189: a_current++; a_offset=0;
1.59 paf 190: b_break=true;
191: } else if (size_diff>0) { // a longer
1.172.2.4 paf 192: if(int result=memcmp(a_current->ptr+a_offset, b_ptr+b_offset,
1.59 paf 193: b_size-b_offset)!=0)
194: return result;
195: a_offset+=b_size-b_offset;
196: b_break=true;
197: } else { // b longer
1.172.2.4 paf 198: if(int result=memcmp(a_current->ptr+a_offset, b_ptr+b_offset,
199: a_current->size-a_offset)!=0)
1.59 paf 200: return result;
1.172.2.4 paf 201: b_offset+=a_current->size-a_offset;
202: pos+=a_current->size;
203: a_current++; a_offset=0;
1.59 paf 204: }
205: } else {
1.172.2.4 paf 206: a_offset-=a_current->size;
207: pos+=a_current->size;
208: a_current++;
1.9 paf 209: }
210: }
1.55 paf 211: if(a_break==b_break) { // ended simultaneously
212: partial=0; return 0;
213: } else if(a_break) { // first bytes equal, but a ended before b
214: partial=1; return -1;
215: } else {
216: partial=2; return +1;
217: }
1.5 paf 218: }
1.46 paf 219:
220: #ifndef NO_STRING_ORIGIN
1.172.2.4 paf 221: const String_fragment::Origin& String::origin() const {
1.140 paf 222: if(is_empty()) {
1.172.2.4 paf 223: static const String_fragment::Origin empty_origin={"empty string"};
1.96 parser 224: return empty_origin;
225: }
1.46 paf 226:
1.147 paf 227: // determining origin by first piece or last appended piece
228: // because any of them can be constant=without origin:
1.50 paf 229: // ex: ^load[/file] "document_root" + "/file"
1.80 paf 230: // when last peice is constant,
231: // ex: parser_root_auto_path{dynamic} / auto.p{const}
232: // using first piece
1.172.2.4 paf 233: String_fragment::Origin& first_origin=felements[0].origin;
234: return first_origin.file ? first_origin : felements[fused-1].origin;
1.46 paf 235: }
236: #endif
1.53 paf 237:
1.172.2.3 paf 238: StringPtr String::mid(size_t start, size_t finish) const {
239: StringPtr result(new String());
1.107 parser 240:
1.166 paf 241: start=min(start, size());
1.167 paf 242: finish=max(start, finish);
1.60 paf 243: if(start==finish)
1.107 parser 244: return result;
1.53 paf 245:
246: size_t pos=0;
1.172.2.4 paf 247: STRING_FOREACH_FRAGMENT(
248: size_t item_finish=pos+fragment->size;
1.123 paf 249: if(item_finish > start) { // started now or already?
1.172.2.4 paf 250: bool started=result->is_empty(); // started now?
1.123 paf 251: bool finished=finish <= item_finish; // finished now?
252: size_t offset=started?start-pos:0;
1.172.2.4 paf 253: size_t size=finished?finish-pos:fragment->size;
254: result->APPEND(
255: fragment->ptr+offset, size-offset,
256: fragment->lang,
257: fragment->origin.file, fragment->origin.line);
1.123 paf 258: if(finished)
1.53 paf 259: goto break2;
260: }
1.172.2.4 paf 261: pos+=fragment->size;
1.123 paf 262: );
1.53 paf 263: break2:
1.60 paf 264: // SAPI::log(pool(), "piece of '%s' from %d to %d is '%s'",
265: //cstr(), start, finish, result.cstr());
1.53 paf 266: return result;
1.54 paf 267: }
268:
1.60 paf 269: int String::pos(const String& substr,
1.116 paf 270: int result, Untaint_lang lang) const {
1.125 paf 271: size_t self_size=size();
1.131 paf 272: for(; size_t(result)<self_size; result++) {
1.60 paf 273: int partial; cmp(partial, substr, result, lang);
1.58 paf 274: if(
275: partial==0 || // full match
276: partial==2) // 'substr' starts 'this'+'result'
277: return result;
278: }
279:
280: return -1;
281: }
282:
1.60 paf 283: int String::pos(const char *substr, size_t substr_size,
1.116 paf 284: int result, Untaint_lang lang) const {
1.125 paf 285: size_t self_size=size();
1.131 paf 286: for(; size_t(result)<self_size; result++) {
1.60 paf 287: int partial; cmp(partial, substr, substr_size, result, lang);
1.55 paf 288: if(
289: partial==0 || // full match
290: partial==2) // 'substr' starts 'this'+'result'
291: return result;
292: }
293:
294: return -1;
1.60 paf 295: }
296:
1.172.2.4 paf 297: void String::split(Array<ConstStringPtr>& result,
1.60 paf 298: size_t* pos_after_ref,
299: const char *delim, size_t delim_size,
300: Untaint_lang lang, int limit) const {
1.125 paf 301: size_t self_size=size();
1.60 paf 302: if(delim_size) {
303: size_t pos_after=pos_after_ref?*pos_after_ref:0;
304: int pos_before;
305: // while we have 'delim'...
306: for(; (pos_before=pos(delim, delim_size, pos_after, lang))>=0 && limit; limit--) {
1.172.2.4 paf 307: result+=mid(pos_after, pos_before);
1.60 paf 308: pos_after=pos_before+delim_size;
309: }
310: // last piece
1.124 paf 311: if(pos_after<self_size && limit) {
1.172.2.4 paf 312: result+=mid(pos_after, self_size);
1.124 paf 313: pos_after=self_size;
1.60 paf 314: }
315: if(pos_after_ref)
316: *pos_after_ref=pos_after;
317: } else { // empty delim
1.172.2.4 paf 318: result+=ConstStringPtr(this);
1.60 paf 319: if(pos_after_ref)
1.124 paf 320: *pos_after_ref+=self_size;
1.60 paf 321: }
322: }
323:
1.172.2.4 paf 324: void String::split(Array<ConstStringPtr>& result,
1.60 paf 325: size_t* pos_after_ref,
326: const String& delim, Untaint_lang lang,
327: int limit) const {
1.140 paf 328: if(!delim.is_empty()) {
1.60 paf 329: size_t pos_after=pos_after_ref?*pos_after_ref:0;
330: int pos_before;
331: // while we have 'delim'...
332: for(; (pos_before=pos(delim, pos_after, lang))>=0 && limit; limit--) {
1.172.2.4 paf 333: result+=mid(pos_after, pos_before);
1.60 paf 334: pos_after=pos_before+delim.size();
335: }
336: // last piece
337: if(pos_after<size() && limit) {
1.172.2.4 paf 338: result+=mid(pos_after, size());
1.60 paf 339: pos_after=size();
340: }
341: if(pos_after_ref)
342: *pos_after_ref=pos_after;
343: } else { // empty delim
1.172.2.4 paf 344: result+=ConstStringPtr(this);
1.60 paf 345: if(pos_after_ref)
346: *pos_after_ref+=size();
347: }
1.61 paf 348: }
349:
1.172.2.4 paf 350: static void regex_options(ConstStringPtr options, int *result, bool& need_pre_post_match){
1.63 paf 351: struct Regex_option {
1.153 paf 352: const char *keyL;
353: const char *keyU;
1.63 paf 354: int clear, set;
355: int *result;
1.154 paf 356: bool *flag;
1.63 paf 357: } regex_option[]={
1.153 paf 358: {"i", "I", 0, PCRE_CASELESS, result}, // a=A
359: {"s", "S", 0, PCRE_DOTALL, result}, // \n\n$ [default]
360: {"x", "U", 0, PCRE_EXTENDED, result}, // whitespace in regex ignored
361: {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$
362: {"g", "G", 0, true, result+1}, // many rows
1.154 paf 363: {"'", 0, 0, 0, 0, &need_pre_post_match},
364: {0}
1.63 paf 365: };
1.171 paf 366: result[0]=PCRE_EXTRA | PCRE_DOTALL | PCRE_DOLLAR_ENDONLY;
1.63 paf 367: result[1]=0;
368:
369: if(options)
1.153 paf 370: for(Regex_option *o=regex_option; o->keyL; o++)
1.154 paf 371: if(options->pos(o->keyL)>=0
372: || (o->keyU && options->pos(o->keyU)>=0)) {
373: if(o->flag)
374: *o->flag=true;
375: else { // result
376: *o->result &= ~o->clear;
377: *o->result |= o->set;
378: }
1.63 paf 379: }
380: }
381:
1.172.2.4 paf 382: TablePtr String::match(Charset& source_charset,
383: ConstStringPtr aorigin,
384: const String& regexp,
385: ConstStringPtr options,
386: Row_action row_action, void *info,
387: bool *was_global) const {
1.64 paf 388:
1.140 paf 389: if(regexp.is_empty())
1.149 paf 390: throw Exception(0,
1.73 paf 391: aorigin,
392: "regexp is empty");
1.154 paf 393:
1.172.2.4 paf 394: CharPtr pattern=regexp.cstr();
1.62 paf 395: const char *errptr;
396: int erroffset;
1.154 paf 397: bool need_pre_post_match=false;
398: int option_bits[2]; regex_options(options, option_bits, need_pre_post_match);
1.95 parser 399: if(was_global)
400: *was_global=option_bits[1]!=0;
1.172.2.4 paf 401: pcre *code=pcre_compile(pattern.get(), option_bits[0],
1.62 paf 402: &errptr, &erroffset,
1.172.2.1 paf 403: source_charset.pcre_tables);
1.62 paf 404:
1.67 paf 405: if(!code)
1.149 paf 406: throw Exception(0,
1.172.2.4 paf 407: regexp.mid(erroffset, regexp.size()),
1.74 paf 408: "regular expression syntax error - %s", errptr);
1.62 paf 409:
1.63 paf 410: int info_substrings=pcre_info(code, 0, 0);
411: if(info_substrings<0) {
1.100 parser 412: pcre_free(code);
1.149 paf 413: throw Exception(0,
1.73 paf 414: aorigin,
1.76 paf 415: "pcre_info error (%d)",
1.73 paf 416: info_substrings);
1.63 paf 417: }
418:
1.172.2.4 paf 419: CharPtr subject=cstr();
420: int length=size();
421: const int ovecsize=(1/*match*/+MAX_MATCH_GROUPS)*3;
1.155 paf 422: int ovector[ovecsize];
423:
424: // create table
1.172.2.4 paf 425: TablePtr result(new Table(string_match_table_template));
1.63 paf 426:
1.64 paf 427: int exec_option_bits=0;
1.154 paf 428: int prestart=0;
429: int poststart=0;
430: int postfinish=size();
1.63 paf 431: while(true) {
432: int exec_substrings=pcre_exec(code, 0,
1.172.2.4 paf 433: subject.get(), length, prestart,
1.64 paf 434: exec_option_bits, ovector, ovecsize);
1.63 paf 435:
436: if(exec_substrings==PCRE_ERROR_NOMATCH) {
1.100 parser 437: pcre_free(code);
1.172.2.4 paf 438: row_action(*result.get(), 0/*last time, no raw*/, 0, 0, poststart, postfinish, info);
439: if(option_bits[1])
440: return result; // global=true+result
441: else
442: return TablePtr(0);// not global=no result
1.63 paf 443: }
444:
445: if(exec_substrings<0) {
1.100 parser 446: pcre_free(code);
1.149 paf 447: throw Exception(0,
1.63 paf 448: aorigin,
1.76 paf 449: "regular expression execute error (%d)",
1.63 paf 450: exec_substrings);
451: }
452:
1.154 paf 453: int prefinish=ovector[0];
454: poststart=ovector[1];
1.172.2.4 paf 455: object_ptr<Array<ConstStringPtr> > row;
456: if(need_pre_post_match) {
457: *row+=mid(0, prefinish); // .prematch column value
458: *row+=mid(prefinish, poststart); // .match
459: *row+=mid(poststart, postfinish); // .postmatch
460: } else {
461: *row+=ConstStringPtrZero; // .prematch column value
462: *row+=ConstStringPtrZero; // .match
463: *row+=ConstStringPtrZero; // .postmatch
464: }
1.63 paf 465:
466: for(int i=1; i<exec_substrings; i++) {
1.69 paf 467: // -1:-1 case handled peacefully by mid() itself
1.172.2.4 paf 468: *row+=mid(ovector[i*2+0], ovector[i*2+1]); // .i column value
1.63 paf 469: }
470:
1.172.2.4 paf 471: row_action(*result, row.get(), prestart, prefinish, poststart, postfinish, info);
1.63 paf 472:
1.154 paf 473: if(!option_bits[1] || prestart==poststart) { // not global | going to hang
1.100 parser 474: pcre_free(code);
1.172.2.4 paf 475: row_action(*result, 0/*last time, no row*/, 0, 0, poststart, postfinish, info);
476: return result;
1.63 paf 477: }
1.154 paf 478: prestart=poststart;
1.63 paf 479:
480: /*
481: if(option_bits[0] & PCRE_MULTILINE)
1.64 paf 482: exec_option_bits|=PCRE_NOTBOL; // start of subject+startoffset not BOL
1.63 paf 483: */
484: }
1.82 parser 485: }
486:
1.172.2.4 paf 487: StringPtr String::change_case(Pool& pool, Charset& source_charset, Change_case_kind kind) const {
488: StringPtr result(new String());
489:
1.172.2.1 paf 490: const unsigned char *tables=source_charset.pcre_tables;
1.82 parser 491:
492: const unsigned char *a;
493: const unsigned char *b;
494: switch(kind) {
495: case CC_UPPER:
496: a=tables+lcc_offset;
497: b=tables+fcc_offset;
498: break;
499: case CC_LOWER:
500: a=tables+lcc_offset;
501: b=0;
502: break;
503: default:
1.149 paf 504: throw Exception(0,
1.172.2.4 paf 505: ConstStringPtr(this),
1.82 parser 506: "unknown change case kind #%d",
507: static_cast<int>(kind)); // never
508: a=b=0; // calm, compiler
509: break; // never
510: }
511:
1.172.2.4 paf 512: STRING_FOREACH_FRAGMENT(
513: char *new_cstr=new(pool) char[fragment->size];
1.143 paf 514: char *dest=new_cstr;
1.172.2.4 paf 515: const char *end=fragment->ptr+fragment->size;
516: for(const char *current=fragment->ptr; current<end; current++) {
517: unsigned char c=a[(unsigned char)*current];
1.143 paf 518: if(b)
519: c=b[c];
1.82 parser 520:
1.143 paf 521: *dest++=(char)c;
1.82 parser 522: }
1.143 paf 523:
1.172.2.4 paf 524: result->APPEND(new_cstr, fragment->size,
525: fragment->lang,
526: fragment->origin.file, fragment->origin.line);
1.143 paf 527: );
1.89 parser 528:
1.101 parser 529: return result;
530: }
531:
1.150 paf 532: /// @test if in some piece were found no dict words, append it, not it's duplicate
1.172.2.4 paf 533: StringPtr String::replace(Pool& pool, Dictionary& dict) const {
534: StringPtr result(new String());
535: CharPtr lcstr(cstr());
536: const char *current=lcstr.get();
1.170 paf 537:
1.172.2.4 paf 538: STRING_FOREACH_FRAGMENT(
1.170 paf 539: IFNDEF_NO_STRING_ORIGIN(
1.172.2.4 paf 540: const char *joined_origin_file=fragment->origin.file;
541: const size_t joined_origin_line=fragment->origin.line;
1.170 paf 542: );
1.172.2.4 paf 543: uchar joined_lang=fragment->lang;
1.170 paf 544: const char *joined_ptr=current;
545: // calc size
546: size_t joined_size=0;
1.172.2.4 paf 547: STRING_PREPARED_FOREACH_FRAGMENT(
548: if(fragment->lang==joined_lang)
549: joined_size+=fragment->size;
1.170 paf 550: else
551: break; // before non-ours
552: );
553: current+=joined_size;
554:
555: // pointers are after joined piece
1.172.2.4 paf 556: // & one step back, see STRING_PREPARED_FOREACH_FRAGMENT
557: --fragment;
1.170 paf 558:
1.172.2.4 paf 559: char *new_cstr=new(pool) char[(size_t)ceil(joined_size*dict.max_ratio())];
1.123 paf 560: char *dest=new_cstr;
1.170 paf 561: while(joined_size) {
1.172.2.4 paf 562: // there is a fragment where first column starts 'joined_ptr'
563: if(Table::element_type row=dict.first_that_starts(joined_ptr, joined_size)) {
1.123 paf 564: // get a=>b values
1.172.2.4 paf 565: ConstStringPtr a=row->get(0);
566: ConstStringPtr b=row->get(1);
1.170 paf 567: // skip 'a' in 'joined_ptr' && reduce work size
1.172.2.4 paf 568: joined_ptr+=a->size(); joined_size-=a->size();
1.123 paf 569: // write 'b' to 'dest' && skip 'b' in 'dest'
1.172.2.5 paf 570: b->store_to(dest, String::UL_AS_IS); dest+=b->size();
1.123 paf 571: } else {
572: // write a char to b && reduce work size
1.170 paf 573: *dest++=*joined_ptr++; joined_size--;
1.101 parser 574: }
575: }
576:
1.172.2.4 paf 577: result->APPEND(new_cstr, dest-new_cstr, joined_lang,
1.170 paf 578: joined_origin_file, joined_origin_line);
1.156 paf 579: );
1.170 paf 580:
1.156 paf 581: return result;
582: }
583:
1.172.2.4 paf 584: StringPtr String::join_chains(Pool& pool) const {
585: StringPtr result(new String());
586:
587: char *pooled_buf=new(pool) char[cstr_bufsize()];
1.172.2.5 paf 588: store_to(pooled_buf, String::UL_AS_IS);
1.172.2.4 paf 589: const char *current=pooled_buf;
1.156 paf 590:
1.172.2.4 paf 591: STRING_FOREACH_FRAGMENT(
1.156 paf 592: IFNDEF_NO_STRING_ORIGIN(
1.172.2.4 paf 593: const char *joined_origin_file=fragment->origin.file;
594: const size_t joined_origin_line=fragment->origin.line;
1.156 paf 595: );
1.172.2.4 paf 596: uchar joined_lang=fragment->lang;
1.156 paf 597: const char *joined_ptr=current;
598: // calc size
599: size_t joined_size=0;
1.172.2.4 paf 600: STRING_PREPARED_FOREACH_FRAGMENT(
601: if(fragment->lang==joined_lang)
602: joined_size+=fragment->size;
1.156 paf 603: else
604: break; // before non-ours
605: );
606: current+=joined_size;
607:
608: // pointers are after joined piece
1.172.2.4 paf 609: // & one step back, see STRING_PREPARED_FOREACH_FRAGMENT
610: --fragment;
1.156 paf 611:
1.172.2.4 paf 612: result->APPEND(joined_ptr, joined_size, joined_lang,
1.150 paf 613: joined_origin_file, joined_origin_line);
1.123 paf 614: );
1.156 paf 615:
1.89 parser 616: return result;
617: }
618:
1.90 parser 619: double String::as_double() const {
1.89 parser 620: double result;
1.172.2.4 paf 621: char buf[MAX_STRING];
622: if(size()>MAX_STRING-1)
623: throw Exception("number.format",
624: ConstStringPtr(this),
625: "invalid number too long a string (%u>%u)", size(), MAX_STRING-1);
1.172.2.5 paf 626: char *eol=store_to(buf, String::UL_AS_IS); *eol=0;
1.172.2.4 paf 627: const char *cstr=buf;
628:
1.161 paf 629: while(*cstr && isspace(*cstr))
630: cstr++;
631: if(!*cstr)
1.162 paf 632: return 0;
1.161 paf 633:
1.102 parser 634: char *error_pos;
1.89 parser 635: // 0xABC
1.99 parser 636: if(cstr[0]=='0')
637: if(cstr[1]=='x' || cstr[1]=='X')
638: result=(double)(unsigned long)strtol(cstr, &error_pos, 0);
639: else
1.102 parser 640: result=(double)strtod(cstr+1/*skip leading 0*/, &error_pos);
1.89 parser 641: else
1.99 parser 642: result=(double)strtod(cstr, &error_pos);
1.89 parser 643:
1.159 paf 644: while(char c=*error_pos++)
645: if(!isspace(c))
646: throw Exception("number.format",
1.172.2.4 paf 647: ConstStringPtr(this),
1.159 paf 648: "invalid number (double)");
1.89 parser 649:
650: return result;
651: }
1.90 parser 652: int String::as_int() const {
1.89 parser 653: int result;
1.172.2.4 paf 654: char buf[MAX_STRING];
655: if(size()>MAX_STRING-1)
656: throw Exception("number.format",
657: ConstStringPtr(this),
658: "invalid number too long a string (%u>%u)", size(), MAX_STRING-1);
1.172.2.5 paf 659: char *eol=store_to(buf, String::UL_AS_IS); *eol=0;
1.172.2.4 paf 660: const char *cstr=buf;
661:
1.161 paf 662: while(*cstr && isspace(*cstr))
663: cstr++;
664: if(!*cstr)
1.162 paf 665: return 0;
1.161 paf 666:
1.102 parser 667: char *error_pos;
1.89 parser 668: // 0xABC
1.99 parser 669: if(cstr[0]=='0')
670: if(cstr[1]=='x' || cstr[1]=='X')
671: result=(int)(unsigned long)strtol(cstr, &error_pos, 0);
672: else
1.102 parser 673: result=(int)strtol(cstr+1/*skip leading 0*/, &error_pos, 0);
1.89 parser 674: else
675: result=(int)strtol(cstr, &error_pos, 0);
676:
1.159 paf 677: while(char c=*error_pos++)
678: if(!isspace(c))
679: throw Exception("number.format",
1.172.2.4 paf 680: ConstStringPtr(this),
1.159 paf 681: "invalid number (int)");
1.82 parser 682:
683: return result;
1.61 paf 684: }
1.113 parser 685:
1.172.2.4 paf 686: inline void uint2uchars(uint word, uchar *bytes) {
687: bytes[0]=word&0xFF;
688: bytes[1]=(word>>8)&0xFF;
689: bytes[2]=(word>>16)&0xFF;
690: bytes[3]=(word>>24)&0xFF;
691: }
692: inline uint uchars2uint(uchar *bytes) {
693: return bytes[3]<<24
694: | bytes[2]<<16
695: | bytes[1]<<8
696: | bytes[0];
697: }
698:
699: void String::serialize(Pool& pool, size_t prolog_size, char *& buf, size_t& buf_size) const {
1.113 parser 700: buf_size=
701: prolog_size
1.172.2.4 paf 702: +fused*(sizeof(uchar)+sizeof(size_t))
1.113 parser 703: +size();
1.172.2.4 paf 704: buf=new(pool) char[buf_size];
705: char *cur=buf+prolog_size;
1.113 parser 706:
1.172.2.4 paf 707: STRING_FOREACH_FRAGMENT(
1.123 paf 708: // lang
1.172.2.4 paf 709: memcpy(cur, &fragment->lang, sizeof(fragment->lang));
710: cur+=sizeof(fragment->lang);
1.123 paf 711: // size
1.172.2.4 paf 712: // bug on some sparc platform [you can't work with integers on odd pointers]
713: // forces us to use byte array instead
714: uchar bytes[4];
715: uint2uchars(fragment->size, bytes);
716: memcpy(cur, &bytes, sizeof(bytes)); cur+=sizeof(bytes);
1.123 paf 717: // bytes
1.172.2.4 paf 718: memcpy(cur, fragment->ptr, fragment->size);
719: cur+=fragment->size;
1.123 paf 720: );
1.113 parser 721: }
1.148 paf 722: bool String::deserialize(size_t prolog_size, void *buf, size_t buf_size, const char *file) {
1.135 paf 723: if(buf_size<=prolog_size)
1.148 paf 724: return false;
1.135 paf 725:
1.126 paf 726: char *cur=(char *)buf+prolog_size;
1.113 parser 727: buf_size-=prolog_size;
728:
729: while(buf_size) {
1.172.2.4 paf 730: if(sizeof(uchar)+sizeof(size_t)>buf_size) // lang+size
1.148 paf 731: return false;
732:
1.172.2.6 paf 733: String_UL lang=*(String_UL *)(cur);
1.172.2.4 paf 734: size_t size=uchars2uint((uchar *)cur);
1.128 paf 735:
1.172.2.4 paf 736: size_t piece_size=sizeof(uchar)+sizeof(size_t)+size;
1.148 paf 737: if(piece_size>buf_size) // buffer overrun, can be on incomplete cache files
738: return false;
739:
1.172.2.4 paf 740: const char *ptr=(const char*)(cur+sizeof(uchar)+sizeof(size_t));
1.126 paf 741: APPEND(ptr, size, lang, file, 0);
1.113 parser 742:
743: cur+=piece_size;
744: buf_size-=piece_size;
745: }
1.148 paf 746: return true;
1.113 parser 747: }
E-mail: