Annotation of parser3/src/main/pa_string.C, revision 1.95
1.45 paf 1: /** @file
1.55 paf 2: Parser: string class. @see untaint.C.
1.46 paf 3:
1.36 paf 4: Copyright (c) 2001 ArtLebedev Group (http://www.artlebedev.com)
1.46 paf 5:
1.37 paf 6: Author: Alexander Petrosyan <paf@design.ru> (http://design.ru/paf)
1.4 paf 7: */
1.95 ! parser 8: static const char *RCSId="$Id: pa_string.C,v 1.94 2001/07/18 10:06:04 parser Exp $";
1.4 paf 9:
1.48 paf 10: #include "pa_config_includes.h"
1.1 paf 11:
1.70 paf 12: #include "pcre.h"
1.82 parser 13: #include "internal.h"
1.70 paf 14:
1.13 paf 15: #include "pa_pool.h"
1.12 paf 16: #include "pa_string.h"
1.5 paf 17: #include "pa_hash.h"
1.22 paf 18: #include "pa_exception.h"
1.53 paf 19: #include "pa_common.h"
1.60 paf 20: #include "pa_array.h"
21: #include "pa_globals.h"
1.61 paf 22: #include "pa_table.h"
1.60 paf 23:
1.75 paf 24: String::String(Pool& apool, const char *src, size_t src_size, bool tainted) :
1.94 parser 25: Pooled(apool),
26: forigins_mode(false) {
1.28 paf 27: last_chunk=&head;
28: head.count=CR_PREALLOCATED_COUNT;
1.5 paf 29: append_here=head.rows;
1.2 paf 30: head.preallocated_link=0;
1.28 paf 31: link_row=&head.rows[head.count];
1.8 paf 32: fused_rows=fsize=0;
1.41 paf 33:
34: if(src)
1.75 paf 35: if(tainted)
36: APPEND_TAINTED(src, src_size, 0, 0);
1.41 paf 37: else
1.75 paf 38: APPEND_CLEAN(src, src_size, 0, 0);
1.1 paf 39: }
40:
1.94 parser 41: String::String(const String& src) :
42: Pooled(src.pool()),
43: forigins_mode(false) {
1.8 paf 44: head.count=CR_PREALLOCATED_COUNT;
45:
1.55 paf 46: size_t src_used_rows=src.fused_rows;
1.8 paf 47: if(src_used_rows<=head.count) {
1.55 paf 48: // all new rows fit size_to preallocated area
49: size_t curr_chunk_rows=head.count;
1.8 paf 50: memcpy(head.rows, src.head.rows, sizeof(Chunk::Row)*src_used_rows);
51: append_here=&head.rows[src_used_rows];
52: link_row=&head.rows[curr_chunk_rows];
53: } else {
54: // warning:
1.10 paf 55: // heavily relies on the fact
56: // "preallocated area is the same for all strings"
1.8 paf 57: //
58: // info:
59: // allocating only enough mem to fit src string rows
60: // next append would allocate a new chunk
61: //
1.55 paf 62: // new rows don't fit size_to preallocated area: splitting size_to two chunks
1.8 paf 63: // preallocated chunk src to constructing head
64: memcpy(head.rows, src.head.rows, sizeof(Chunk::Row)*head.count);
1.55 paf 65: // remaining rows size_to new_chunk
66: size_t curr_chunk_rows=src_used_rows-head.count;
1.8 paf 67: Chunk *new_chunk=static_cast<Chunk *>(
1.55 paf 68: malloc(sizeof(size_t)+sizeof(Chunk::Row)*curr_chunk_rows+sizeof(Chunk *)));
1.8 paf 69: new_chunk->count=curr_chunk_rows;
70: head.preallocated_link=new_chunk;
1.28 paf 71: append_here=link_row=&new_chunk->rows[new_chunk->count];
1.8 paf 72:
73: Chunk *old_chunk=src.head.preallocated_link;
74: Chunk::Row *new_rows=new_chunk->rows;
1.55 paf 75: size_t rows_left_to_copy=new_chunk->count;
1.8 paf 76: while(true) {
1.55 paf 77: size_t old_count=old_chunk->count;
1.8 paf 78: Chunk *next_chunk=old_chunk->rows[old_count].link;
79: if(next_chunk) {
80: // not last source chunk
81: // taking it all
82: memcpy(new_rows, old_chunk->rows, sizeof(Chunk::Row)*old_count);
83: new_rows+=old_count;
84: rows_left_to_copy-=old_count;
85:
86: old_chunk=next_chunk;
87: } else {
88: // the last source chunk
89: // taking only those rows of chunk that _left_to_copy
90: memcpy(new_rows, old_chunk->rows, sizeof(Chunk::Row)*rows_left_to_copy);
91: break;
92: }
93: }
1.5 paf 94: }
1.8 paf 95: link_row->link=0;
96: fused_rows=src_used_rows;
97: fsize=src.fsize;
1.94 parser 98: }
99:
100: void String::expand() {
101: size_t new_chunk_count=last_chunk->count+CR_GROW_COUNT;
102: last_chunk=static_cast<Chunk *>(
103: malloc(sizeof(size_t)+sizeof(Chunk::Row)*new_chunk_count+sizeof(Chunk *)));
104: last_chunk->count=new_chunk_count;
105: link_row->link=last_chunk;
106: append_here=last_chunk->rows;
107: link_row=&last_chunk->rows[last_chunk->count];
108: link_row->link=0;
1.5 paf 109: }
1.28 paf 110:
1.42 paf 111: String& String::append(const String& src, Untaint_lang lang, bool forced) {
1.60 paf 112: const Chunk *chunk=&src.head;
1.40 paf 113: do {
1.60 paf 114: const Chunk::Row *row=chunk->rows;
115: for(size_t i=0; i<chunk->count; i++, row++) {
116: if(row==src.append_here)
1.40 paf 117: goto break2;
1.60 paf 118:
119: APPEND(row->item.ptr, row->item.size,
120: (lang!=UL_PASS_APPENDED && (row->item.lang==UL_TAINTED || forced))?lang:row->item.lang,
121: row->item.origin.file, row->item.origin.line);
1.40 paf 122: }
123: chunk=row->link;
124: } while(chunk);
125: break2:
1.60 paf 126: return *this;
1.34 paf 127: }
1.60 paf 128:
1.13 paf 129: String& String::real_append(STRING_APPEND_PARAMS) {
1.9 paf 130: if(!src)
131: return *this;
1.26 paf 132: if(!size)
133: size=strlen(src);
134: if(!size)
1.9 paf 135: return *this;
136:
1.1 paf 137: if(chunk_is_full())
138: expand();
139:
140: append_here->item.ptr=src;
1.26 paf 141: fsize+=append_here->item.size=size;
1.52 paf 142: append_here->item.lang=lang;
1.13 paf 143: #ifndef NO_STRING_ORIGIN
1.14 paf 144: append_here->item.origin.file=file;
145: append_here->item.origin.line=line;
1.13 paf 146: #endif
1.8 paf 147: append_here++; fused_rows++;
1.1 paf 148:
149: return *this;
150: }
151:
1.16 paf 152: uint String::hash_code() const {
1.7 paf 153: uint result=0;
1.5 paf 154:
1.16 paf 155: const Chunk *chunk=&head;
1.5 paf 156: do {
1.16 paf 157: const Chunk::Row *row=chunk->rows;
1.55 paf 158: for(size_t i=0; i<chunk->count; i++) {
1.5 paf 159: if(row==append_here)
160: goto break2;
161:
1.6 paf 162: result=Hash::generic_code(result, row->item.ptr, row->item.size);
1.5 paf 163: row++;
164: }
165: chunk=row->link;
166: } while(chunk);
167: break2:
168: return result;
169: }
170:
1.60 paf 171: /// @todo move 'lang' skipping to pos
172: int String::cmp(int& partial, const String& src,
173: size_t this_offset, Untaint_lang lang) const {
1.59 paf 174: partial=-1;
1.55 paf 175: this_offset=min(this_offset, size()-1);
176:
1.16 paf 177: const Chunk *a_chunk=&head;
178: const Chunk *b_chunk=&src.head;
179: const Chunk::Row *a_row=a_chunk->rows;
180: const Chunk::Row *b_row=b_chunk->rows;
1.55 paf 181: size_t a_offset=this_offset;
182: size_t b_offset=0;
1.9 paf 183: Chunk::Row *a_end=append_here;
184: Chunk::Row *b_end=src.append_here;
1.55 paf 185: size_t a_countdown=a_chunk->count;
186: size_t b_countdown=b_chunk->count;
187: size_t result;
1.60 paf 188: size_t pos=0;
1.33 paf 189:
1.83 parser 190: bool a_break=size()==0;
1.91 parser 191: bool b_break=src.size()==0;
1.83 parser 192: if(!(a_break || b_break)) while(true) {
1.55 paf 193: if(pos+a_row->item.size > this_offset) {
1.71 paf 194: if(lang!=UL_UNSPECIFIED && a_row->item.lang!=lang)
1.60 paf 195: return -1; // wrong lang -- bail out
196:
1.55 paf 197: int size_diff=
198: (a_row->item.size-a_offset)-
199: (b_row->item.size-b_offset);
200:
201: if(size_diff==0) { // a has same size as b
1.60 paf 202: result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset,
203: a_row->item.size-a_offset);
1.55 paf 204: if(result)
205: return result;
1.60 paf 206: pos+=a_row->item.size;
1.55 paf 207: a_row++; a_countdown--; a_offset=0;
208: b_row++; b_countdown--; b_offset=0;
209: } else if (size_diff>0) { // a longer
1.60 paf 210: result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset,
211: b_row->item.size-b_offset);
1.55 paf 212: if(result)
213: return result;
214: a_offset+=b_row->item.size-b_offset;
215: b_row++; b_countdown--; b_offset=0;
216: } else { // b longer
1.60 paf 217: result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset,
218: a_row->item.size-a_offset);
1.55 paf 219: if(result)
220: return result;
221: b_offset+=a_row->item.size-a_offset;
1.60 paf 222: pos+=a_row->item.size;
1.55 paf 223: a_row++; a_countdown--; a_offset=0;
224: }
1.83 parser 225: if(b_break=b_row==b_end) {
226: a_break=a_row==a_end;
227: break;
228: }
1.55 paf 229: if(!b_countdown) {
230: b_chunk=b_row->link;
231: b_row=b_chunk->rows;
232: b_countdown=b_chunk->count;
233: }
234: } else {
1.60 paf 235: a_offset-=a_row->item.size;
236: pos+=a_row->item.size;
237: a_row++; a_countdown--;
1.9 paf 238: }
239:
1.83 parser 240: if(a_break=a_row==a_end) {
241: b_break=b_row==b_end;
242: break;
243: }
1.11 paf 244: if(!a_countdown) {
1.9 paf 245: a_chunk=a_row->link;
246: a_row=a_chunk->rows;
1.11 paf 247: a_countdown=a_chunk->count;
1.9 paf 248: }
1.27 paf 249: }
1.55 paf 250: if(a_break==b_break) { // ended simultaneously
251: partial=0; return 0;
252: } else if(a_break) { // first bytes equal, but a ended before b
253: partial=1; return -1;
254: } else {
255: partial=2; return +1;
256: }
1.27 paf 257: }
258:
1.60 paf 259: /// @todo move 'lang' skipping to pos
1.59 paf 260: int String::cmp(int& partial, const char* b_ptr, size_t src_size,
1.60 paf 261: size_t this_offset, Untaint_lang lang) const {
1.59 paf 262: partial=-1;
1.50 paf 263: size_t b_size=src_size?src_size:b_ptr?strlen(b_ptr):0;
1.59 paf 264: this_offset=min(this_offset, size()-1);
1.27 paf 265:
266: const Chunk *a_chunk=&head;
267: const Chunk::Row *a_row=a_chunk->rows;
1.59 paf 268: size_t a_offset=this_offset;
1.55 paf 269: size_t b_offset=0;
1.27 paf 270: Chunk::Row *a_end=append_here;
1.55 paf 271: size_t a_countdown=a_chunk->count;
1.60 paf 272: size_t pos=0;
1.52 paf 273:
1.83 parser 274: bool a_break=size()==0;
275: bool b_break=b_size==0;
276: if(!(a_break || b_break)) while(true) {
1.59 paf 277: if(pos+a_row->item.size > this_offset) {
1.71 paf 278: if(lang!=UL_UNSPECIFIED && a_row->item.lang!=lang)
1.60 paf 279: return -1; // wrong lang -- bail out
280:
1.59 paf 281: int size_diff=
282: (a_row->item.size-a_offset)-
283: (b_size-b_offset);
284:
285: if(size_diff==0) { // a has same size as b
286: if(size_t result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset,
287: a_row->item.size-a_offset)!=0)
288: return result;
1.60 paf 289: pos+=a_row->item.size;
1.59 paf 290: a_row++; a_countdown--; a_offset=0;
291: b_break=true;
292: } else if (size_diff>0) { // a longer
293: if(size_t result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset,
294: b_size-b_offset)!=0)
295: return result;
296: a_offset+=b_size-b_offset;
297: b_break=true;
298: } else { // b longer
299: if(size_t result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset,
300: a_row->item.size-a_offset)!=0)
301: return result;
302: b_offset+=a_row->item.size-a_offset;
1.60 paf 303: pos+=a_row->item.size;
1.59 paf 304: a_row++; a_countdown--; a_offset=0;
305: }
306: } else {
1.60 paf 307: a_offset-=a_row->item.size;
308: pos+=a_row->item.size;
309: a_row++; a_countdown--;
1.27 paf 310: }
311:
1.86 parser 312: a_break=a_row==a_end;
313: if(a_break || b_break)
1.83 parser 314: break;
1.27 paf 315: if(!a_countdown) {
316: a_chunk=a_row->link;
317: a_row=a_chunk->rows;
318: a_countdown=a_chunk->count;
1.9 paf 319: }
320: }
1.55 paf 321: if(a_break==b_break) { // ended simultaneously
322: partial=0; return 0;
323: } else if(a_break) { // first bytes equal, but a ended before b
324: partial=1; return -1;
325: } else {
326: partial=2; return +1;
327: }
1.5 paf 328: }
1.46 paf 329:
330: #ifndef NO_STRING_ORIGIN
331: const Origin& String::origin() const {
332: if(!fused_rows)
333: THROW(0, 0,
1.50 paf 334: 0,
335: "String::origin() of empty string called");
1.46 paf 336:
1.49 paf 337: // determining origin by last appended piece
1.50 paf 338: // because first one frequently constant.
339: // ex: ^load[/file] "document_root" + "/file"
1.80 paf 340: // when last peice is constant,
341: // ex: parser_root_auto_path{dynamic} / auto.p{const}
342: // using first piece
343: Origin& last_origin=append_here[-1].item.origin;
344: return last_origin.file ? last_origin : head.rows[0].item.origin;
1.46 paf 345: }
346: #endif
1.53 paf 347:
1.69 paf 348: String& String::mid(size_t start, size_t finish) const {
1.53 paf 349: start=max(0, start);
350: finish=min(size(), finish);
1.60 paf 351: if(start==finish)
352: return *empty_string;
1.53 paf 353:
354: String& result=*NEW String(pool());
355:
356: size_t pos=0;
357: const Chunk *chunk=&head;
358: do {
359: const Chunk::Row *row=chunk->rows;
1.55 paf 360: for(size_t i=0; i<chunk->count; pos+=row->item.size, i++, row++) {
1.53 paf 361: if(row==append_here)
362: goto break2;
363:
1.60 paf 364: size_t item_finish=pos+row->item.size;
365: if(item_finish > start) { // started now or already?
366: bool started=result.size()==0; // started now?
367: bool finished=finish <= item_finish; // finished now?
1.53 paf 368: size_t offset=started?start-pos:0;
369: size_t size=finished?finish-pos:row->item.size;
370: result.APPEND(
371: row->item.ptr+offset, size-offset,
372: row->item.lang,
373: row->item.origin.file, row->item.origin.line);
374: if(finished)
375: goto break2;
376: }
377: }
378: chunk=row->link;
379: } while(chunk);
380: break2:
1.60 paf 381: // SAPI::log(pool(), "piece of '%s' from %d to %d is '%s'",
382: //cstr(), start, finish, result.cstr());
1.53 paf 383: return result;
1.54 paf 384: }
385:
1.60 paf 386: int String::pos(const String& substr,
387: size_t result, Untaint_lang lang) const {
1.58 paf 388: for(; result<size(); result++) {
1.60 paf 389: int partial; cmp(partial, substr, result, lang);
1.58 paf 390: if(
391: partial==0 || // full match
392: partial==2) // 'substr' starts 'this'+'result'
393: return result;
394: }
395:
396: return -1;
397: }
398:
1.60 paf 399: int String::pos(const char *substr, size_t substr_size,
400: size_t result, Untaint_lang lang) const {
1.57 paf 401: for(; result<size(); result++) {
1.60 paf 402: int partial; cmp(partial, substr, substr_size, result, lang);
1.55 paf 403: if(
404: partial==0 || // full match
405: partial==2) // 'substr' starts 'this'+'result'
406: return result;
407: }
408:
409: return -1;
1.60 paf 410: }
411:
412: void String::split(Array& result,
413: size_t* pos_after_ref,
414: const char *delim, size_t delim_size,
415: Untaint_lang lang, int limit) const {
416: if(delim_size) {
417: size_t pos_after=pos_after_ref?*pos_after_ref:0;
418: int pos_before;
419: // while we have 'delim'...
420: for(; (pos_before=pos(delim, delim_size, pos_after, lang))>=0 && limit; limit--) {
1.69 paf 421: result+=&mid(pos_after, pos_before);
1.60 paf 422: pos_after=pos_before+delim_size;
423: }
424: // last piece
425: if(pos_after<size() && limit) {
1.69 paf 426: result+=&mid(pos_after, size());
1.60 paf 427: pos_after=size();
428: }
429: if(pos_after_ref)
430: *pos_after_ref=pos_after;
431: } else { // empty delim
432: result+=this;
433: if(pos_after_ref)
434: *pos_after_ref+=size();
435: }
436: }
437:
438: void String::split(Array& result,
439: size_t* pos_after_ref,
440: const String& delim, Untaint_lang lang,
441: int limit) const {
442: if(delim.size()) {
443: size_t pos_after=pos_after_ref?*pos_after_ref:0;
444: int pos_before;
445: // while we have 'delim'...
446: for(; (pos_before=pos(delim, pos_after, lang))>=0 && limit; limit--) {
1.69 paf 447: result+=&mid(pos_after, pos_before);
1.60 paf 448: pos_after=pos_before+delim.size();
449: }
450: // last piece
451: if(pos_after<size() && limit) {
1.69 paf 452: result+=&mid(pos_after, size());
1.60 paf 453: pos_after=size();
454: }
455: if(pos_after_ref)
456: *pos_after_ref=pos_after;
457: } else { // empty delim
458: result+=this;
459: if(pos_after_ref)
460: *pos_after_ref+=size();
461: }
1.61 paf 462: }
463:
1.63 paf 464: static void regex_options(char *options, int *result){
465: struct Regex_option {
466: char key;
467: int clear, set;
468: int *result;
469: } regex_option[]={
470: {'i', 0, PCRE_CASELESS, result}, // a=A
1.79 paf 471: {'s', 0, PCRE_DOTALL, result}, // \n\n$ [default]
1.63 paf 472: {'x', 0, PCRE_EXTENDED, result}, // whitespace in regex ignored
473: {'m', PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$
474: {'g', 0, true, result+1}, // many rows
475: {0},
476: };
477: result[0]=PCRE_EXTRA | PCRE_DOTALL;
478: result[1]=0;
479:
480: if(options)
481: for(Regex_option *o=regex_option; o->key; o++)
482: if(
483: strchr(options, o->key) ||
484: strchr(options, toupper(o->key))) {
485: *(o->result)&=~o->clear;
486: *(o->result)|=o->set;
487: }
488: }
489:
1.88 parser 490: /// @todo maybe need speedup: some option to remove pre/match/post string generation
1.77 paf 491: bool String::match(const unsigned char *pcre_tables,
492: const String *aorigin,
1.62 paf 493: const String& regexp,
1.63 paf 494: const String *options,
1.64 paf 495: Table **table,
1.95 ! parser 496: Row_action row_action, void *info,
! 497: bool *was_global) const {
1.64 paf 498:
1.73 paf 499: if(!regexp.size())
500: THROW(0, 0,
501: aorigin,
502: "regexp is empty");
1.68 paf 503: const char *pattern=regexp.cstr(UL_AS_IS);
1.62 paf 504: const char *errptr;
505: int erroffset;
1.63 paf 506: int option_bits[2]; regex_options(options?options->cstr():0, option_bits);
1.95 ! parser 507: if(was_global)
! 508: *was_global=option_bits[1]!=0;
1.63 paf 509: pcre *code=pcre_compile(pattern, option_bits[0],
1.62 paf 510: &errptr, &erroffset,
1.74 paf 511: pcre_tables);
1.62 paf 512:
1.67 paf 513: if(!code)
1.62 paf 514: THROW(0, 0,
1.69 paf 515: ®exp.mid(erroffset, regexp.size()),
1.74 paf 516: "regular expression syntax error - %s", errptr);
1.62 paf 517:
1.63 paf 518: int info_substrings=pcre_info(code, 0, 0);
519: if(info_substrings<0) {
520: (*pcre_free)(code);
521: THROW(0, 0,
1.73 paf 522: aorigin,
1.76 paf 523: "pcre_info error (%d)",
1.73 paf 524: info_substrings);
1.63 paf 525: }
526:
527: int startoffset=0;
1.68 paf 528: const char *subject=cstr(UL_AS_IS);
1.62 paf 529: int length=strlen(subject);
1.63 paf 530: int ovecsize;
531: int *ovector=(int *)malloc(sizeof(int)*
1.65 paf 532: (ovecsize=(1/*match*/+info_substrings)*3));
1.62 paf 533:
1.64 paf 534: { // create table
535: Array& columns=*NEW Array(pool());
536: columns+=string_pre_match_name;
537: columns+=string_match_name;
538: columns+=string_post_match_name;
539: for(int i=1; i<=info_substrings; i++) {
540: char *column=(char *)malloc(MAX_NUMBER);
541: snprintf(column, MAX_NUMBER, "%d", i);
542: columns+=NEW String(pool(), column); // .i column name
543: }
544: *table=NEW Table(pool(), aorigin, &columns);
1.62 paf 545: }
1.63 paf 546:
1.64 paf 547: int exec_option_bits=0;
1.63 paf 548: while(true) {
549: int exec_substrings=pcre_exec(code, 0,
550: subject, length, startoffset,
1.64 paf 551: exec_option_bits, ovector, ovecsize);
1.63 paf 552:
553: if(exec_substrings==PCRE_ERROR_NOMATCH) {
554: (*pcre_free)(code);
1.67 paf 555: (*row_action)(**table, 0/*last time, no row*/, 0, 0, info);
1.63 paf 556: return option_bits[1]!=0; // global=true+table, not global=false
557: }
558:
559: if(exec_substrings<0) {
560: (*pcre_free)(code);
561: THROW(0, 0,
562: aorigin,
1.76 paf 563: "regular expression execute error (%d)",
1.63 paf 564: exec_substrings);
565: }
566:
567: Array& row=*NEW Array(pool());
1.81 paf 568: row+=&mid(0, ovector[0]); // .prematch column value
1.69 paf 569: row+=&mid(ovector[0], ovector[1]); // .match
1.81 paf 570: row+=&mid(ovector[1], size()); // .postmatch
1.63 paf 571:
572: for(int i=1; i<exec_substrings; i++) {
1.69 paf 573: // -1:-1 case handled peacefully by mid() itself
574: row+=&mid(ovector[i*2+0], ovector[i*2+1]); // .i column value
1.63 paf 575: }
576:
1.67 paf 577: (*row_action)(**table, &row, startoffset, ovector[0], info);
1.63 paf 578:
1.67 paf 579: if(!option_bits[1] || !(startoffset=ovector[1])) { // not global | going to hang
1.63 paf 580: (*pcre_free)(code);
1.67 paf 581: (*row_action)(**table, 0/*last time, no row*/, 0, 0, info);
1.63 paf 582: return true;
583: }
584:
585: /*
586: if(option_bits[0] & PCRE_MULTILINE)
1.64 paf 587: exec_option_bits|=PCRE_NOTBOL; // start of subject+startoffset not BOL
1.63 paf 588: */
589: }
1.82 parser 590: }
591:
592: String& String::change_case(Pool& pool, const unsigned char *tables,
593: Change_case_kind kind) const {
594: String& result=*new(pool) String(pool);
595:
596: const unsigned char *a;
597: const unsigned char *b;
598: switch(kind) {
599: case CC_UPPER:
600: a=tables+lcc_offset;
601: b=tables+fcc_offset;
602: break;
603: case CC_LOWER:
604: a=tables+lcc_offset;
605: b=0;
606: break;
607: default:
608: PTHROW(0, 0,
609: this,
610: "unknown change case kind #%d",
611: static_cast<int>(kind)); // never
612: a=b=0; // calm, compiler
613: break; // never
614: }
615:
616: const Chunk *chunk=&head;
617: do {
618: const Chunk::Row *row=chunk->rows;
619: for(size_t i=0; i<chunk->count; i++, row++) {
620: if(row==append_here)
621: goto break2;
622:
623: char *new_cstr=(char *)pool.malloc(row->item.size);
624: char *dest=new_cstr;
625: const char *src=row->item.ptr;
626: for(int size=row->item.size; size--; src++) {
627: unsigned char c=a[(unsigned char)*src];
628: if(b)
629: c=b[c];
630:
631: *dest++=(char)c;
632: }
633:
634: result.APPEND(new_cstr, row->item.size,
635: row->item.lang,
636: row->item.origin.file, row->item.origin.line);
637: }
638: chunk=row->link;
639: } while(chunk);
640: break2:
1.89 parser 641:
642: return result;
643: }
644:
1.90 parser 645: double String::as_double() const {
1.89 parser 646: double result;
647: const char *cstr=this->cstr();
648: char *error_pos=0;
649: // 0xABC
650: if(cstr[0]=='0' && (cstr[1]=='x' || cstr[1]=='X'))
651: result=(double)(unsigned long)strtol(cstr, &error_pos, 0);
652: else
653: result=strtod(cstr, &error_pos);
654:
655: if(error_pos && *error_pos)
656: THROW(0, 0,
657: this,
658: "invalid number (double)");
659:
660: return result;
661: }
1.90 parser 662: int String::as_int() const {
1.89 parser 663: int result;
664: const char *cstr=this->cstr();
665: char *error_pos=0;
666: // 0xABC
667: if(cstr[0]=='0' && (cstr[1]=='x' || cstr[1]=='X'))
668: result=(int)(unsigned long)strtol(cstr, &error_pos, 0);
669: else
670: result=(int)strtol(cstr, &error_pos, 0);
671:
672: if(error_pos && *error_pos)
673: THROW(0, 0,
674: this,
675: "invalid number (int)");
1.82 parser 676:
677: return result;
1.61 paf 678: }
E-mail: