Annotation of parser3/src/main/pa_string.C, revision 1.172.2.21.2.37
1.45 paf 1: /** @file
1.172.2.21.2. (paf 2:): Parser: string class. @see untaint.C.
1.46 paf 3:
1.172.2.11 paf 4: Copyright (c) 2001-2003 ArtLebedev Group (http://www.artlebedev.com)
1.138 paf 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.164 paf 6: */
1.46 paf 7:
1.172.2.21.2. 7(paf 8:3): static const char* IDENT_STRING_C="$Date: 2003/04/08 11:08:12 $";
1.4 paf 9:
1.70 paf 10: #include "pcre.h"
11:
1.12 paf 12: #include "pa_string.h"
1.22 paf 13: #include "pa_exception.h"
1.61 paf 14: #include "pa_table.h"
1.101 parser 15: #include "pa_dictionary.h"
1.132 paf 16: #include "pa_charset.h"
1.60 paf 17:
1.172.2.2 paf 18: // helpers
1.139 paf 19:
1.172.2.2 paf 20: /// String::match uses this as replace & global search table columns
1.139 paf 21:
1.172.2.4 paf 22: const int MAX_MATCH_GROUPS=100;
23:
1.172.2.14 paf 24: class String_match_table_template_columns: public ArrayString {
1.172.2.2 paf 25: public:
1.172.2.4 paf 26: String_match_table_template_columns() {
1.172.2.21.2. (paf 27:): *this+=new String("prematch");
28:): *this+=new String("match");
29:): *this+=new String("postmatch");
1.172.2.4 paf 30: for(int i=0; i<MAX_MATCH_GROUPS; i++) {
1.172.2.21.2. 5(paf 31:3): *this+=new String(StringBody::Format(1+i), String::L_CLEAN);
1.172.2.2 paf 32: }
1.172.2.21 paf 33: }
1.172.2.4 paf 34: };
35:
1.172.2.21.2. (paf 36:): Table string_match_table_template(new String_match_table_template_columns);
1.172.2.2 paf 37:
1.172.2.21.2. (paf 38:): // String::ArrayFragment methods
39:):
3(paf 40:3): void String::ArrayFragment::append_positions(const ArrayFragment& src,
41:3): size_t substr_begin, size_t substr_end) {
1(paf 42:3): if(substr_begin==substr_end)
3(paf 43:3): return;
(paf 44:):
6(paf 45:3): FILE *err=fopen("append.log", "wt");
46:3):
1(paf 47:3): size_t fragment_begin=0;
(paf 48:): size_t fragment_end;
1(paf 49:3): for(Array_iterator<element_type> i(src); ; fragment_begin=fragment_end) {
6(paf 50:3): const Fragment fragment=i.next();
1(paf 51:3): fragment_end=fragment_begin+fragment.length;
6(paf 52:3): fprintf(err, "1end=%u\n", fragment_end);fflush(err);
(paf 53:):
6(paf 54:3): // not reached fragments which may include 'substr'?
55:3): if(!(substr_begin>=fragment_begin && substr_begin<=fragment_end))
56:3): continue;
57:3):
(paf 58:): // found first fragment including piece of 'substr'
6(paf 59:3): if(substr_end<=fragment_end) // fits into first fragment?
60:3): *this+=Fragment(fragment.lang, substr_end-substr_begin);
61:3): else { // spans more then one fragment
62:3): *this+=Fragment(fragment.lang, fragment_end-substr_begin);
63:3): while(true) {
64:3): const Fragment fragment=i.next();
65:3): fragment_end=(fragment_begin=fragment_end)+fragment.length;
66:3): fprintf(err, "2end=%u\n", fragment_end);fflush(err);
67:3):
68:3): if(substr_end>fragment_end) // are there still more?
69:3): append(Fragment(fragment.lang, fragment.length)); // appending whole fragment
70:3): else { // no, it was last
71:3): append(Fragment(fragment.lang, substr_end-fragment_begin));
72:3): fclose(err);
73:3): return;
74:3): }
(paf 75:): }
76:): }
77:):
78:): break;
6(paf 79:3): }
80:3): fclose(err);
(paf 81:): }
82:):
8(paf 83:3): // StringBody methods
5(paf 84:3):
9(paf 85:3): StringBody StringBody::Format(int value) {
8(paf 86:3): char local[MAX_NUMBER];
9(paf 87:3): size_t length=snprintf(local, MAX_NUMBER, "%d", value);
88:3): return StringBody(pa_strdup(local, length), length);
8(paf 89:3): }
5(paf 90:3):
3(paf 91:3): static int CORD_batched_iter_fn_generic_hash_code(char c, void * client_data) {
92:3): uint& result=*static_cast<uint*>(client_data);
93:3): generic_hash_code(result, c);
94:3): return 0;
95:3): }
5(paf 96:3): static int CORD_batched_iter_fn_generic_hash_code(const char* s, void * client_data) {
97:3): uint& result=*static_cast<uint*>(client_data);
98:3): generic_hash_code(result, s);
99:3): return 0;
100:3): };
7(paf 101:3): uint StringBody::hash_code() const {
5(paf 102:3): uint result=0;
3(paf 103:3): CORD_iter5(body, 0,
104:3): CORD_batched_iter_fn_generic_hash_code,
105:3): CORD_batched_iter_fn_generic_hash_code, &result);
5(paf 106:3): return result;
107:3): }
108:3):
(paf 109:): // String methods
1.172.2.2 paf 110:
1.172.2.21.2. 6(paf 111:3): String::String(const char* cstr, size_t helper_length, bool tainted): body(CORD_EMPTY) {
112:3): append_help_length(cstr, helper_length, tainted?L_TAINTED:L_CLEAN);
1.1 paf 113: }
1.172.2.21.2. 7(paf 114:3): String::String(String::C cstr, bool tainted): body(CORD_EMPTY) {
115:3): append_know_length(cstr, cstr.length, tainted?L_TAINTED:L_CLEAN);
116:3): }
1.140 paf 117:
1.172.2.21.2. 0(paf 118:3): String::String(const String& src): body(src.body) {
119:3): fragments.append(src.fragments);
7(paf 120:3): #ifndef NDEBUG
121:3): invariant();
122:3): #endif
0(paf 123:3): }
1.28 paf 124:
1.172.2.21.2. 7(paf 125:3): String& String::append_know_length(const char* str, size_t known_length, Language lang) {
126:3): body.append_know_length(str, known_length);
127:3): fragments+=Fragment(lang, known_length);
128:3):
129:3): #ifndef NDEBUG
130:3): invariant();
131:3): #endif
132:3): return *this;
133:3): }
4(paf 134:3): String& String::append_help_length(const char* str, size_t helper_length, Language lang) {
3(paf 135:3): if(!str)
1.9 paf 136: return *this;
1.172.2.21.2. 6(paf 137:3): size_t known_length=helper_length?helper_length:strlen(str);
138:3): if(!known_length)
1.9 paf 139: return *this;
1.122 paf 140:
1.172.2.21.2. 7(paf 141:3): return append_know_length(str, known_length, lang);
1.1 paf 142: }
1.172.2.21.2. 6(paf 143:3): String& String::append_strdup(const char* str, size_t helper_length, Language lang) {
144:3): size_t known_length=helper_length?helper_length:strlen(str);
145:3): if(!known_length)
146:3): return *this;
147:3):
148:3): body.append_strdup_know_length(str, known_length);
149:3): fragments+=Fragment(lang, known_length);
4(paf 150:3):
7(paf 151:3): #ifndef NDEBUG
152:3): invariant();
153:3): #endif
4(paf 154:3): return *this;
155:3): }
1.1 paf 156:
1.172.2.21.2. 1(paf 157:3): /// @todo check in doc: whether it documents NOW bad situation "abc".mid(-1, 3) =were?="ab"
8(paf 158:3): String& String::mid(size_t substr_begin, size_t substr_end) const {
(paf 159:): String& result=*new String;
1.33 paf 160:
1.172.2.21.2. 1(paf 161:3): size_t self_length=length();
6(paf 162:3): substr_begin=min(substr_begin, self_length);
1(paf 163:3): substr_end=min(max(substr_end, substr_begin), self_length);
164:3): if(substr_begin==substr_end)
(paf 165:): return result;
1.52 paf 166:
1.172.2.21.2. (paf 167:): // first: letters themselves
7(paf 168:3): result.body=body.mid(substr_begin, substr_end-substr_begin);
1.46 paf 169:
1.172.2.21.2. (paf 170:): // next: their langs
3(paf 171:3): result.fragments.append_positions(fragments, substr_begin, substr_end);
1.53 paf 172:
1.172.2.21.2. 5(paf 173:3): // SAPI::log("piece of '%s' from %d to %d is '%s'",
1(paf 174:3): //cstr(), substr_begin, substr_end, result.cstr());
7(paf 175:3): #ifndef NDEBUG
176:3): result.invariant();
177:3): #endif
178:3):
1.53 paf 179: return result;
1.54 paf 180: }
181:
1.172.2.21.2. 7(paf 182:3): size_t String::pos(const StringBody substr,
5(paf 183:3): size_t this_offset, Language lang) const {
(paf 184:): // first: letters themselves
2(paf 185:3): size_t substr_begin=body.pos(substr, this_offset);
1(paf 186:3): if(substr_begin==CORD_NOT_FOUND)
(paf 187:): return STRING_NOT_FOUND;
188:):
189:): // next: check the lang when specified
190:):
5(paf 191:3): if(lang==L_UNSPECIFIED) // ignore lang?
1(paf 192:3): return substr_begin;
(paf 193:):
194:): // substr must be in one fragment, and fragments' lang must = lang
7(paf 195:3): size_t substr_end=substr_begin+substr.length();
1(paf 196:3): size_t fragment_begin=0;
(paf 197:): size_t fragment_end;
1(paf 198:3): for(Array_iterator<ArrayFragment::element_type> i(fragments); i.has_next(); fragment_begin=fragment_end) {
3(paf 199:3): const Fragment fragment=i.next();
1(paf 200:3): fragment_end=fragment_begin+fragment.length;
(paf 201:):
1(paf 202:3): if(substr_begin<fragment_begin) // not reached fragments which may include 'result'?
(paf 203:): continue;
2(paf 204:3): if(substr_begin>=fragment_end) // begin of substr OUT of current fragment?
205:3): continue;
(paf 206:):
207:): if(substr_end>fragment_end) // end of substr OUT of current fragment?
2(paf 208:3): throw Exception(0, // (*) see below
209:3): this,
210:3): "searching for '%s' starting from %ud problem: found begin in one fragment, but end in another",
7(paf 211:3): substr.cstr(), this_offset);
(paf 212:):
4(paf 213:3): if(fragment.lang<=lang)
2(paf 214:3): return substr_begin;
215:3): else { // bad lang...
216:3): /// WARNING: this possibly skips assert (*), but it's fast
217:3): substr_begin=body.pos(substr, fragment_end/*...search AFTER for more*/);
218:3): if(substr_begin==CORD_NOT_FOUND)
219:3): return STRING_NOT_FOUND;
220:3):
7(paf 221:3): size_t substr_end=substr_begin+substr.length();
2(paf 222:3): // and continuing with next fragment
223:3): }
(paf 224:): }
225:):
226:): return STRING_NOT_FOUND;
1.58 paf 227: }
228:
1.172.2.21.2. (paf 229:): size_t String::pos(const String& substr,
5(paf 230:3): size_t this_offset, Language lang) const {
(paf 231:): return pos(substr.body, this_offset, lang);
1.60 paf 232: }
233:
1.172.2.14 paf 234: void String::split(ArrayString& result,
1.172.2.21.2. (paf 235:): size_t& pos_after,
236:): const char* delim,
2(paf 237:3): Language lang, int limit) const {
(paf 238:): size_t self_length=length();
239:): if(size_t delim_length=strlen(delim)) {
1.60 paf 240: int pos_before;
241: // while we have 'delim'...
1.172.2.21.2. (paf 242:): for(; (pos_before=pos(delim, pos_after, lang))>=0 && limit; limit--) {
243:): result+=&mid(pos_after, pos_before);
244:): pos_after=pos_before+delim_length;
1.60 paf 245: }
246: // last piece
1.172.2.21.2. (paf 247:): if(pos_after<self_length && limit) {
248:): result+=&mid(pos_after, self_length);
249:): pos_after=self_length;
1.60 paf 250: }
251: } else { // empty delim
1.172.2.21.2. (paf 252:): result+=this;
253:): pos_after+=self_length;
1.60 paf 254: }
255: }
256:
1.172.2.14 paf 257: void String::split(ArrayString& result,
1.172.2.21.2. (paf 258:): size_t& pos_after,
5(paf 259:3): const String& delim, Language lang,
(paf 260:): int limit) const {
5(paf 261:3): if(!delim.is_empty()) {
1.60 paf 262: int pos_before;
263: // while we have 'delim'...
264: for(; (pos_before=pos(delim, pos_after, lang))>=0 && limit; limit--) {
1.172.2.21.2. (paf 265:): result+=&mid(pos_after, pos_before);
266:): pos_after=pos_before+delim.length();
1.60 paf 267: }
268: // last piece
1.172.2.21.2. (paf 269:): if(pos_after<length() && limit) {
270:): result+=&mid(pos_after, length());
271:): pos_after=length();
1.60 paf 272: }
273: } else { // empty delim
1.172.2.21.2. (paf 274:): result+=this;
275:): pos_after+=length();
1.60 paf 276: }
1.61 paf 277: }
278:
1.172.2.21.2. 2(paf 279:3): static void regex_options(const String* options, int *result, bool& need_pre_post_match){
1.63 paf 280: struct Regex_option {
1.172.2.11 paf 281: const char* keyL;
282: const char* keyU;
1.63 paf 283: int clear, set;
284: int *result;
1.154 paf 285: bool *flag;
1.63 paf 286: } regex_option[]={
1.153 paf 287: {"i", "I", 0, PCRE_CASELESS, result}, // a=A
288: {"s", "S", 0, PCRE_DOTALL, result}, // \n\n$ [default]
289: {"x", "U", 0, PCRE_EXTENDED, result}, // whitespace in regex ignored
290: {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$
291: {"g", "G", 0, true, result+1}, // many rows
1.154 paf 292: {"'", 0, 0, 0, 0, &need_pre_post_match},
293: {0}
1.63 paf 294: };
1.171 paf 295: result[0]=PCRE_EXTRA | PCRE_DOTALL | PCRE_DOLLAR_ENDONLY;
1.63 paf 296: result[1]=0;
297:
1.172.2.21.2. 2(paf 298:3): if(options && !options->is_empty())
1.153 paf 299: for(Regex_option *o=regex_option; o->keyL; o++)
1.172.2.21.2. 2(paf 300:3): if(options->pos(o->keyL)>=0
301:3): || (o->keyU && options->pos(o->keyU)>=0)) {
1.154 paf 302: if(o->flag)
303: *o->flag=true;
304: else { // result
305: *o->result &= ~o->clear;
306: *o->result |= o->set;
307: }
1.63 paf 308: }
309: }
310:
1.172.2.21.2. (paf 311:): Table* String::match(Charset& source_charset,
1.172.2.4 paf 312: const String& regexp,
1.172.2.21.2. 2(paf 313:3): const String* options,
1.172.2.4 paf 314: Row_action row_action, void *info,
1.172.2.20 paf 315: bool& just_matched) const {
1.172.2.21.2. 5(paf 316:3): if(regexp.is_empty())
(paf 317:): throw Exception(0,
318:): 0,
1.73 paf 319: "regexp is empty");
1.154 paf 320:
1.172.2.21.2. (paf 321:): const char* pattern=regexp.cstr();
1.172.2.11 paf 322: const char* errptr;
1.62 paf 323: int erroffset;
1.172.2.21.2. (paf 324:): bool need_pre_post_match=false;
7(paf 325:3): int option_bits[2]={0}; regex_options(options, option_bits, need_pre_post_match);
1.172.2.20 paf 326: bool global=option_bits[1]!=0;
1.172.2.21.2. (paf 327:): pcre *code=pcre_compile(pattern, option_bits[0],
1.62 paf 328: &errptr, &erroffset,
1.172.2.1 paf 329: source_charset.pcre_tables);
1.62 paf 330:
1.67 paf 331: if(!code)
1.172.2.21.2. (paf 332:): throw Exception(0,
333:): ®exp.mid(erroffset, regexp.length()),
1.74 paf 334: "regular expression syntax error - %s", errptr);
1.62 paf 335:
1.172.2.20 paf 336: int subpatterns=pcre_info(code, 0, 0);
337: if(subpatterns<0) {
1.100 parser 338: pcre_free(code);
1.149 paf 339: throw Exception(0,
1.172.2.21.2. (paf 340:): ®exp,
1.76 paf 341: "pcre_info error (%d)",
1.172.2.20 paf 342: subpatterns);
1.63 paf 343: }
344:
1.172.2.21.2. (paf 345:): const char* subject=cstr();
346:): size_t subject_length=strlen(subject);
347:): const int oveclength=(1/*match*/+MAX_MATCH_GROUPS)*3;
348:): int ovector[oveclength];
1.155 paf 349:
350: // create table
1.172.2.21.2. 1(paf 351:3): Table& table=*new Table(string_match_table_template);
1.63 paf 352:
1.64 paf 353: int exec_option_bits=0;
1.154 paf 354: int prestart=0;
355: int poststart=0;
1.172.2.21.2. (paf 356:): int postfinish=length();
1.63 paf 357: while(true) {
358: int exec_substrings=pcre_exec(code, 0,
1.172.2.21.2. (paf 359:): subject, subject_length, prestart,
360:): exec_option_bits, ovector, oveclength);
1.63 paf 361:
362: if(exec_substrings==PCRE_ERROR_NOMATCH) {
1.100 parser 363: pcre_free(code);
1.172.2.21.2. (paf 364:): row_action(table, 0/*last time, no raw*/, 0, 0, poststart, postfinish, info);
1.172.2.20 paf 365: if(global || subpatterns)
1.172.2.21.2. 2(paf 366:3): return &table; // global or with subpatterns=true+result
1.172.2.20 paf 367: else {
1.172.2.21.2. (paf 368:): just_matched=false; return 0; // not global=no result
1.172.2.20 paf 369: }
1.63 paf 370: }
371:
372: if(exec_substrings<0) {
1.100 parser 373: pcre_free(code);
1.172.2.21.2. (paf 374:): throw Exception(0,
375:): ®exp,
1.76 paf 376: "regular expression execute error (%d)",
1.63 paf 377: exec_substrings);
378: }
379:
1.154 paf 380: int prefinish=ovector[0];
381: poststart=ovector[1];
1.172.2.21.2. (paf 382:): ArrayString* row=new ArrayString;
1.172.2.4 paf 383: if(need_pre_post_match) {
1.172.2.21.2. (paf 384:): *row+=&mid(0, prefinish); // .prematch column value
385:): *row+=&mid(prefinish, poststart); // .match
386:): *row+=&mid(poststart, postfinish); // .postmatch
1.172.2.4 paf 387: } else {
1.172.2.21.2. (paf 388:): *row+=0; // .prematch column value
389:): *row+=0; // .match
390:): *row+=0; // .postmatch
1.172.2.4 paf 391: }
1.63 paf 392:
393: for(int i=1; i<exec_substrings; i++) {
1.69 paf 394: // -1:-1 case handled peacefully by mid() itself
1.172.2.21.2. (paf 395:): *row+=&mid(ovector[i*2+0], ovector[i*2+1]); // .i column value
1.63 paf 396: }
397:
1.172.2.20 paf 398: row_action(table, row, prestart, prefinish, poststart, postfinish, info);
1.63 paf 399:
1.172.2.20 paf 400: if(!global || prestart==poststart) { // not global | going to hang
1.100 parser 401: pcre_free(code);
1.172.2.21.2. (paf 402:): row_action(table, 0/*last time, no row*/, 0, 0, poststart, postfinish, info);
2(paf 403:3): return &table;
1.63 paf 404: }
1.154 paf 405: prestart=poststart;
1.63 paf 406:
407: /*
408: if(option_bits[0] & PCRE_MULTILINE)
1.64 paf 409: exec_option_bits|=PCRE_NOTBOL; // start of subject+startoffset not BOL
1.63 paf 410: */
411: }
1.82 parser 412: }
413:
1.172.2.21.2. 0(paf 414:3): String& String::change_case(Charset& source_charset, Change_case_kind kind) const {
(paf 415:): String& result=*new String();
0(paf 416:3): if(is_empty())
417:3): return result;
1.172.2.4 paf 418:
1.172.2.1 paf 419: const unsigned char *tables=source_charset.pcre_tables;
1.82 parser 420:
421: const unsigned char *a;
422: const unsigned char *b;
423: switch(kind) {
424: case CC_UPPER:
425: a=tables+lcc_offset;
426: b=tables+fcc_offset;
427: break;
428: case CC_LOWER:
429: a=tables+lcc_offset;
430: b=0;
431: break;
432: default:
1.172.2.21.2. (paf 433:): throw Exception(0,
434:): this,
1.82 parser 435: "unknown change case kind #%d",
436: static_cast<int>(kind)); // never
437: a=b=0; // calm, compiler
438: break; // never
439: }
440:
1.172.2.21.2. (paf 441:): char* new_cstr=cstrm();
442:): char *dest=new_cstr;
443:): unsigned char index;
444:): for(const char* current=new_cstr; index=(unsigned char)*current; current++) {
445:): unsigned char c=a[index];
446:): if(b)
447:): c=b[c];
1.82 parser 448:
1.172.2.21.2. (paf 449:): *dest++=(char)c;
450:): }
451:): result.body=new_cstr;
3(paf 452:3): result.fragments.append(fragments);
1.89 parser 453:
1.101 parser 454: return result;
455: }
456:
1.172.2.21.2. (paf 457:): const String& String::replace(const Dictionary& dict) const {
458:): String& result=*new String();
459:): const char* old_cstr=cstr();
1(paf 460:3): const char* prematch_begin=old_cstr;
(paf 461:):
1(paf 462:3): const char* current=old_cstr;
463:3): while(*current) {
464:3): if(Table::element_type row=dict.first_that_begins(current)) {
(paf 465:): // prematch
1(paf 466:3): if(size_t prematch_length=current-prematch_begin) {
6(paf 467:3): result.body.append_strdup_know_length(prematch_begin, prematch_length);
4(paf 468:3): result.fragments.append_positions(fragments, prematch_begin-old_cstr, current-old_cstr);
1.101 parser 469: }
470:
1.172.2.21.2. (paf 471:): // match
472:):
473:): const String* a=row->get(0);
1(paf 474:3): // skip 'a' in 'current'; move prematch_begin
475:3): current+=a->length(); prematch_begin=current;
1.170 paf 476:
1.172.2.21.2. (paf 477:): if(row->count()>1) { // are there any b?
478:): const String* b=row->get(1);
6(paf 479:3): result<<*b;
(paf 480:): }
481:): } else // simply advance
482:): current++;
483:): }
1.156 paf 484:
1.172.2.21.2. (paf 485:): // postmatch
3(paf 486:3): if(size_t postmatch_length=current-prematch_begin) {
6(paf 487:3): result.body.append_strdup_know_length(prematch_begin, postmatch_length);
4(paf 488:3): result.fragments.append_positions(fragments, prematch_begin-old_cstr, current-old_cstr);
(paf 489:): }
1.156 paf 490:
1.172.2.21.2. 7(paf 491:3): #ifndef NDEBUG
492:3): result.invariant();
493:3): #endif
1.89 parser 494: return result;
495: }
496:
1.172.2.21.2. (paf 497:): double String::as_double() const {
1.89 parser 498: double result;
1.172.2.21.2. (paf 499:): const char *str=cstr();
500:):
501:): while(*str && isspace(*str))
502:): str++;
503:): if(!*str)
1.162 paf 504: return 0;
1.161 paf 505:
1.102 parser 506: char *error_pos;
1.89 parser 507: // 0xABC
1.172.2.21.2. (paf 508:): if(str[0]=='0')
509:): if(str[1]=='x' || str[1]=='X')
510:): result=(double)(unsigned long)strtol(str, &error_pos, 0);
1.99 parser 511: else
1.172.2.21.2. (paf 512:): result=(double)strtod(str+1/*skip leading 0*/, &error_pos);
1.89 parser 513: else
1.172.2.21.2. (paf 514:): result=(double)strtod(str, &error_pos);
1.89 parser 515:
1.159 paf 516: while(char c=*error_pos++)
517: if(!isspace(c))
518: throw Exception("number.format",
1.172.2.21.2. (paf 519:): this,
1.159 paf 520: "invalid number (double)");
1.89 parser 521:
522: return result;
523: }
1.172.2.21.2. (paf 524:): int String::as_int() const {
1.89 parser 525: int result;
1.172.2.21.2. (paf 526:): const char *str=cstr();
527:):
528:): while(*str && isspace(*str))
529:): str++;
530:): if(!*str)
1.162 paf 531: return 0;
1.161 paf 532:
1.102 parser 533: char *error_pos;
1.89 parser 534: // 0xABC
1.172.2.21.2. (paf 535:): if(str[0]=='0')
536:): if(str[1]=='x' || str[1]=='X')
537:): result=(int)(unsigned long)strtol(str, &error_pos, 0);
1.99 parser 538: else
1.172.2.21.2. (paf 539:): result=(int)strtol(str+1/*skip leading 0*/, &error_pos, 0);
1.89 parser 540: else
1.172.2.21.2. (paf 541:): result=(int)strtol(str, &error_pos, 0);
1.89 parser 542:
1.159 paf 543: while(char c=*error_pos++)
544: if(!isspace(c))
545: throw Exception("number.format",
1.172.2.21.2. (paf 546:): this,
1.159 paf 547: "invalid number (int)");
1.82 parser 548:
549: return result;
1.61 paf 550: }
1.113 parser 551:
1.172.2.4 paf 552: inline void uint2uchars(uint word, uchar *bytes) {
553: bytes[0]=word&0xFF;
554: bytes[1]=(word>>8)&0xFF;
555: bytes[2]=(word>>16)&0xFF;
556: bytes[3]=(word>>24)&0xFF;
557: }
558: inline uint uchars2uint(uchar *bytes) {
559: return bytes[3]<<24
560: | bytes[2]<<16
561: | bytes[1]<<8
562: | bytes[0];
563: }
564:
/// Body iteration callback for String::serialize: copies the C-string
/// piece @a s (without its terminating zero) to *cur and advances *cur
/// past the copied bytes. Always returns 0.
static int serialize_body_piece(const char* s, char** cur) {
	const size_t piece_length=strlen(s);
	char* const dest=*cur;
	memcpy(dest, s, piece_length);
	*cur=dest+piece_length;
	return 0;
}
7(paf 570:3): String::Cm String::serialize(size_t prolog_length) const {
(paf 571:): size_t buf_length=
572:): prolog_length
5(paf 573:3): +fragments.count()*(sizeof(Language)+sizeof(size_t))
(paf 574:): +length();
7(paf 575:3): String::Cm result(new(PointerFreeGC) char[buf_length], buf_length);
(paf 576:):
577:): // 1: prolog
578:): char *cur=result.str+prolog_length;
579:):
580:):
581:): // 2: fragments.count
582:): size_t fragments_count=fragments.count();
583:): memcpy(cur, &fragments_count, sizeof(fragments_count)); cur+=sizeof(fragments_count);
1.113 parser 584:
1.172.2.21.2. (paf 585:): // 3: lang info
586:): for(Array_iterator<ArrayFragment::element_type> i(fragments); i.has_next(); ) {
3(paf 587:3): const Fragment fragment=i.next();
1.123 paf 588: // lang
1.172.2.21.2. (paf 589:): memcpy(cur, &fragment.lang, sizeof(fragment.lang)); cur+=sizeof(fragment.lang);
590:): // length
591:): memcpy(cur, &fragment.length, sizeof(fragment.length)); cur+=sizeof(fragment.length);
592:): }
593:):
594:): // 4: letters
7(paf 595:3): body.for_each(serialize_body_piece, &cur);
(paf 596:):
597:): return result;
1.113 parser 598: }
1.172.2.21.2. 9(paf 599:3): bool String::deserialize(size_t prolog_length, void *buf, size_t buf_length) {
(paf 600:): if(buf_length<=prolog_length)
1.148 paf 601: return false;
1.172.2.21.2. (paf 602:): buf_length-=prolog_length;
1.113 parser 603:
1.172.2.21.2. (paf 604:): // 1: prolog
605:): const char* cur=(const char* )buf+prolog_length;
1.148 paf 606:
1.172.2.21.2. (paf 607:): // 2: fragments.count
608:): if(buf_length<sizeof(size_t)) // fragments.count don't fit?
609:): return false;
610:): size_t fragments_count=*reinterpret_cast<const size_t*>(cur); cur+=sizeof(size_t);
611:): buf_length-=sizeof(size_t);
6(paf 612:3):
613:3): if(fragments_count) {
614:3): // 3: lang info
615:3): size_t total_length=0;
616:3): for(size_t f=0; f<fragments_count; f++) {
617:3): size_t piece_length=sizeof(Language)+sizeof(size_t);
618:3): if(buf_length<piece_length) // lang+length
619:3): return false;
620:3):
621:3): Language lang=*reinterpret_cast<const Language *>(cur); cur+=sizeof(Language);
622:3): size_t fragment_length=*reinterpret_cast<const size_t*>(cur); cur+=sizeof(size_t);
623:3): fragments+=Fragment(lang, fragment_length);
624:3): total_length+=fragment_length;
1.128 paf 625:
1.172.2.21.2. 6(paf 626:3): buf_length-=piece_length;
627:3): }
1.148 paf 628:
1.172.2.21.2. 6(paf 629:3): // 4: letters
630:3): if(buf_length!=total_length)
631:3): return false;
1.113 parser 632:
1.172.2.21.2. 6(paf 633:3): body=StringBody(cur, buf_length);
1.113 parser 634: }
1.172.2.21.2. (paf 635:):
7(paf 636:3): #ifndef NDEBUG
637:3): invariant();
638:3): #endif
1.148 paf 639: return true;
1.113 parser 640: }
E-mail: