Annotation of parser3/src/main/pa_string.C, revision 1.172.2.21.2.12
1.45 paf 1: /** @file
1.172.2.21.2. (paf 2:): Parser: string class. @see untaint.C.
1.46 paf 3:
1.172.2.11 paf 4: Copyright (c) 2001-2003 ArtLebedev Group (http://www.artlebedev.com)
1.138 paf 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.164 paf 6: */
1.46 paf 7:
1.172.2.21.2. 2(paf 8:3): static const char* IDENT_STRING_C="$Date: 2003/03/20 15:05:58 $";
1.4 paf 9:
1.70 paf 10: #include "pcre.h"
11:
1.12 paf 12: #include "pa_string.h"
1.22 paf 13: #include "pa_exception.h"
1.61 paf 14: #include "pa_table.h"
1.101 parser 15: #include "pa_dictionary.h"
1.132 paf 16: #include "pa_charset.h"
1.60 paf 17:
1.172.2.2 paf 18: // helpers
1.139 paf 19:
1.172.2.2 paf 20: /// String::match uses this as replace & global search table columns
1.139 paf 21:
1.172.2.4 paf 22: const int MAX_MATCH_GROUPS=100;
23:
1.172.2.14 paf 24: class String_match_table_template_columns: public ArrayString {
1.172.2.2 paf 25: public:
1.172.2.4 paf 26: String_match_table_template_columns() {
1.172.2.21.2. (paf 27:): *this+=new String("prematch");
28:): *this+=new String("match");
29:): *this+=new String("postmatch");
1.172.2.4 paf 30: for(int i=0; i<MAX_MATCH_GROUPS; i++) {
1.172.2.21.2. (paf 31:): char *cname=new(PointerFreeGC) char[3/*strlen("100")*/+1/*terminating 0*/];
32:): *this+=new String(cname, sprintf(cname, "%d", 1+i));
1.172.2.2 paf 33: }
1.172.2.21 paf 34: }
1.172.2.4 paf 35: };
36:
1.172.2.21.2. (paf 37:): Table string_match_table_template(new String_match_table_template_columns);
1.172.2.2 paf 38:
1.172.2.21.2. (paf 39:): // String::ArrayFragment methods
40:):
41:): String::ArrayFragment& String::ArrayFragment::append(const ArrayFragment& src,
1(paf 42:3): size_t substr_begin, size_t substr_end) {
43:3): if(substr_begin==substr_end)
(paf 44:): return *this;
45:):
1(paf 46:3): size_t fragment_begin=0;
(paf 47:): size_t fragment_end;
1(paf 48:3): for(Array_iterator<element_type> i(src); ; fragment_begin=fragment_end) {
(paf 49:): const element_type& fragment=i.next();
1(paf 50:3): fragment_end=fragment_begin+fragment.length;
(paf 51:):
1(paf 52:3): if(fragment_begin<substr_begin) // not reached fragments which may include 'substr'?
(paf 53:): continue;
54:):
55:): // found first fragment including piece of 'substr'
56:): size_t piece_end=min(substr_end, fragment_end);
1(paf 57:3): *this+=Fragment(fragment.lang, piece_end-substr_begin);
(paf 58:):
59:): while(substr_end>fragment_end) { // are there more fragments including pieces of 'substr'?
1(paf 60:3): fragment_begin=fragment_end;
61:3):
(paf 62:): const element_type& fragment=i.next();
1(paf 63:3): fragment_end=fragment_begin+fragment.length;
(paf 64:):
65:): if(substr_end>fragment_end) // are there still more?
66:): simple_append(Fragment(fragment.lang, fragment.length)); // appending whole fragment
67:): else { // no, it was last
1(paf 68:3): simple_append(Fragment(fragment.lang, substr_end-fragment_begin));
(paf 69:): goto break2;
70:): }
71:): }
72:):
73:): break;
74:): }
75:): break2:
76:):
77:): return *this;
78:): }
79:): /*
1(paf 80:3): void String::ArrayFragment::mid(ArrayFragment& result, size_t substr_begin, size_t substr_end) {
(paf 81:): }
82:): */
83:):
84:): // String methods
1.172.2.2 paf 85:
1.172.2.21.2. (paf 86:): String::String(const char* cstr, size_t helper_length, bool tainted): body(CORD_EMPTY) {
87:): append_cstr(cstr, helper_length, tainted?UL_TAINTED:UL_CLEAN);
1.1 paf 88: }
1.140 paf 89:
1.172.2.21.2. (paf 90:): String::String(const String& src): body(src.body), fragments(src.fragments) {}
1.28 paf 91:
1.172.2.21.2. (paf 92:): String& String::append_cstr(const char* heap_variable, size_t helper_length, Untaint_lang lang) {
93:): if(!heap_variable)
94:): return *this;
0(paf 95:3): size_t use_length;
96:3): if(helper_length) {
97:3): #ifdef _DEBUG
98:3): size_t real_length=strlen(heap_variable);
99:3): if(helper_length!=real_length)
100:3): throw Exception(0,
101:3): this,
102:3): "bad String::String(real=%d, helper%d",
103:3): real_length, helper_length);
104:3): #endif
105:3): use_length=helper_length;
106:3): } else
107:3): use_length=strlen(heap_variable);
(paf 108:): size_t length=helper_length?helper_length:strlen(heap_variable);
109:): if(!length)
110:): return *this;
111:):
112:): body=CORD_cat_char_star(body, heap_variable, length);
113:): fragments+=Fragment(lang, length);
114:):
115:): return *this;
116:): }
117:):
118:): String& String::append_copy(const char* auto_variable, size_t helper_length, Untaint_lang lang) {
119:): if(!auto_variable)
1.9 paf 120: return *this;
1.172.2.21.2. 0(paf 121:3): size_t use_length=helper_length?helper_length:strlen(auto_variable);
122:3): if(!use_length)
1.9 paf 123: return *this;
1.122 paf 124:
1.172.2.21.2. 0(paf 125:3): body=pa_strdup(auto_variable, use_length);
126:3): fragments+=Fragment(lang, use_length);
1.1 paf 127:
128: return *this;
129: }
130:
1.172.2.21.2. (paf 131:): static int CORD_batched_iter_fn_generic_hash_code(const char* s, void * client_data) {
132:): uint& result=*static_cast<uint*>(client_data);
133:): generic_hash_code(result, s);
134:): return 0;
135:): };
1.16 paf 136: uint String::hash_code() const {
1.7 paf 137: uint result=0;
1.172.2.21.2. (paf 138:): CORD_iter5(body, 0, 0, CORD_batched_iter_fn_generic_hash_code, &result);
1.5 paf 139: return result;
140: }
141:
1.172.2.21.2. 1(paf 142:3): /// @todo check in doc: whether it documents NOW bad situation "abc".mid(-1, 3) =were?="ab"
143:3): const String& String::mid(size_t substr_begin, size_t substr_end) const {
(paf 144:): String& result=*new String;
1.33 paf 145:
1.172.2.21.2. 1(paf 146:3): size_t self_length=length();
147:3): substr_begin=max(min(substr_begin, self_length), (size_t)0);
148:3): substr_end=min(max(substr_end, substr_begin), self_length);
149:3): if(substr_begin==substr_end)
(paf 150:): return result;
1.52 paf 151:
1.172.2.21.2. (paf 152:): // first: letters themselves
1(paf 153:3): result.body=CORD_substr(body, substr_begin, substr_end-substr_begin);
1.46 paf 154:
1.172.2.21.2. (paf 155:): // next: their langs
1(paf 156:3): result.fragments.append(fragments, substr_begin, substr_end);
1.53 paf 157:
1.60 paf 158: // SAPI::log(pool(), "piece of '%s' from %d to %d is '%s'",
1.172.2.21.2. 1(paf 159:3): //cstr(), substr_begin, substr_end, result.cstr());
1.53 paf 160: return result;
1.54 paf 161: }
162:
1.172.2.21.2. (paf 163:): size_t String::pos(CORD substr,
164:): size_t this_offset, Untaint_lang lang) const {
165:): // first: letters themselves
1(paf 166:3): size_t substr_begin=CORD_str(body, this_offset, substr);
167:3): if(substr_begin==CORD_NOT_FOUND)
(paf 168:): return STRING_NOT_FOUND;
169:):
170:): // next: check the lang when specified
171:):
172:): if(lang==UL_UNSPECIFIED) // ignore lang?
1(paf 173:3): return substr_begin;
(paf 174:):
175:): // substr must be in one fragment, and fragments' lang must = lang
1(paf 176:3): size_t substr_end=substr_begin+CORD_len(substr);
177:3): size_t fragment_begin=0;
(paf 178:): size_t fragment_end;
1(paf 179:3): for(Array_iterator<ArrayFragment::element_type> i(fragments); i.has_next(); fragment_begin=fragment_end) {
(paf 180:): const Fragment& fragment=i.next();
1(paf 181:3): fragment_end=fragment_begin+fragment.length;
(paf 182:):
1(paf 183:3): if(substr_begin<fragment_begin) // not reached fragments which may include 'result'?
(paf 184:): continue;
2(paf 185:3): if(substr_begin>=fragment_end) // begin of substr OUT of current fragment?
186:3): continue;
(paf 187:):
188:): if(substr_end>fragment_end) // end of substr OUT of current fragment?
2(paf 189:3): throw Exception(0, // (*) see below
190:3): this,
191:3): "searching for '%s' starting from %ud problem: found begin in one fragment, but end in another",
192:3): CORD_to_const_char_star(substr), this_offset);
(paf 193:):
2(paf 194:3): if(fragment.lang==lang)
195:3): return substr_begin;
196:3): else { // bad lang...
197:3): /// WARNING: this possibly skips assert (*), but it's fast
198:3): substr_begin=CORD_str(body, fragment_end/*...search AFTER for more*/, substr);
199:3): if(substr_begin==CORD_NOT_FOUND)
200:3): return STRING_NOT_FOUND;
201:3):
202:3): size_t substr_end=substr_begin+CORD_len(substr);
203:3): // and continuing with next fragment
204:3): }
(paf 205:): }
206:):
207:): return STRING_NOT_FOUND;
1.58 paf 208: }
209:
1.172.2.21.2. (paf 210:): size_t String::pos(const String& substr,
211:): size_t this_offset, Untaint_lang lang) const {
212:): return pos(substr.body, this_offset, lang);
1.60 paf 213: }
214:
1.172.2.14 paf 215: void String::split(ArrayString& result,
1.172.2.21.2. (paf 216:): size_t& pos_after,
217:): const char* delim,
1.172.2.10 paf 218: Untaint_lang lang, int limit) {
1.172.2.21.2. (paf 219:): size_t self_length=length();
220:): if(size_t delim_length=strlen(delim)) {
1.60 paf 221: int pos_before;
222: // while we have 'delim'...
1.172.2.21.2. (paf 223:): for(; (pos_before=pos(delim, pos_after, lang))>=0 && limit; limit--) {
224:): result+=&mid(pos_after, pos_before);
225:): pos_after=pos_before+delim_length;
1.60 paf 226: }
227: // last piece
1.172.2.21.2. (paf 228:): if(pos_after<self_length && limit) {
229:): result+=&mid(pos_after, self_length);
230:): pos_after=self_length;
1.60 paf 231: }
232: } else { // empty delim
1.172.2.21.2. (paf 233:): result+=this;
234:): pos_after+=self_length;
1.60 paf 235: }
236: }
237:
1.172.2.14 paf 238: void String::split(ArrayString& result,
1.172.2.21.2. (paf 239:): size_t& pos_after,
1.60 paf 240: const String& delim, Untaint_lang lang,
1.172.2.21.2. (paf 241:): int limit) const {
242:): if(delim) {
1.60 paf 243: int pos_before;
244: // while we have 'delim'...
245: for(; (pos_before=pos(delim, pos_after, lang))>=0 && limit; limit--) {
1.172.2.21.2. (paf 246:): result+=&mid(pos_after, pos_before);
247:): pos_after=pos_before+delim.length();
1.60 paf 248: }
249: // last piece
1.172.2.21.2. (paf 250:): if(pos_after<length() && limit) {
251:): result+=&mid(pos_after, length());
252:): pos_after=length();
1.60 paf 253: }
254: } else { // empty delim
1.172.2.21.2. (paf 255:): result+=this;
256:): pos_after+=length();
1.60 paf 257: }
1.61 paf 258: }
259:
1.172.2.21.2. (paf 260:): static void regex_options(const String& options, int *result, bool& need_pre_post_match){
1.63 paf 261: struct Regex_option {
1.172.2.11 paf 262: const char* keyL;
263: const char* keyU;
1.63 paf 264: int clear, set;
265: int *result;
1.154 paf 266: bool *flag;
1.63 paf 267: } regex_option[]={
1.153 paf 268: {"i", "I", 0, PCRE_CASELESS, result}, // a=A
269: {"s", "S", 0, PCRE_DOTALL, result}, // \n\n$ [default]
270: {"x", "U", 0, PCRE_EXTENDED, result}, // whitespace in regex ignored
271: {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$
272: {"g", "G", 0, true, result+1}, // many rows
1.154 paf 273: {"'", 0, 0, 0, 0, &need_pre_post_match},
274: {0}
1.63 paf 275: };
1.171 paf 276: result[0]=PCRE_EXTRA | PCRE_DOTALL | PCRE_DOLLAR_ENDONLY;
1.63 paf 277: result[1]=0;
278:
279: if(options)
1.153 paf 280: for(Regex_option *o=regex_option; o->keyL; o++)
1.172.2.21.2. (paf 281:): if(options.pos(o->keyL)>=0
282:): || (o->keyU && options.pos(o->keyU)>=0)) {
1.154 paf 283: if(o->flag)
284: *o->flag=true;
285: else { // result
286: *o->result &= ~o->clear;
287: *o->result |= o->set;
288: }
1.63 paf 289: }
290: }
291:
1.172.2.21.2. (paf 292:): Table* String::match(Charset& source_charset,
1.172.2.4 paf 293: const String& regexp,
1.172.2.21.2. (paf 294:): const String& options,
1.172.2.4 paf 295: Row_action row_action, void *info,
1.172.2.20 paf 296: bool& just_matched) const {
1.172.2.21.2. (paf 297:): if(!regexp)
298:): throw Exception(0,
299:): 0,
1.73 paf 300: "regexp is empty");
1.154 paf 301:
1.172.2.21.2. (paf 302:): const char* pattern=regexp.cstr();
1.172.2.11 paf 303: const char* errptr;
1.62 paf 304: int erroffset;
1.172.2.21.2. (paf 305:): bool need_pre_post_match=false;
1.154 paf 306: int option_bits[2]; regex_options(options, option_bits, need_pre_post_match);
1.172.2.20 paf 307: bool global=option_bits[1]!=0;
1.172.2.21.2. (paf 308:): pcre *code=pcre_compile(pattern, option_bits[0],
1.62 paf 309: &errptr, &erroffset,
1.172.2.1 paf 310: source_charset.pcre_tables);
1.62 paf 311:
1.67 paf 312: if(!code)
1.172.2.21.2. (paf 313:): throw Exception(0,
314:): ®exp.mid(erroffset, regexp.length()),
1.74 paf 315: "regular expression syntax error - %s", errptr);
1.62 paf 316:
1.172.2.20 paf 317: int subpatterns=pcre_info(code, 0, 0);
318: if(subpatterns<0) {
1.100 parser 319: pcre_free(code);
1.149 paf 320: throw Exception(0,
1.172.2.21.2. (paf 321:): ®exp,
1.76 paf 322: "pcre_info error (%d)",
1.172.2.20 paf 323: subpatterns);
1.63 paf 324: }
325:
1.172.2.21.2. (paf 326:): const char* subject=cstr();
327:): size_t subject_length=strlen(subject);
328:): const int oveclength=(1/*match*/+MAX_MATCH_GROUPS)*3;
329:): int ovector[oveclength];
1.155 paf 330:
331: // create table
1.172.2.21.2. (paf 332:): Table* table=new Table(string_match_table_template);
1.63 paf 333:
1.64 paf 334: int exec_option_bits=0;
1.154 paf 335: int prestart=0;
336: int poststart=0;
1.172.2.21.2. (paf 337:): int postfinish=length();
1.63 paf 338: while(true) {
339: int exec_substrings=pcre_exec(code, 0,
1.172.2.21.2. (paf 340:): subject, subject_length, prestart,
341:): exec_option_bits, ovector, oveclength);
1.63 paf 342:
343: if(exec_substrings==PCRE_ERROR_NOMATCH) {
1.100 parser 344: pcre_free(code);
1.172.2.21.2. (paf 345:): row_action(table, 0/*last time, no raw*/, 0, 0, poststart, postfinish, info);
1.172.2.20 paf 346: if(global || subpatterns)
347: return table; // global or with subpatterns=true+result
348: else {
1.172.2.21.2. (paf 349:): just_matched=false; return 0; // not global=no result
1.172.2.20 paf 350: }
1.63 paf 351: }
352:
353: if(exec_substrings<0) {
1.100 parser 354: pcre_free(code);
1.172.2.21.2. (paf 355:): throw Exception(0,
356:): ®exp,
1.76 paf 357: "regular expression execute error (%d)",
1.63 paf 358: exec_substrings);
359: }
360:
1.154 paf 361: int prefinish=ovector[0];
362: poststart=ovector[1];
1.172.2.21.2. (paf 363:): ArrayString* row=new ArrayString;
1.172.2.4 paf 364: if(need_pre_post_match) {
1.172.2.21.2. (paf 365:): *row+=&mid(0, prefinish); // .prematch column value
366:): *row+=&mid(prefinish, poststart); // .match
367:): *row+=&mid(poststart, postfinish); // .postmatch
1.172.2.4 paf 368: } else {
1.172.2.21.2. (paf 369:): *row+=0; // .prematch column value
370:): *row+=0; // .match
371:): *row+=0; // .postmatch
1.172.2.4 paf 372: }
1.63 paf 373:
374: for(int i=1; i<exec_substrings; i++) {
1.69 paf 375: // -1:-1 case handled peacefully by mid() itself
1.172.2.21.2. (paf 376:): *row+=&mid(ovector[i*2+0], ovector[i*2+1]); // .i column value
1.63 paf 377: }
378:
1.172.2.20 paf 379: row_action(table, row, prestart, prefinish, poststart, postfinish, info);
1.63 paf 380:
1.172.2.20 paf 381: if(!global || prestart==poststart) { // not global | going to hang
1.100 parser 382: pcre_free(code);
1.172.2.21.2. (paf 383:): row_action(table, 0/*last time, no row*/, 0, 0, poststart, postfinish, info);
1.172.2.20 paf 384: return table;
1.63 paf 385: }
1.154 paf 386: prestart=poststart;
1.63 paf 387:
388: /*
389: if(option_bits[0] & PCRE_MULTILINE)
1.64 paf 390: exec_option_bits|=PCRE_NOTBOL; // start of subject+startoffset not BOL
1.63 paf 391: */
392: }
1.82 parser 393: }
394:
1.172.2.21.2. (paf 395:): String& String::change_case(Charset& source_charset, Change_case_kind kind) {
396:): String& result=*new String();
1.172.2.4 paf 397:
1.172.2.1 paf 398: const unsigned char *tables=source_charset.pcre_tables;
1.82 parser 399:
400: const unsigned char *a;
401: const unsigned char *b;
402: switch(kind) {
403: case CC_UPPER:
404: a=tables+lcc_offset;
405: b=tables+fcc_offset;
406: break;
407: case CC_LOWER:
408: a=tables+lcc_offset;
409: b=0;
410: break;
411: default:
1.172.2.21.2. (paf 412:): throw Exception(0,
413:): this,
1.82 parser 414: "unknown change case kind #%d",
415: static_cast<int>(kind)); // never
416: a=b=0; // calm, compiler
417: break; // never
418: }
419:
1.172.2.21.2. (paf 420:): char* new_cstr=cstrm();
421:): char *dest=new_cstr;
422:): unsigned char index;
423:): for(const char* current=new_cstr; index=(unsigned char)*current; current++) {
424:): unsigned char c=a[index];
425:): if(b)
426:): c=b[c];
1.82 parser 427:
1.172.2.21.2. (paf 428:): *dest++=(char)c;
429:): }
430:): result.body=new_cstr;
431:): result.fragments.append(fragments, 0, fragments.count());
1.89 parser 432:
1.101 parser 433: return result;
434: }
435:
1.172.2.21.2. (paf 436:): const String& String::replace(const Dictionary& dict) const {
437:): String& result=*new String();
438:): const char* old_cstr=cstr();
1(paf 439:3): const char* prematch_begin=old_cstr;
(paf 440:):
441:): for(const char* current=old_cstr; *current; ) {
1(paf 442:3): if(Table::element_type row=dict.first_that_begins(current)) {
(paf 443:): // prematch
1(paf 444:3): if(size_t prematch_length=current-prematch_begin) {
445:3): result.body=CORD_cat_char_star(result.body, prematch_begin, prematch_length);
446:3): result.fragments.append(fragments, prematch_begin-old_cstr, prematch_length);
1.101 parser 447: }
448:
1.172.2.21.2. (paf 449:): // match
450:):
451:): const String* a=row->get(0);
1(paf 452:3): // skip 'a' in 'current'; move prematch_begin
453:3): current+=a->length(); prematch_begin=current;
1.170 paf 454:
1.172.2.21.2. (paf 455:): if(row->count()>1) { // are there any b?
456:): const String* b=row->get(1);
457:): if(size_t b_length=b->length()) {
458:): result.body=CORD_cat(result.body, b->cstr_to_cord());
459:): result.fragments.append(b->fragments, 0, b->fragments.count());
460:): }
461:): }
462:): } else // simply advance
463:): current++;
464:): }
1.156 paf 465:
1.172.2.21.2. (paf 466:): // postmatch
1(paf 467:3): if(size_t prematch_length=current-prematch_begin) {
468:3): result.body=CORD_cat_char_star(result.body, prematch_begin, prematch_length);
469:3): result.fragments.append(fragments, prematch_begin-old_cstr, prematch_length);
(paf 470:): }
1.156 paf 471:
1.89 parser 472: return result;
473: }
474:
1.172.2.21.2. (paf 475:): double String::as_double() const {
1.89 parser 476: double result;
1.172.2.21.2. (paf 477:): const char *str=cstr();
478:):
479:): while(*str && isspace(*str))
480:): str++;
481:): if(!*str)
1.162 paf 482: return 0;
1.161 paf 483:
1.102 parser 484: char *error_pos;
1.89 parser 485: // 0xABC
1.172.2.21.2. (paf 486:): if(str[0]=='0')
487:): if(str[1]=='x' || str[1]=='X')
488:): result=(double)(unsigned long)strtol(str, &error_pos, 0);
1.99 parser 489: else
1.172.2.21.2. (paf 490:): result=(double)strtod(str+1/*skip leading 0*/, &error_pos);
1.89 parser 491: else
1.172.2.21.2. (paf 492:): result=(double)strtod(str, &error_pos);
1.89 parser 493:
1.159 paf 494: while(char c=*error_pos++)
495: if(!isspace(c))
496: throw Exception("number.format",
1.172.2.21.2. (paf 497:): this,
1.159 paf 498: "invalid number (double)");
1.89 parser 499:
500: return result;
501: }
1.172.2.21.2. (paf 502:): int String::as_int() const {
1.89 parser 503: int result;
1.172.2.21.2. (paf 504:): const char *str=cstr();
505:):
506:): while(*str && isspace(*str))
507:): str++;
508:): if(!*str)
1.162 paf 509: return 0;
1.161 paf 510:
1.102 parser 511: char *error_pos;
1.89 parser 512: // 0xABC
1.172.2.21.2. (paf 513:): if(str[0]=='0')
514:): if(str[1]=='x' || str[1]=='X')
515:): result=(int)(unsigned long)strtol(str, &error_pos, 0);
1.99 parser 516: else
1.172.2.21.2. (paf 517:): result=(int)strtol(str+1/*skip leading 0*/, &error_pos, 0);
1.89 parser 518: else
1.172.2.21.2. (paf 519:): result=(int)strtol(str, &error_pos, 0);
1.89 parser 520:
1.159 paf 521: while(char c=*error_pos++)
522: if(!isspace(c))
523: throw Exception("number.format",
1.172.2.21.2. (paf 524:): this,
1.159 paf 525: "invalid number (int)");
1.82 parser 526:
527: return result;
1.61 paf 528: }
1.113 parser 529:
1.172.2.4 paf 530: inline void uint2uchars(uint word, uchar *bytes) {
531: bytes[0]=word&0xFF;
532: bytes[1]=(word>>8)&0xFF;
533: bytes[2]=(word>>16)&0xFF;
534: bytes[3]=(word>>24)&0xFF;
535: }
536: inline uint uchars2uint(uchar *bytes) {
537: return bytes[3]<<24
538: | bytes[2]<<16
539: | bytes[1]<<8
540: | bytes[0];
541: }
542:
/// CORD_iter5 batch callback: copies the zero-terminated chunk @a s (without its
/// terminator) to the write position that @a client_data points to, and advances
/// that position past the copied bytes. Always returns 0.
static int CORD_batched_iter_fn_append(const char* s, void* client_data) {
	char** write_pos=static_cast<char**>(client_data);

	const size_t chunk_length=strlen(s);
	memcpy(*write_pos, s, chunk_length);
	*write_pos+=chunk_length;

	return 0;
}
550:): String::C String::serialize(size_t prolog_length) const {
551:): size_t buf_length=
552:): prolog_length
553:): +fragments.count()*(sizeof(Untaint_lang)+sizeof(size_t))
554:): +length();
555:): C result(new(PointerFreeGC) char[buf_length], buf_length);
556:):
557:): // 1: prolog
558:): char *cur=result.str+prolog_length;
559:):
560:):
561:): // 2: fragments.count
562:): size_t fragments_count=fragments.count();
563:): memcpy(cur, &fragments_count, sizeof(fragments_count)); cur+=sizeof(fragments_count);
1.113 parser 564:
1.172.2.21.2. (paf 565:): // 3: lang info
566:): for(Array_iterator<ArrayFragment::element_type> i(fragments); i.has_next(); ) {
567:): const Fragment& fragment=i.next();
1.123 paf 568: // lang
1.172.2.21.2. (paf 569:): memcpy(cur, &fragment.lang, sizeof(fragment.lang)); cur+=sizeof(fragment.lang);
570:): // length
571:): memcpy(cur, &fragment.length, sizeof(fragment.length)); cur+=sizeof(fragment.length);
572:): }
573:):
574:): // 4: letters
575:): CORD_iter5(body, 0, 0, CORD_batched_iter_fn_append, &cur);
576:):
577:): return result;
1.113 parser 578: }
1.172.2.21.2. (paf 579:): bool String::deserialize(size_t prolog_length, void *buf, size_t buf_length, const char* file) {
580:): if(buf_length<=prolog_length)
1.148 paf 581: return false;
1.172.2.21.2. (paf 582:): buf_length-=prolog_length;
1.113 parser 583:
1.172.2.21.2. (paf 584:): // 1: prolog
585:): const char* cur=(const char* )buf+prolog_length;
1.148 paf 586:
1.172.2.21.2. (paf 587:): // 2: fragments.count
588:): if(buf_length<sizeof(size_t)) // fragments.count don't fit?
589:): return false;
590:): size_t fragments_count=*reinterpret_cast<const size_t*>(cur); cur+=sizeof(size_t);
591:): buf_length-=sizeof(size_t);
1.128 paf 592:
1.172.2.21.2. (paf 593:): // 3: lang info
594:): size_t total_length=0;
595:): for(size_t f=0; f<fragments_count; f++) {
596:): size_t piece_length=sizeof(Untaint_lang)+sizeof(size_t);
597:): if(buf_length<piece_length) // lang+length
1.148 paf 598: return false;
599:
1.172.2.21.2. (paf 600:): Untaint_lang lang=*reinterpret_cast<const Untaint_lang *>(cur); cur+=sizeof(Untaint_lang);
601:): size_t fragment_length=*reinterpret_cast<const size_t*>(cur); cur+=sizeof(size_t);
602:): fragments+=Fragment(lang, fragment_length);
603:): total_length+=fragment_length;
1.113 parser 604:
1.172.2.21.2. (paf 605:): buf_length-=piece_length;
1.113 parser 606: }
1.172.2.21.2. (paf 607:):
608:): // 4: letters
609:): if(buf_length!=total_length)
610:): return false;
611:):
612:): body=CORD_cat_char_star(CORD_EMPTY, cur, buf_length);
613:):
1.148 paf 614: return true;
1.113 parser 615: }
E-mail: