Annotation of parser3/src/main/pa_string.C, revision 1.172.2.21.2.26
1.45 paf 1: /** @file
1.172.2.21.2. (paf 2:): Parser: string class. @see untaint.C.
1.46 paf 3:
1.172.2.11 paf 4: Copyright (c) 2001-2003 ArtLebedev Group (http://www.artlebedev.com)
1.138 paf 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.164 paf 6: */
1.46 paf 7:
1.172.2.21.2. 6(paf 8:3): static const char* IDENT_STRING_C="$Date: 2003/03/24 15:56:59 $";
1.4 paf 9:
1.70 paf 10: #include "pcre.h"
11:
1.12 paf 12: #include "pa_string.h"
1.22 paf 13: #include "pa_exception.h"
1.61 paf 14: #include "pa_table.h"
1.101 parser 15: #include "pa_dictionary.h"
1.132 paf 16: #include "pa_charset.h"
1.60 paf 17:
1.172.2.2 paf 18: // helpers
1.139 paf 19:
1.172.2.2 paf 20: /// String::match uses this as replace & global search table columns
1.139 paf 21:
/// Number of numbered capture-group columns ("1".."100") a match table row can hold;
/// also sizes the pcre output vector in String::match.
static const int MAX_MATCH_GROUPS=100;
23:
1.172.2.14 paf 24: class String_match_table_template_columns: public ArrayString {
1.172.2.2 paf 25: public:
1.172.2.4 paf 26: String_match_table_template_columns() {
1.172.2.21.2. (paf 27:): *this+=new String("prematch");
28:): *this+=new String("match");
29:): *this+=new String("postmatch");
1.172.2.4 paf 30: for(int i=0; i<MAX_MATCH_GROUPS; i++) {
1.172.2.21.2. (paf 31:): char *cname=new(PointerFreeGC) char[3/*strlen("100")*/+1/*terminating 0*/];
32:): *this+=new String(cname, sprintf(cname, "%d", 1+i));
1.172.2.2 paf 33: }
1.172.2.21 paf 34: }
1.172.2.4 paf 35: };
36:
1.172.2.21.2. (paf 37:): Table string_match_table_template(new String_match_table_template_columns);
1.172.2.2 paf 38:
1.172.2.21.2. (paf 39:): // String::ArrayFragment methods
40:):
3(paf 41:3): void String::ArrayFragment::append_positions(const ArrayFragment& src,
42:3): size_t substr_begin, size_t substr_end) {
1(paf 43:3): if(substr_begin==substr_end)
3(paf 44:3): return;
(paf 45:):
1(paf 46:3): size_t fragment_begin=0;
(paf 47:): size_t fragment_end;
1(paf 48:3): for(Array_iterator<element_type> i(src); ; fragment_begin=fragment_end) {
(paf 49:): const element_type& fragment=i.next();
1(paf 50:3): fragment_end=fragment_begin+fragment.length;
(paf 51:):
5(paf 52:3): if(substr_begin<fragment_begin) // not reached fragments which may include 'substr'?
(paf 53:): continue;
54:):
6(paf 55:3): if(substr_begin>fragment_end) // not reached fragments which may include 'substr'?
56:3): continue;
57:3):
(paf 58:): // found first fragment including piece of 'substr'
59:): size_t piece_end=min(substr_end, fragment_end);
1(paf 60:3): *this+=Fragment(fragment.lang, piece_end-substr_begin);
(paf 61:):
62:): while(substr_end>fragment_end) { // are there more fragments including pieces of 'substr'?
1(paf 63:3): fragment_begin=fragment_end;
64:3):
(paf 65:): const element_type& fragment=i.next();
1(paf 66:3): fragment_end=fragment_begin+fragment.length;
(paf 67:):
68:): if(substr_end>fragment_end) // are there still more?
3(paf 69:3): append(Fragment(fragment.lang, fragment.length)); // appending whole fragment
(paf 70:): else { // no, it was last
3(paf 71:3): append(Fragment(fragment.lang, substr_end-fragment_begin));
72:3): return;
(paf 73:): }
74:): }
75:):
76:): break;
77:): }
78:): }
79:):
5(paf 80:3): // StringSimple methods
81:3):
82:3):
3(paf 83:3): static int CORD_batched_iter_fn_generic_hash_code(char c, void * client_data) {
84:3): uint& result=*static_cast<uint*>(client_data);
85:3): generic_hash_code(result, c);
86:3): return 0;
87:3): }
5(paf 88:3): static int CORD_batched_iter_fn_generic_hash_code(const char* s, void * client_data) {
89:3): uint& result=*static_cast<uint*>(client_data);
90:3): generic_hash_code(result, s);
91:3): return 0;
92:3): };
7(paf 93:3): uint StringBody::hash_code() const {
5(paf 94:3): uint result=0;
3(paf 95:3): CORD_iter5(body, 0,
96:3): CORD_batched_iter_fn_generic_hash_code,
97:3): CORD_batched_iter_fn_generic_hash_code, &result);
5(paf 98:3): return result;
99:3): }
100:3):
(paf 101:): // String methods
1.172.2.2 paf 102:
1.172.2.21.2. (paf 103:): String::String(const char* cstr, size_t helper_length, bool tainted): body(CORD_EMPTY) {
5(paf 104:3): append(cstr, helper_length, tainted?L_TAINTED:L_CLEAN);
1.1 paf 105: }
1.140 paf 106:
1.172.2.21.2. (paf 107:): String::String(const String& src): body(src.body), fragments(src.fragments) {}
1.28 paf 108:
1.172.2.21.2. 6(paf 109:3): String& String::append(const char* str, size_t length, Language lang) {
3(paf 110:3): if(!str)
1.9 paf 111: return *this;
1.172.2.21.2. 6(paf 112:3): size_t use_length=length?length:strlen(str);
0(paf 113:3): if(!use_length)
1.9 paf 114: return *this;
1.122 paf 115:
1.172.2.21.2. 7(paf 116:3): body.append(str, use_length);
0(paf 117:3): fragments+=Fragment(lang, use_length);
1.1 paf 118:
119: return *this;
120: }
121:
1.172.2.21.2. 1(paf 122:3): /// @todo check in doc: whether it documents NOW bad situation "abc".mid(-1, 3) =were?="ab"
8(paf 123:3): String& String::mid(size_t substr_begin, size_t substr_end) const {
(paf 124:): String& result=*new String;
1.33 paf 125:
1.172.2.21.2. 1(paf 126:3): size_t self_length=length();
127:3): substr_begin=max(min(substr_begin, self_length), (size_t)0);
128:3): substr_end=min(max(substr_end, substr_begin), self_length);
129:3): if(substr_begin==substr_end)
(paf 130:): return result;
1.52 paf 131:
1.172.2.21.2. (paf 132:): // first: letters themselves
7(paf 133:3): result.body=body.mid(substr_begin, substr_end-substr_begin);
1.46 paf 134:
1.172.2.21.2. (paf 135:): // next: their langs
3(paf 136:3): result.fragments.append_positions(fragments, substr_begin, substr_end);
1.53 paf 137:
1.172.2.21.2. 5(paf 138:3): // SAPI::log("piece of '%s' from %d to %d is '%s'",
1(paf 139:3): //cstr(), substr_begin, substr_end, result.cstr());
1.53 paf 140: return result;
1.54 paf 141: }
142:
1.172.2.21.2. 7(paf 143:3): size_t String::pos(const StringBody substr,
5(paf 144:3): size_t this_offset, Language lang) const {
(paf 145:): // first: letters themselves
2(paf 146:3): size_t substr_begin=body.pos(substr, this_offset);
1(paf 147:3): if(substr_begin==CORD_NOT_FOUND)
(paf 148:): return STRING_NOT_FOUND;
149:):
150:): // next: check the lang when specified
151:):
5(paf 152:3): if(lang==L_UNSPECIFIED) // ignore lang?
1(paf 153:3): return substr_begin;
(paf 154:):
155:): // substr must be in one fragment, and fragments' lang must = lang
7(paf 156:3): size_t substr_end=substr_begin+substr.length();
1(paf 157:3): size_t fragment_begin=0;
(paf 158:): size_t fragment_end;
1(paf 159:3): for(Array_iterator<ArrayFragment::element_type> i(fragments); i.has_next(); fragment_begin=fragment_end) {
(paf 160:): const Fragment& fragment=i.next();
1(paf 161:3): fragment_end=fragment_begin+fragment.length;
(paf 162:):
1(paf 163:3): if(substr_begin<fragment_begin) // not reached fragments which may include 'result'?
(paf 164:): continue;
2(paf 165:3): if(substr_begin>=fragment_end) // begin of substr OUT of current fragment?
166:3): continue;
(paf 167:):
168:): if(substr_end>fragment_end) // end of substr OUT of current fragment?
2(paf 169:3): throw Exception(0, // (*) see below
170:3): this,
171:3): "searching for '%s' starting from %ud problem: found begin in one fragment, but end in another",
7(paf 172:3): substr.cstr(), this_offset);
(paf 173:):
4(paf 174:3): if(fragment.lang<=lang)
2(paf 175:3): return substr_begin;
176:3): else { // bad lang...
177:3): /// WARNING: this possibly skips assert (*), but it's fast
178:3): substr_begin=body.pos(substr, fragment_end/*...search AFTER for more*/);
179:3): if(substr_begin==CORD_NOT_FOUND)
180:3): return STRING_NOT_FOUND;
181:3):
7(paf 182:3): size_t substr_end=substr_begin+substr.length();
2(paf 183:3): // and continuing with next fragment
184:3): }
(paf 185:): }
186:):
187:): return STRING_NOT_FOUND;
1.58 paf 188: }
189:
1.172.2.21.2. (paf 190:): size_t String::pos(const String& substr,
5(paf 191:3): size_t this_offset, Language lang) const {
(paf 192:): return pos(substr.body, this_offset, lang);
1.60 paf 193: }
194:
1.172.2.14 paf 195: void String::split(ArrayString& result,
1.172.2.21.2. (paf 196:): size_t& pos_after,
197:): const char* delim,
2(paf 198:3): Language lang, int limit) const {
(paf 199:): size_t self_length=length();
200:): if(size_t delim_length=strlen(delim)) {
1.60 paf 201: int pos_before;
202: // while we have 'delim'...
1.172.2.21.2. (paf 203:): for(; (pos_before=pos(delim, pos_after, lang))>=0 && limit; limit--) {
204:): result+=&mid(pos_after, pos_before);
205:): pos_after=pos_before+delim_length;
1.60 paf 206: }
207: // last piece
1.172.2.21.2. (paf 208:): if(pos_after<self_length && limit) {
209:): result+=&mid(pos_after, self_length);
210:): pos_after=self_length;
1.60 paf 211: }
212: } else { // empty delim
1.172.2.21.2. (paf 213:): result+=this;
214:): pos_after+=self_length;
1.60 paf 215: }
216: }
217:
1.172.2.14 paf 218: void String::split(ArrayString& result,
1.172.2.21.2. (paf 219:): size_t& pos_after,
5(paf 220:3): const String& delim, Language lang,
(paf 221:): int limit) const {
5(paf 222:3): if(!delim.is_empty()) {
1.60 paf 223: int pos_before;
224: // while we have 'delim'...
225: for(; (pos_before=pos(delim, pos_after, lang))>=0 && limit; limit--) {
1.172.2.21.2. (paf 226:): result+=&mid(pos_after, pos_before);
227:): pos_after=pos_before+delim.length();
1.60 paf 228: }
229: // last piece
1.172.2.21.2. (paf 230:): if(pos_after<length() && limit) {
231:): result+=&mid(pos_after, length());
232:): pos_after=length();
1.60 paf 233: }
234: } else { // empty delim
1.172.2.21.2. (paf 235:): result+=this;
236:): pos_after+=length();
1.60 paf 237: }
1.61 paf 238: }
239:
1.172.2.21.2. 2(paf 240:3): static void regex_options(const String* options, int *result, bool& need_pre_post_match){
1.63 paf 241: struct Regex_option {
1.172.2.11 paf 242: const char* keyL;
243: const char* keyU;
1.63 paf 244: int clear, set;
245: int *result;
1.154 paf 246: bool *flag;
1.63 paf 247: } regex_option[]={
1.153 paf 248: {"i", "I", 0, PCRE_CASELESS, result}, // a=A
249: {"s", "S", 0, PCRE_DOTALL, result}, // \n\n$ [default]
250: {"x", "U", 0, PCRE_EXTENDED, result}, // whitespace in regex ignored
251: {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$
252: {"g", "G", 0, true, result+1}, // many rows
1.154 paf 253: {"'", 0, 0, 0, 0, &need_pre_post_match},
254: {0}
1.63 paf 255: };
1.171 paf 256: result[0]=PCRE_EXTRA | PCRE_DOTALL | PCRE_DOLLAR_ENDONLY;
1.63 paf 257: result[1]=0;
258:
1.172.2.21.2. 2(paf 259:3): if(options && !options->is_empty())
1.153 paf 260: for(Regex_option *o=regex_option; o->keyL; o++)
1.172.2.21.2. 2(paf 261:3): if(options->pos(o->keyL)>=0
262:3): || (o->keyU && options->pos(o->keyU)>=0)) {
1.154 paf 263: if(o->flag)
264: *o->flag=true;
265: else { // result
266: *o->result &= ~o->clear;
267: *o->result |= o->set;
268: }
1.63 paf 269: }
270: }
271:
1.172.2.21.2. (paf 272:): Table* String::match(Charset& source_charset,
1.172.2.4 paf 273: const String& regexp,
1.172.2.21.2. 2(paf 274:3): const String* options,
1.172.2.4 paf 275: Row_action row_action, void *info,
1.172.2.20 paf 276: bool& just_matched) const {
1.172.2.21.2. 5(paf 277:3): if(regexp.is_empty())
(paf 278:): throw Exception(0,
279:): 0,
1.73 paf 280: "regexp is empty");
1.154 paf 281:
1.172.2.21.2. (paf 282:): const char* pattern=regexp.cstr();
1.172.2.11 paf 283: const char* errptr;
1.62 paf 284: int erroffset;
1.172.2.21.2. (paf 285:): bool need_pre_post_match=false;
1.154 paf 286: int option_bits[2]; regex_options(options, option_bits, need_pre_post_match);
1.172.2.20 paf 287: bool global=option_bits[1]!=0;
1.172.2.21.2. (paf 288:): pcre *code=pcre_compile(pattern, option_bits[0],
1.62 paf 289: &errptr, &erroffset,
1.172.2.1 paf 290: source_charset.pcre_tables);
1.62 paf 291:
1.67 paf 292: if(!code)
1.172.2.21.2. (paf 293:): throw Exception(0,
294:): ®exp.mid(erroffset, regexp.length()),
1.74 paf 295: "regular expression syntax error - %s", errptr);
1.62 paf 296:
1.172.2.20 paf 297: int subpatterns=pcre_info(code, 0, 0);
298: if(subpatterns<0) {
1.100 parser 299: pcre_free(code);
1.149 paf 300: throw Exception(0,
1.172.2.21.2. (paf 301:): ®exp,
1.76 paf 302: "pcre_info error (%d)",
1.172.2.20 paf 303: subpatterns);
1.63 paf 304: }
305:
1.172.2.21.2. (paf 306:): const char* subject=cstr();
307:): size_t subject_length=strlen(subject);
308:): const int oveclength=(1/*match*/+MAX_MATCH_GROUPS)*3;
309:): int ovector[oveclength];
1.155 paf 310:
311: // create table
1.172.2.21.2. 1(paf 312:3): Table& table=*new Table(string_match_table_template);
1.63 paf 313:
1.64 paf 314: int exec_option_bits=0;
1.154 paf 315: int prestart=0;
316: int poststart=0;
1.172.2.21.2. (paf 317:): int postfinish=length();
1.63 paf 318: while(true) {
319: int exec_substrings=pcre_exec(code, 0,
1.172.2.21.2. (paf 320:): subject, subject_length, prestart,
321:): exec_option_bits, ovector, oveclength);
1.63 paf 322:
323: if(exec_substrings==PCRE_ERROR_NOMATCH) {
1.100 parser 324: pcre_free(code);
1.172.2.21.2. (paf 325:): row_action(table, 0/*last time, no raw*/, 0, 0, poststart, postfinish, info);
1.172.2.20 paf 326: if(global || subpatterns)
1.172.2.21.2. 2(paf 327:3): return &table; // global or with subpatterns=true+result
1.172.2.20 paf 328: else {
1.172.2.21.2. (paf 329:): just_matched=false; return 0; // not global=no result
1.172.2.20 paf 330: }
1.63 paf 331: }
332:
333: if(exec_substrings<0) {
1.100 parser 334: pcre_free(code);
1.172.2.21.2. (paf 335:): throw Exception(0,
336:): ®exp,
1.76 paf 337: "regular expression execute error (%d)",
1.63 paf 338: exec_substrings);
339: }
340:
1.154 paf 341: int prefinish=ovector[0];
342: poststart=ovector[1];
1.172.2.21.2. (paf 343:): ArrayString* row=new ArrayString;
1.172.2.4 paf 344: if(need_pre_post_match) {
1.172.2.21.2. (paf 345:): *row+=&mid(0, prefinish); // .prematch column value
346:): *row+=&mid(prefinish, poststart); // .match
347:): *row+=&mid(poststart, postfinish); // .postmatch
1.172.2.4 paf 348: } else {
1.172.2.21.2. (paf 349:): *row+=0; // .prematch column value
350:): *row+=0; // .match
351:): *row+=0; // .postmatch
1.172.2.4 paf 352: }
1.63 paf 353:
354: for(int i=1; i<exec_substrings; i++) {
1.69 paf 355: // -1:-1 case handled peacefully by mid() itself
1.172.2.21.2. (paf 356:): *row+=&mid(ovector[i*2+0], ovector[i*2+1]); // .i column value
1.63 paf 357: }
358:
1.172.2.20 paf 359: row_action(table, row, prestart, prefinish, poststart, postfinish, info);
1.63 paf 360:
1.172.2.20 paf 361: if(!global || prestart==poststart) { // not global | going to hang
1.100 parser 362: pcre_free(code);
1.172.2.21.2. (paf 363:): row_action(table, 0/*last time, no row*/, 0, 0, poststart, postfinish, info);
2(paf 364:3): return &table;
1.63 paf 365: }
1.154 paf 366: prestart=poststart;
1.63 paf 367:
368: /*
369: if(option_bits[0] & PCRE_MULTILINE)
1.64 paf 370: exec_option_bits|=PCRE_NOTBOL; // start of subject+startoffset not BOL
1.63 paf 371: */
372: }
1.82 parser 373: }
374:
1.172.2.21.2. 0(paf 375:3): String& String::change_case(Charset& source_charset, Change_case_kind kind) const {
(paf 376:): String& result=*new String();
1.172.2.4 paf 377:
1.172.2.1 paf 378: const unsigned char *tables=source_charset.pcre_tables;
1.82 parser 379:
380: const unsigned char *a;
381: const unsigned char *b;
382: switch(kind) {
383: case CC_UPPER:
384: a=tables+lcc_offset;
385: b=tables+fcc_offset;
386: break;
387: case CC_LOWER:
388: a=tables+lcc_offset;
389: b=0;
390: break;
391: default:
1.172.2.21.2. (paf 392:): throw Exception(0,
393:): this,
1.82 parser 394: "unknown change case kind #%d",
395: static_cast<int>(kind)); // never
396: a=b=0; // calm, compiler
397: break; // never
398: }
399:
1.172.2.21.2. (paf 400:): char* new_cstr=cstrm();
401:): char *dest=new_cstr;
402:): unsigned char index;
403:): for(const char* current=new_cstr; index=(unsigned char)*current; current++) {
404:): unsigned char c=a[index];
405:): if(b)
406:): c=b[c];
1.82 parser 407:
1.172.2.21.2. (paf 408:): *dest++=(char)c;
409:): }
410:): result.body=new_cstr;
3(paf 411:3): result.fragments.append(fragments);
1.89 parser 412:
1.101 parser 413: return result;
414: }
415:
1.172.2.21.2. (paf 416:): const String& String::replace(const Dictionary& dict) const {
417:): String& result=*new String();
418:): const char* old_cstr=cstr();
1(paf 419:3): const char* prematch_begin=old_cstr;
(paf 420:):
421:): for(const char* current=old_cstr; *current; ) {
1(paf 422:3): if(Table::element_type row=dict.first_that_begins(current)) {
(paf 423:): // prematch
1(paf 424:3): if(size_t prematch_length=current-prematch_begin) {
7(paf 425:3): result.body.append(prematch_begin, prematch_length);
4(paf 426:3): result.fragments.append_positions(fragments, prematch_begin-old_cstr, current-old_cstr);
1.101 parser 427: }
428:
1.172.2.21.2. (paf 429:): // match
430:):
431:): const String* a=row->get(0);
1(paf 432:3): // skip 'a' in 'current'; move prematch_begin
433:3): current+=a->length(); prematch_begin=current;
1.170 paf 434:
1.172.2.21.2. (paf 435:): if(row->count()>1) { // are there any b?
436:): const String* b=row->get(1);
437:): if(size_t b_length=b->length()) {
7(paf 438:3): result.body.append(b->cstr_to_string_body());
3(paf 439:3): result.fragments.append(b->fragments);
(paf 440:): }
441:): }
442:): } else // simply advance
443:): current++;
444:): }
1.156 paf 445:
1.172.2.21.2. (paf 446:): // postmatch
3(paf 447:3): if(size_t postmatch_length=current-prematch_begin) {
7(paf 448:3): result.body.append(prematch_begin, postmatch_length);
4(paf 449:3): result.fragments.append_positions(fragments, prematch_begin-old_cstr, current-old_cstr);
(paf 450:): }
1.156 paf 451:
1.89 parser 452: return result;
453: }
454:
1.172.2.21.2. (paf 455:): double String::as_double() const {
1.89 parser 456: double result;
1.172.2.21.2. (paf 457:): const char *str=cstr();
458:):
459:): while(*str && isspace(*str))
460:): str++;
461:): if(!*str)
1.162 paf 462: return 0;
1.161 paf 463:
1.102 parser 464: char *error_pos;
1.89 parser 465: // 0xABC
1.172.2.21.2. (paf 466:): if(str[0]=='0')
467:): if(str[1]=='x' || str[1]=='X')
468:): result=(double)(unsigned long)strtol(str, &error_pos, 0);
1.99 parser 469: else
1.172.2.21.2. (paf 470:): result=(double)strtod(str+1/*skip leading 0*/, &error_pos);
1.89 parser 471: else
1.172.2.21.2. (paf 472:): result=(double)strtod(str, &error_pos);
1.89 parser 473:
1.159 paf 474: while(char c=*error_pos++)
475: if(!isspace(c))
476: throw Exception("number.format",
1.172.2.21.2. (paf 477:): this,
1.159 paf 478: "invalid number (double)");
1.89 parser 479:
480: return result;
481: }
1.172.2.21.2. (paf 482:): int String::as_int() const {
1.89 parser 483: int result;
1.172.2.21.2. (paf 484:): const char *str=cstr();
485:):
486:): while(*str && isspace(*str))
487:): str++;
488:): if(!*str)
1.162 paf 489: return 0;
1.161 paf 490:
1.102 parser 491: char *error_pos;
1.89 parser 492: // 0xABC
1.172.2.21.2. (paf 493:): if(str[0]=='0')
494:): if(str[1]=='x' || str[1]=='X')
495:): result=(int)(unsigned long)strtol(str, &error_pos, 0);
1.99 parser 496: else
1.172.2.21.2. (paf 497:): result=(int)strtol(str+1/*skip leading 0*/, &error_pos, 0);
1.89 parser 498: else
1.172.2.21.2. (paf 499:): result=(int)strtol(str, &error_pos, 0);
1.89 parser 500:
1.159 paf 501: while(char c=*error_pos++)
502: if(!isspace(c))
503: throw Exception("number.format",
1.172.2.21.2. (paf 504:): this,
1.159 paf 505: "invalid number (int)");
1.82 parser 506:
507: return result;
1.61 paf 508: }
1.113 parser 509:
/// Stores the four low-order bytes of @a word into bytes[0..3], least significant byte first.
inline void uint2uchars(uint word, uchar *bytes) {
	for(int n=0; n<4; n++, word>>=8)
		bytes[n]=word&0xFF;
}
/// Assembles a uint from bytes[0..3], least significant byte first
/// (the inverse of uint2uchars).
inline uint uchars2uint(uchar *bytes) {
	// widen to uint BEFORE shifting: a uchar promotes to signed int,
	// and shifting a value >=0x80 left by 24 would overflow it
	return (uint)bytes[3]<<24
		| (uint)bytes[2]<<16
		| (uint)bytes[1]<<8
		| (uint)bytes[0];
}
522:
/// Body iteration callback used by String::serialize: copies the C string @a s
/// (without its terminating 0) to *cur and advances *cur past the copied bytes.
/// Always returns 0.
static int serialize_body_piece(const char* s, char** cur) {
	char* out=*cur;
	while(*s)
		*out++=*s++;
	*cur=out;
	return 0;
}
528:): String::C String::serialize(size_t prolog_length) const {
529:): size_t buf_length=
530:): prolog_length
5(paf 531:3): +fragments.count()*(sizeof(Language)+sizeof(size_t))
(paf 532:): +length();
533:): C result(new(PointerFreeGC) char[buf_length], buf_length);
534:):
535:): // 1: prolog
536:): char *cur=result.str+prolog_length;
537:):
538:):
539:): // 2: fragments.count
540:): size_t fragments_count=fragments.count();
541:): memcpy(cur, &fragments_count, sizeof(fragments_count)); cur+=sizeof(fragments_count);
1.113 parser 542:
1.172.2.21.2. (paf 543:): // 3: lang info
544:): for(Array_iterator<ArrayFragment::element_type> i(fragments); i.has_next(); ) {
545:): const Fragment& fragment=i.next();
1.123 paf 546: // lang
1.172.2.21.2. (paf 547:): memcpy(cur, &fragment.lang, sizeof(fragment.lang)); cur+=sizeof(fragment.lang);
548:): // length
549:): memcpy(cur, &fragment.length, sizeof(fragment.length)); cur+=sizeof(fragment.length);
550:): }
551:):
552:): // 4: letters
7(paf 553:3): body.for_each(serialize_body_piece, &cur);
(paf 554:):
555:): return result;
1.113 parser 556: }
1.172.2.21.2. 9(paf 557:3): bool String::deserialize(size_t prolog_length, void *buf, size_t buf_length) {
(paf 558:): if(buf_length<=prolog_length)
1.148 paf 559: return false;
1.172.2.21.2. (paf 560:): buf_length-=prolog_length;
1.113 parser 561:
1.172.2.21.2. (paf 562:): // 1: prolog
563:): const char* cur=(const char* )buf+prolog_length;
1.148 paf 564:
1.172.2.21.2. (paf 565:): // 2: fragments.count
566:): if(buf_length<sizeof(size_t)) // fragments.count don't fit?
567:): return false;
568:): size_t fragments_count=*reinterpret_cast<const size_t*>(cur); cur+=sizeof(size_t);
569:): buf_length-=sizeof(size_t);
1.128 paf 570:
1.172.2.21.2. (paf 571:): // 3: lang info
572:): size_t total_length=0;
573:): for(size_t f=0; f<fragments_count; f++) {
5(paf 574:3): size_t piece_length=sizeof(Language)+sizeof(size_t);
(paf 575:): if(buf_length<piece_length) // lang+length
1.148 paf 576: return false;
577:
1.172.2.21.2. 5(paf 578:3): Language lang=*reinterpret_cast<const Language *>(cur); cur+=sizeof(Language);
(paf 579:): size_t fragment_length=*reinterpret_cast<const size_t*>(cur); cur+=sizeof(size_t);
580:): fragments+=Fragment(lang, fragment_length);
581:): total_length+=fragment_length;
1.113 parser 582:
1.172.2.21.2. (paf 583:): buf_length-=piece_length;
1.113 parser 584: }
1.172.2.21.2. (paf 585:):
586:): // 4: letters
587:): if(buf_length!=total_length)
588:): return false;
589:):
590:): body=CORD_cat_char_star(CORD_EMPTY, cur, buf_length);
591:):
1.148 paf 592: return true;
1.113 parser 593: }
E-mail: