Annotation of parser3/src/main/pa_string.C, revision 1.195
1.45 paf 1: /** @file
1.174 paf 2: Parser: string class. @see untalength_t.C.
1.46 paf 3:
1.194 paf 4: Copyright (c) 2001-2004 ArtLebedev Group (http://www.artlebedev.com)
1.138 paf 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.164 paf 6: */
1.46 paf 7:
/// Revision timestamp of this source file, kept in the binary as a string (the value has the form of an expanded "$Date$" keyword).
1.195 ! paf 8: static const char * const IDENT_STRING_C="$Date: 2004/02/11 15:33:16 $";
1.4 paf 9:
1.70 paf 10: #include "pcre.h"
11:
1.12 paf 12: #include "pa_string.h"
1.22 paf 13: #include "pa_exception.h"
1.61 paf 14: #include "pa_table.h"
1.101 parser 15: #include "pa_dictionary.h"
1.132 paf 16: #include "pa_charset.h"
1.60 paf 17:
/// Default-constructed shared empty string; String::match() stores its address in result rows as a placeholder cell.
1.185 paf 18: const String String::Empty;
19:
1.193 paf 20: int pa_atoi(const char* str, const String* problem_source) {
21: if(!str)
22: return 0;
23:
24: while(*str && isspace(*str))
25: str++;
26: if(!*str)
27: return 0;
28:
29: int result;
30: char *error_pos;
31: // 0xABC
32: if(str[0]=='0')
33: if(str[1]=='x' || str[1]=='X')
34: result=(int)(unsigned long)strtol(str, &error_pos, 0);
35: else
36: result=(int)strtol(str+1/*skip leading 0*/, &error_pos, 0);
37: else
38: result=(int)strtol(str, &error_pos, 0);
39:
40: while(char c=*error_pos++)
41: if(!isspace(c))
42: throw Exception("number.format",
43: problem_source,
44: problem_source?"invalid number (int)": "'%s' is invalid number (int)", str);
45:
46: return result;
47: }
48:
49: double pa_atod(const char* str, const String* problem_source) {
50: if(!str)
51: return 0;
52:
53: while(*str && isspace(*str))
54: str++;
55: if(!*str)
56: return 0;
57:
58: double result;
59: char *error_pos;
60: // 0xABC
61: if(str[0]=='0')
62: if(str[1]=='x' || str[1]=='X')
63: result=(double)(unsigned long)strtol(str, &error_pos, 0);
64: else
65: result=(double)strtod(str+1/*skip leading 0*/, &error_pos);
66: else
67: result=(double)strtod(str, &error_pos);
68:
69: while(char c=*error_pos++)
70: if(!isspace(c))
71: throw Exception("number.format",
72: problem_source,
73: problem_source?"invalid number (double)": "'%s' is invalid number (double)", str);
74:
75: return result;
76: }
77:
1.176 paf 78: // cord lib extension
79:
#ifndef DOXYGEN
/// State handed to CORD_range_contains_chr_greater_then_proc through client_data
struct chr_data {
	ssize_t countdown; ///< how many more characters may still be examined
	char target; ///< the character that range characters are compared against
};
#endif
86: static int CORD_range_contains_chr_greater_then_proc(char c, size_t size, void* client_data)
87: {
88: register chr_data * d = (chr_data *)client_data;
89:
90: if (d -> countdown<=0) return(2);
91: d -> countdown -= size;
92: if (c > d -> target) return(1);
93: return(0);
94: }
95: int CORD_range_contains_chr_greater_then(CORD x, size_t i, size_t n, int c)
96: {
97: chr_data d;
98:
99: d.countdown = n;
100: d.target = c;
101: return(CORD_block_iter(x, i, CORD_range_contains_chr_greater_then_proc, &d) == 1/*alternatives: 0 normally ended, 2=struck 'n'*/);
102: }
103:
/**
	CORD_block_iter callback used by CORD_block_count: counts its own invocations.

	@param client_data points to the size_t counter owned by CORD_block_count
	@return 0, which tells the iterator to continue
*/
static int CORD_block_count_proc(char /*c*/, size_t /*size*/, void* client_data)
{
	// The caller passes the address of a size_t.
	// Incrementing it through an int* touched the wrong half on 64-bit big-endian targets.
	size_t* result=(size_t*)client_data;
	(*result)++;
	return(0); // 0=continue
}
110: size_t CORD_block_count(CORD x)
111: {
112: size_t result=0;
113: CORD_block_iter(x, 0, CORD_block_count_proc, &result);
114: return result;
115: }
116:
1.174 paf 117: // helpers
1.139 paf 118:
1.174 paf 119: /// String::match uses this as replace & global search table columns
1.139 paf 120:
1.174 paf 121: const int MAX_MATCH_GROUPS=100;
1.139 paf 122:
1.174 paf 123: class String_match_table_template_columns: public ArrayString {
124: public:
125: String_match_table_template_columns() {
126: *this+=new String("prematch");
127: *this+=new String("match");
128: *this+=new String("postmatch");
129: for(int i=0; i<MAX_MATCH_GROUPS; i++) {
1.176 paf 130: *this+=new String(String::Body::Format(1+i), String::L_CLEAN);
1.174 paf 131: }
132: }
133: };
134:
135: Table string_match_table_template(new String_match_table_template_columns);
136:
1.176 paf 137: // String::Body methods
1.140 paf 138:
1.176 paf 139: String::Body String::Body::Format(int value) {
1.174 paf 140: char local[MAX_NUMBER];
141: size_t length=snprintf(local, MAX_NUMBER, "%d", value);
1.176 paf 142: return String::Body(pa_strdup(local, length), length);
1.120 paf 143: }
144:
1.195 ! paf 145: String::Body String::Body::trim(String::Trim_kind kind, const char* chars,
! 146: size_t* out_start, size_t* out_length) const {
! 147: size_t our_length=length();
! 148: if(!our_length)
! 149: return *this;
! 150: if(!chars)
! 151: chars=" \t\n"; // white space
! 152: Body result=*this;
! 153:
! 154: size_t start=0;
! 155: size_t end=our_length;
! 156: bool trim_start=(kind!=TRIM_END);
! 157: if(trim_start) {
! 158: CORD_pos pos; set_pos(pos, 0);
! 159: while(true) {
! 160: char c=CORD_pos_fetch(pos);
! 161: if(strchr(chars, c)) {
! 162: if(++start==our_length)
! 163: return 0; // all chars are empty, just return empty string
! 164: } else
! 165: break;
! 166:
! 167: CORD_next(pos);
! 168: }
! 169: }
! 170: // todo справа
! 171:
! 172: if(start==0 && end==our_length) // nobody moved a thing
! 173: return *this;
! 174:
! 175: if(out_start)
! 176: *out_start=start;
! 177: size_t new_length=end-start;
! 178: if(out_length)
! 179: *out_length=new_length;
! 180:
! 181: return mid(start, new_length);
! 182: }
! 183:
1.174 paf 184: static int CORD_batched_iter_fn_generic_hash_code(char c, void * client_data) {
185: uint& result=*static_cast<uint*>(client_data);
186: generic_hash_code(result, c);
187: return 0;
188: }
189: static int CORD_batched_iter_fn_generic_hash_code(const char* s, void * client_data) {
190: uint& result=*static_cast<uint*>(client_data);
191: generic_hash_code(result, s);
192: return 0;
193: };
1.176 paf 194: uint String::Body::hash_code() const {
1.174 paf 195: uint result=0;
196: CORD_iter5(body, 0,
197: CORD_batched_iter_fn_generic_hash_code,
198: CORD_batched_iter_fn_generic_hash_code, &result);
1.120 paf 199: return result;
1.94 parser 200: }
201:
1.174 paf 202: // String methods
203:
204: String::String(const char* cstr, size_t helper_length, bool tainted): body(CORD_EMPTY) {
205: append_help_length(cstr, helper_length, tainted?L_TAINTED:L_CLEAN);
1.115 paf 206: }
1.174 paf 207: String::String(const String::C cstr, bool tainted): body(CORD_EMPTY) {
208: append_know_length(cstr.str, cstr.length, tainted?L_TAINTED:L_CLEAN);
1.5 paf 209: }
1.28 paf 210:
1.174 paf 211: String& String::append_know_length(const char* str, size_t known_length, Language lang) {
212: if(!known_length)
1.9 paf 213: return *this;
1.122 paf 214:
1.176 paf 215: // first: langs
216: langs.append(body, lang, known_length);
217: // next: letters themselves
1.174 paf 218: body.append_know_length(str, known_length);
1.1 paf 219:
1.174 paf 220: ASSERT_STRING_INVARIANT(*this);
1.1 paf 221: return *this;
222: }
1.174 paf 223: String& String::append_help_length(const char* str, size_t helper_length, Language lang) {
224: if(!str)
225: return *this;
226: size_t known_length=helper_length?helper_length:strlen(str);
227: if(!known_length)
228: return *this;
1.1 paf 229:
1.174 paf 230: return append_know_length(str, known_length, lang);
1.5 paf 231: }
1.174 paf 232: String& String::append_strdup(const char* str, size_t helper_length, Language lang) {
233: size_t known_length=helper_length?helper_length:strlen(str);
234: if(!known_length)
235: return *this;
1.5 paf 236:
1.176 paf 237: // first: langs
238: langs.append(body, lang, known_length);
239: // next: letters themselves
1.174 paf 240: body.append_strdup_know_length(str, known_length);
1.33 paf 241:
1.174 paf 242: ASSERT_STRING_INVARIANT(*this);
243: return *this;
1.5 paf 244: }
1.46 paf 245:
1.174 paf 246: /// @todo check in doc: whether it documents NOW bad situation "abc".mid(-1, 3) =were?="ab"
247: String& String::mid(size_t substr_begin, size_t substr_end) const {
248: String& result=*new String;
249:
250: size_t self_length=length();
251: substr_begin=min(substr_begin, self_length);
252: substr_end=min(max(substr_end, substr_begin), self_length);
1.176 paf 253: size_t substr_length=substr_end-substr_begin;
254: if(!substr_length)
1.107 parser 255: return result;
1.53 paf 256:
1.176 paf 257: // first: their langs
258: result.langs.append(result.body, langs, substr_begin, substr_length);
259: // next: letters themselves
260: result.body=body.mid(substr_begin, substr_length);
1.174 paf 261:
262: // SAPI::log("piece of '%s' from %d to %d is '%s'",
263: //cstr(), substr_begin, substr_end, result.cstr());
264: ASSERT_STRING_INVARIANT(result);
1.53 paf 265: return result;
1.54 paf 266: }
267:
1.176 paf 268: size_t String::pos(const String::Body substr, size_t this_offset, Language lang) const {
1.183 paf 269: size_t substr_length=substr.length();
270: while(true) {
271: size_t substr_begin=body.pos(substr, this_offset);
272:
273: if(substr_begin==CORD_NOT_FOUND)
274: return STRING_NOT_FOUND;
1.174 paf 275:
1.183 paf 276: if(langs.check_lang(lang, substr_begin, substr_length))
277: return substr_begin;
278:
279: this_offset=substr_begin+substr_length;
280: }
1.58 paf 281: }
282:
1.174 paf 283: size_t String::pos(const String& substr,
284: size_t this_offset, Language lang) const {
285: return pos(substr.body, this_offset, lang);
1.60 paf 286: }
287:
1.174 paf 288: void String::split(ArrayString& result,
289: size_t& pos_after,
290: const char* delim,
291: Language lang, int limit) const {
292: size_t self_length=length();
293: if(size_t delim_length=strlen(delim)) {
1.186 paf 294: size_t pos_before;
1.60 paf 295: // while we have 'delim'...
1.174 paf 296: for(; (pos_before=pos(delim, pos_after, lang))!=STRING_NOT_FOUND && limit; limit--) {
1.69 paf 297: result+=&mid(pos_after, pos_before);
1.174 paf 298: pos_after=pos_before+delim_length;
1.60 paf 299: }
300: // last piece
1.174 paf 301: if(pos_after<self_length && limit) {
302: result+=&mid(pos_after, self_length);
303: pos_after=self_length;
1.60 paf 304: }
305: } else { // empty delim
306: result+=this;
1.174 paf 307: pos_after+=self_length;
1.60 paf 308: }
309: }
310:
1.174 paf 311: void String::split(ArrayString& result,
312: size_t& pos_after,
313: const String& delim, Language lang,
314: int limit) const {
1.140 paf 315: if(!delim.is_empty()) {
1.186 paf 316: size_t pos_before;
1.60 paf 317: // while we have 'delim'...
1.174 paf 318: for(; (pos_before=pos(delim, pos_after, lang))!=STRING_NOT_FOUND && limit; limit--) {
1.69 paf 319: result+=&mid(pos_after, pos_before);
1.174 paf 320: pos_after=pos_before+delim.length();
1.60 paf 321: }
322: // last piece
1.174 paf 323: if(pos_after<length() && limit) {
324: result+=&mid(pos_after, length());
325: pos_after=length();
1.60 paf 326: }
327: } else { // empty delim
328: result+=this;
1.174 paf 329: pos_after+=length();
1.60 paf 330: }
1.61 paf 331: }
332:
1.174 paf 333: static void regex_options(const String* options, int *result, bool& need_pre_post_match){
1.63 paf 334: struct Regex_option {
1.174 paf 335: const char* keyL;
336: const char* keyU;
1.63 paf 337: int clear, set;
338: int *result;
1.154 paf 339: bool *flag;
1.63 paf 340: } regex_option[]={
1.189 paf 341: {"i", "I", 0, PCRE_CASELESS, result, 0}, // a=A
342: {"s", "S", 0, PCRE_DOTALL, result, 0}, // \n\n$ [default]
343: {"x", "U", 0, PCRE_EXTENDED, result, 0}, // whitespace in regex ignored
344: {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result, 0}, // ^aaa\n$^bbb\n$
345: {"g", "G", 0, 1, result+1, 0}, // many rows
1.154 paf 346: {"'", 0, 0, 0, 0, &need_pre_post_match},
1.189 paf 347: {0, 0, 0, 0, 0, 0}
1.63 paf 348: };
1.171 paf 349: result[0]=PCRE_EXTRA | PCRE_DOTALL | PCRE_DOLLAR_ENDONLY;
1.63 paf 350: result[1]=0;
351:
1.174 paf 352: if(options && !options->is_empty())
1.153 paf 353: for(Regex_option *o=regex_option; o->keyL; o++)
1.174 paf 354: if(options->pos(o->keyL)!=STRING_NOT_FOUND
355: || (o->keyU && options->pos(o->keyU)!=STRING_NOT_FOUND)) {
1.154 paf 356: if(o->flag)
357: *o->flag=true;
358: else { // result
359: *o->result &= ~o->clear;
360: *o->result |= o->set;
361: }
1.63 paf 362: }
363: }
364:
1.174 paf 365: Table* String::match(Charset& source_charset,
366: const String& regexp,
367: const String* options,
368: Row_action row_action, void *info,
369: bool& just_matched) const {
1.140 paf 370: if(regexp.is_empty())
1.149 paf 371: throw Exception(0,
1.174 paf 372: 0,
1.73 paf 373: "regexp is empty");
1.154 paf 374:
1.174 paf 375: const char* pattern=regexp.cstr();
376: const char* errptr;
1.62 paf 377: int erroffset;
1.173 paf 378: bool need_pre_post_match=false;
1.174 paf 379: int option_bits[2]={0}; regex_options(options, option_bits, need_pre_post_match);
380: bool global=option_bits[1]!=0;
1.63 paf 381: pcre *code=pcre_compile(pattern, option_bits[0],
1.62 paf 382: &errptr, &erroffset,
1.174 paf 383: source_charset.pcre_tables);
1.62 paf 384:
1.67 paf 385: if(!code)
1.149 paf 386: throw Exception(0,
1.174 paf 387: ®exp.mid(erroffset, regexp.length()),
1.74 paf 388: "regular expression syntax error - %s", errptr);
1.62 paf 389:
1.174 paf 390: int subpatterns=pcre_info(code, 0, 0);
391: if(subpatterns<0) {
1.100 parser 392: pcre_free(code);
1.149 paf 393: throw Exception(0,
1.174 paf 394: ®exp,
1.76 paf 395: "pcre_info error (%d)",
1.174 paf 396: subpatterns);
1.63 paf 397: }
398:
1.174 paf 399: const char* subject=cstr();
400: size_t subject_length=strlen(subject);
401: const int oveclength=(1/*match*/+MAX_MATCH_GROUPS)*3;
402: int ovector[oveclength];
1.155 paf 403:
404: // create table
1.173 paf 405: Table::Action_options table_options;
1.174 paf 406: Table& table=*new Table(string_match_table_template, table_options);
1.63 paf 407:
1.64 paf 408: int exec_option_bits=0;
1.154 paf 409: int prestart=0;
410: int poststart=0;
1.174 paf 411: int postfinish=length();
1.63 paf 412: while(true) {
413: int exec_substrings=pcre_exec(code, 0,
1.174 paf 414: subject, subject_length, prestart,
415: exec_option_bits, ovector, oveclength);
1.63 paf 416:
417: if(exec_substrings==PCRE_ERROR_NOMATCH) {
1.100 parser 418: pcre_free(code);
1.174 paf 419: row_action(table, 0/*last time, no raw*/, 0, 0, poststart, postfinish, info);
420: if(global || subpatterns)
421: return &table; // global or with subpatterns=true+result
422: else {
423: just_matched=false; return 0; // not global=no result
424: }
1.63 paf 425: }
426:
427: if(exec_substrings<0) {
1.100 parser 428: pcre_free(code);
1.149 paf 429: throw Exception(0,
1.174 paf 430: ®exp,
1.76 paf 431: "regular expression execute error (%d)",
1.63 paf 432: exec_substrings);
433: }
434:
1.154 paf 435: int prefinish=ovector[0];
436: poststart=ovector[1];
1.174 paf 437: ArrayString* row=new ArrayString;
438: if(need_pre_post_match) {
439: *row+=&mid(0, prefinish); // .prematch column value
440: *row+=&mid(prefinish, poststart); // .match
441: *row+=&mid(poststart, postfinish); // .postmatch
442: } else {
1.185 paf 443: *row+=&Empty; // .prematch column value
444: *row+=&Empty; // .match
445: *row+=&Empty; // .postmatch
1.174 paf 446: }
1.63 paf 447:
448: for(int i=1; i<exec_substrings; i++) {
1.69 paf 449: // -1:-1 case handled peacefully by mid() itself
1.174 paf 450: *row+=&mid(ovector[i*2+0], ovector[i*2+1]); // .i column value
1.63 paf 451: }
452:
1.174 paf 453: row_action(table, row, prestart, prefinish, poststart, postfinish, info);
1.63 paf 454:
1.174 paf 455: if(!global || prestart==poststart) { // not global | going to hang
1.100 parser 456: pcre_free(code);
1.174 paf 457: row_action(table, 0/*last time, no row*/, 0, 0, poststart, postfinish, info);
458: return &table;
1.63 paf 459: }
1.154 paf 460: prestart=poststart;
1.63 paf 461:
462: /*
463: if(option_bits[0] & PCRE_MULTILINE)
1.64 paf 464: exec_option_bits|=PCRE_NOTBOL; // start of subject+startoffset not BOL
1.63 paf 465: */
466: }
1.82 parser 467: }
468:
1.174 paf 469: String& String::change_case(Charset& source_charset, Change_case_kind kind) const {
470: String& result=*new String();
471: if(is_empty())
472: return result;
473:
474: char* new_cstr=cstrm();
1.192 paf 475: size_t new_cstr_len=length();
1.181 paf 476: if(source_charset.isUTF8()) {
477: switch(kind) {
478: case CC_UPPER:
1.192 paf 479: change_case_UTF8((const XMLByte*)new_cstr, new_cstr_len, (XMLByte*)new_cstr, new_cstr_len, UTF8CaseToUpper);
1.181 paf 480: break;
481: case CC_LOWER:
1.192 paf 482: change_case_UTF8((const XMLByte*)new_cstr, new_cstr_len, (XMLByte*)new_cstr, new_cstr_len, UTF8CaseToLower);
1.181 paf 483: break;
484: default:
485: assert(!"unknown change case kind");
486: break; // never
487: }
488:
489: } else {
490: const unsigned char *tables=source_charset.pcre_tables;
1.82 parser 491:
1.181 paf 492: const unsigned char *a;
493: const unsigned char *b;
494: switch(kind) {
495: case CC_UPPER:
496: a=tables+lcc_offset;
497: b=tables+fcc_offset;
498: break;
499: case CC_LOWER:
500: a=tables+lcc_offset;
501: b=0;
502: break;
503: default:
504: assert(!"unknown change case kind");
505: a=b=0; // calm, compiler
506: break; // never
507: }
508:
1.192 paf 509: char *dest=new_cstr;
1.181 paf 510: unsigned char index;
1.190 paf 511: for(const char* current=new_cstr; (index=(unsigned char)*current); current++) {
1.181 paf 512: unsigned char c=a[index];
513: if(b)
514: c=b[c];
515:
516: *dest++=(char)c;
517: }
1.174 paf 518: }
1.176 paf 519: result.langs=langs;
1.174 paf 520: result.body=new_cstr;
1.89 parser 521:
1.101 parser 522: return result;
523: }
524:
1.174 paf 525: const String& String::replace(const Dictionary& dict) const {
526: String& result=*new String();
527: const char* old_cstr=cstr();
528: const char* prematch_begin=old_cstr;
529:
530: const char* current=old_cstr;
531: while(*current) {
1.184 paf 532: if(Dictionary::Subst subst=dict.first_that_begins(current)) {
1.174 paf 533: // prematch
534: if(size_t prematch_length=current-prematch_begin) {
1.179 paf 535: result.langs.append(result.body, langs, prematch_begin-old_cstr, prematch_length);
1.174 paf 536: result.body.append_strdup_know_length(prematch_begin, prematch_length);
1.101 parser 537: }
538:
1.174 paf 539: // match
540: // skip 'a' in 'current'; move prematch_begin
1.184 paf 541: current+=subst.from_length; prematch_begin=current;
1.174 paf 542:
1.184 paf 543: if(const String* b=subst.to) // are there any b?
1.174 paf 544: result<<*b;
545: } else // simply advance
546: current++;
547: }
1.156 paf 548:
1.174 paf 549: // postmatch
550: if(size_t postmatch_length=current-prematch_begin) {
1.179 paf 551: result.langs.append(result.body, langs, prematch_begin-old_cstr, postmatch_length);
1.174 paf 552: result.body.append_strdup_know_length(prematch_begin, postmatch_length);
553: }
1.156 paf 554:
1.174 paf 555: ASSERT_STRING_INVARIANT(result);
1.82 parser 556: return result;
1.61 paf 557: }
1.113 parser 558:
/// Body iteration callback: stores one character at the write cursor and advances the cursor
static int serialize_body_char(char c, char** cur) {
	**cur=c;
	++*cur;
	return 0; // 0=continue
}
/// Body iteration callback: copies a C string (without its terminator) to the write cursor and advances the cursor
static int serialize_body_piece(const char* s, char** cur) {
	const size_t piece_length=strlen(s);
	memcpy(*cur, s, piece_length);
	*cur+=piece_length;
	return 0; // 0=continue
}
/// Langs iteration callback: writes one fragment as a language byte followed by the raw size_t length
static int serialize_lang_piece(char alang, size_t asize, char** cur) {
	// lang
	*(*cur)++=alang;
	// length [WARNING: copied, not cast: addresses must be %4=0 on sparc]
	memcpy(*cur, &asize, sizeof(asize));
	*cur+=sizeof(asize);

	return 0; // 0=continue
}
1.174 paf 576: String::Cm String::serialize(size_t prolog_length) const {
1.178 paf 577: size_t fragments_count=langs.count();
1.174 paf 578: size_t buf_length=
1.178 paf 579: prolog_length //1
580: +sizeof(size_t) //2
581: +fragments_count*(sizeof(char)+sizeof(size_t)) //3
582: +body.length() //4
583: +1; // for zero terminator used in deserialize
1.174 paf 584: String::Cm result(new(PointerFreeGC) char[buf_length], buf_length);
585:
586: // 1: prolog
587: char *cur=result.str+prolog_length;
1.191 paf 588: // 2: langs.count [WARNING: not cast, addresses must be %4=0 on sparc]
1.174 paf 589: memcpy(cur, &fragments_count, sizeof(fragments_count)); cur+=sizeof(fragments_count);
590: // 3: lang info
1.178 paf 591: langs.for_each(body, serialize_lang_piece, &cur);
1.174 paf 592: // 4: letters
1.180 paf 593: body.for_each(serialize_body_char, serialize_body_piece, &cur);
1.182 paf 594: // 5: zero terminator
595: *cur=0;
1.113 parser 596:
1.174 paf 597: return result;
1.113 parser 598: }
1.174 paf 599: bool String::deserialize(size_t prolog_length, void *buf, size_t buf_length) {
600: if(buf_length<=prolog_length)
1.148 paf 601: return false;
1.174 paf 602: buf_length-=prolog_length;
1.178 paf 603: buf_length-=1; // 5: zero terminator
1.135 paf 604:
1.174 paf 605: // 1: prolog
606: const char* cur=(const char* )buf+prolog_length;
1.113 parser 607:
1.176 paf 608: // 2: langs.count
1.191 paf 609: size_t fragments_count;
610: if(buf_length<sizeof(fragments_count)) // langs.count don't fit?
1.174 paf 611: return false;
1.191 paf 612: // [WARNING: not cast, addresses must be %4=0 on sparc]
613: memcpy(&fragments_count, cur, sizeof(fragments_count)); cur+=sizeof(fragments_count);
614: buf_length-=sizeof(fragments_count);
1.174 paf 615:
616: if(fragments_count) {
617: // 3: lang info
618: size_t total_length=0;
619: for(size_t f=0; f<fragments_count; f++) {
1.191 paf 620: char lang;
621: size_t fragment_length;
622: size_t piece_length=sizeof(lang)+sizeof(fragment_length);
1.174 paf 623: if(buf_length<piece_length) // lang+length
624: return false;
625:
1.191 paf 626: // lang
627: lang=*cur++;
628: // length [WARNING: not cast, addresses must be %4=0 on sparc]
629: memcpy(&fragment_length, cur, sizeof(fragment_length)); cur+=sizeof(fragment_length);
630:
631: // uchar needed to prevent propagating 0x80 bit to upper bytes
632: langs.append(total_length, (String::Language)(uchar)lang, fragment_length);
1.174 paf 633: total_length+=fragment_length;
1.148 paf 634:
1.174 paf 635: buf_length-=piece_length;
636: }
1.128 paf 637:
1.174 paf 638: // 4: letters
639: if(buf_length!=total_length)
1.148 paf 640: return false;
641:
1.178 paf 642: // serialize wrote extra zero byte there, we can rely on that
1.176 paf 643: body=String::Body(cur, buf_length);
1.174 paf 644: }
1.113 parser 645:
1.174 paf 646: ASSERT_STRING_INVARIANT(*this);
1.148 paf 647: return true;
1.176 paf 648: }
649:
650: const char* String::Body::v() const {
651: return CORD_to_const_char_star(body);
652: }
653: const char* String::Languages::v() const {
1.177 paf 654: if(opt.is_not_just_lang)
1.176 paf 655: return CORD_to_const_char_star(langs);
656: else
657: return (const char*)&langs;
658: }
659: const char* String::v() const {
1.195 ! paf 660: const int LIMIT_VIEW=20;
1.176 paf 661: char* buf=(char*)malloc(MAX_STRING);
662: const char*body_view=body.v();
663: const char*langs_view=langs.v();
664: snprintf(buf, MAX_STRING,
1.178 paf 665: "%d:%.*s%s} "
1.176 paf 666: "{%d:%s",
1.178 paf 667: langs.count(), LIMIT_VIEW, langs_view, strlen(langs_view)>LIMIT_VIEW?"...":"",
1.176 paf 668: strlen(body_view), body_view
669: );
670:
671: return buf;
1.113 parser 672: }
1.195 ! paf 673:
! 674: const String& String::trim(String::Trim_kind kind, const char* chars) const {
! 675: if(!length())
! 676: return *this;
! 677:
! 678: size_t substr_begin, substr_length;
! 679: Body new_body=body.trim(kind, chars, &substr_begin, &substr_length);
! 680: if(new_body==body) // we received unchanged pointer, do likewise
! 681: return *this;
! 682: // new_body differs from body, adjust langs along
! 683:
! 684: String& result=*new String;
! 685: if(!new_body) // body.trim produced empty result
! 686: return result;
! 687: // body.trim produced nonempty result
! 688:
! 689: // first: their langs
! 690: result.langs.append(result.body, langs, substr_begin, substr_length);
! 691: // next: letters themselves
! 692: result.body=new_body;
! 693:
! 694: ASSERT_STRING_INVARIANT(result);
! 695: return result;
! 696: }
E-mail: