--- parser3/src/main/pa_string.C 2001/04/03 14:39:03 1.63 +++ parser3/src/main/pa_string.C 2001/05/23 08:57:39 1.91 @@ -5,12 +5,13 @@ Author: Alexander Petrosyan (http://design.ru/paf) - $Id: pa_string.C,v 1.63 2001/04/03 14:39:03 paf Exp $ + $Id: pa_string.C,v 1.91 2001/05/23 08:57:39 parser Exp $ */ #include "pa_config_includes.h" -#include +#include "pcre.h" +#include "internal.h" #include "pa_pool.h" #include "pa_string.h" @@ -20,15 +21,8 @@ #include "pa_array.h" #include "pa_globals.h" #include "pa_table.h" -#include "pa_threads.h" - -#include "pcre.h" - -//#include "pa_sapi.h" -// String - -String::String(Pool& apool, const char *src, bool tasize_ted) : +String::String(Pool& apool, const char *src, size_t src_size, bool tainted) : Pooled(apool) { last_chunk=&head; head.count=CR_PREALLOCATED_COUNT; @@ -38,14 +32,14 @@ String::String(Pool& apool, const char * fused_rows=fsize=0; if(src) - if(tasize_ted) - APPEND_TAINTED(src, 0, 0, 0); + if(tainted) + APPEND_TAINTED(src, src_size, 0, 0); else - APPEND_CONST(src); + APPEND_CLEAN(src, src_size, 0, 0); } void String::expand() { - size_t new_chunk_count=last_chunk->count+last_chunk->count*CR_GROW_PERCENT/100; + size_t new_chunk_count=last_chunk->count+CR_GROW_COUNT; last_chunk=static_cast( malloc(sizeof(size_t)+sizeof(Chunk::Row)*new_chunk_count+sizeof(Chunk *))); last_chunk->count=new_chunk_count; @@ -188,18 +182,14 @@ int String::cmp(int& partial, const Stri Chunk::Row *b_end=src.append_here; size_t a_countdown=a_chunk->count; size_t b_countdown=b_chunk->count; - bool a_break=false; - bool b_break=false; size_t result; size_t pos=0; - while(true) { - a_break=a_row==a_end; - b_break=b_row==b_end; - if(a_break || b_break) - break; + bool a_break=size()==0; + bool b_break=src.size()==0; + if(!(a_break || b_break)) while(true) { if(pos+a_row->item.size > this_offset) { - if(lang!=UL_UNKNOWN && a_row->item.lang!=lang) + if(lang!=UL_UNSPECIFIED && a_row->item.lang!=lang) return -1; // wrong lang -- bail out int size_diff= @@ -230,7 +220,10 @@ int String::cmp(int& partial, const Stri pos+=a_row->item.size; a_row++; a_countdown--; a_offset=0; } - + if(b_break=b_row==b_end) { + a_break=a_row==a_end; + break; + } if(!b_countdown) { b_chunk=b_row->link; b_row=b_chunk->rows; @@ -242,6 +235,10 @@ int String::cmp(int& partial, const Stri a_row++; a_countdown--; } + if(a_break=a_row==a_end) { + b_break=b_row==b_end; + break; + } if(!a_countdown) { a_chunk=a_row->link; a_row=a_chunk->rows; @@ -270,16 +267,13 @@ int String::cmp(int& partial, const char size_t b_offset=0; Chunk::Row *a_end=append_here; size_t a_countdown=a_chunk->count; - bool a_break=false; - bool b_break=false; size_t pos=0; - while(true) { - a_break=a_row==a_end; - if(a_break || b_break) - break; + bool a_break=size()==0; + bool b_break=b_size==0; + if(!(a_break || b_break)) while(true) { if(pos+a_row->item.size > this_offset) { - if(lang!=UL_UNKNOWN && a_row->item.lang!=lang) + if(lang!=UL_UNSPECIFIED && a_row->item.lang!=lang) return -1; // wrong lang -- bail out int size_diff= @@ -313,6 +307,9 @@ int String::cmp(int& partial, const char a_row++; a_countdown--; } + a_break=a_row==a_end; + if(a_break || b_break) + break; if(!a_countdown) { a_chunk=a_row->link; a_row=a_chunk->rows; @@ -338,11 +335,15 @@ const Origin& String::origin() const { // determining origin by last appended piece // because first one frequently constant. // ex: ^load[/file] "document_root" + "/file" - return append_here[-1].item.origin; + // when last peice is constant, + // ex: parser_root_auto_path{dynamic} / auto.p{const} + // using first piece + Origin& last_origin=append_here[-1].item.origin; + return last_origin.file ? last_origin : head.rows[0].item.origin; } #endif -String& String::piece(size_t start, size_t finish) const { +String& String::mid(size_t start, size_t finish) const { start=max(0, start); finish=min(size(), finish); if(start==finish) @@ -415,12 +416,12 @@ void String::split(Array& result, int pos_before; // while we have 'delim'... for(; (pos_before=pos(delim, delim_size, pos_after, lang))>=0 && limit; limit--) { - result+=&piece(pos_after, pos_before); + result+=&mid(pos_after, pos_before); pos_after=pos_before+delim_size; } // last piece if(pos_after=0 && limit; limit--) { - result+=&piece(pos_after, pos_before); + result+=&mid(pos_after, pos_before); pos_after=pos_before+delim.size(); } // last piece if(pos_aftercstr():0, option_bits); pcre *code=pcre_compile(pattern, option_bits[0], &errptr, &erroffset, - tables); + pcre_tables); - if(!code) { + if(!code) THROW(0, 0, - ®exp.piece(erroffset, regexp.size()), - errptr); - } + ®exp.mid(erroffset, regexp.size()), + "regular expression syntax error - %s", errptr); int info_substrings=pcre_info(code, 0, 0); if(info_substrings<0) { (*pcre_free)(code); THROW(0, 0, - aorigin, - "pcre_info error #%d", - info_substrings); + aorigin, + "pcre_info error (%d)", + info_substrings); } int startoffset=0; - const char *subject=cstr(); + const char *subject=cstr(UL_AS_IS); int length=strlen(subject); int ovecsize; int *ovector=(int *)malloc(sizeof(int)* - (ovecsize=(3/*pre/match/post*/+info_substrings)*3)); + (ovecsize=(1/*match*/+info_substrings)*3)); - // create table - Array& columns=*NEW Array(pool()); - columns+=string_pre_match_name; - columns+=string_match_name; - columns+=string_post_match_name; - for(int i=1; i<=info_substrings; i++) { - char *column=(char *)malloc(MAX_NUMBER); - snprintf(column, MAX_NUMBER, "%d", i); - columns+=NEW String(pool(), column); // .i column name + { // create table + Array& columns=*NEW Array(pool()); + columns+=string_pre_match_name; + columns+=string_match_name; + columns+=string_post_match_name; + for(int i=1; i<=info_substrings; i++) { + char *column=(char *)malloc(MAX_NUMBER); + snprintf(column, MAX_NUMBER, "%d", i); + columns+=NEW String(pool(), column); // .i column name + } + *table=NEW Table(pool(), aorigin, &columns); } - *table=NEW Table(pool(), aorigin, &columns); + int exec_option_bits=0; while(true) { int exec_substrings=pcre_exec(code, 0, subject, length, startoffset, - 0/*option_bits[0]*/, ovector, ovecsize); + exec_option_bits, ovector, ovecsize); if(exec_substrings==PCRE_ERROR_NOMATCH) { (*pcre_free)(code); + (*row_action)(**table, 0/*last time, no row*/, 0, 0, info); return option_bits[1]!=0; // global=true+table, not global=false } @@ -552,31 +555,119 @@ bool String::match(const String *aorigin (*pcre_free)(code); THROW(0, 0, aorigin, - "pcre_exec error #%d", + "regular expression execute error (%d)", exec_substrings); } Array& row=*NEW Array(pool()); - row+=&piece(0, ovector[0]); // pre-match - row+=&piece(ovector[0], ovector[1]); // match - row+=&piece(ovector[1], size()); // post-match + row+=&mid(0, ovector[0]); // .prematch column value + row+=&mid(ovector[0], ovector[1]); // .match + row+=&mid(ovector[1], size()); // .postmatch for(int i=1; i(kind)); // never + a=b=0; // calm, compiler + break; // never + } + + const Chunk *chunk=&head; + do { + const Chunk::Row *row=chunk->rows; + for(size_t i=0; icount; i++, row++) { + if(row==append_here) + goto break2; + + char *new_cstr=(char *)pool.malloc(row->item.size); + char *dest=new_cstr; + const char *src=row->item.ptr; + for(int size=row->item.size; size--; src++) { + unsigned char c=a[(unsigned char)*src]; + if(b) + c=b[c]; + + *dest++=(char)c; + } + + result.APPEND(new_cstr, row->item.size, + row->item.lang, + row->item.origin.file, row->item.origin.line); + } + chunk=row->link; + } while(chunk); +break2: + + return result; +} + +double String::as_double() const { + double result; + const char *cstr=this->cstr(); + char *error_pos=0; + // 0xABC + if(cstr[0]=='0' && (cstr[1]=='x' || cstr[1]=='X')) + result=(double)(unsigned long)strtol(cstr, &error_pos, 0); + else + result=strtod(cstr, &error_pos); + + if(error_pos && *error_pos) + THROW(0, 0, + this, + "invalid number (double)"); + + return result; +} +int String::as_int() const { + int result; + const char *cstr=this->cstr(); + char *error_pos=0; + // 0xABC + if(cstr[0]=='0' && (cstr[1]=='x' || cstr[1]=='X')) + result=(int)(unsigned long)strtol(cstr, &error_pos, 0); + else + result=(int)strtol(cstr, &error_pos, 0); + + if(error_pos && *error_pos) + THROW(0, 0, + this, + "invalid number (int)"); + + return result; +}