--- parser3/src/main/pa_string.C 2001/04/03 09:58:10 1.62 +++ parser3/src/main/pa_string.C 2001/05/21 16:38:46 1.89 @@ -5,12 +5,13 @@ Author: Alexander Petrosyan (http://design.ru/paf) - $Id: pa_string.C,v 1.62 2001/04/03 09:58:10 paf Exp $ + $Id: pa_string.C,v 1.89 2001/05/21 16:38:46 parser Exp $ */ #include "pa_config_includes.h" -#include +#include "pcre.h" +#include "internal.h" #include "pa_pool.h" #include "pa_string.h" @@ -20,19 +21,8 @@ #include "pa_array.h" #include "pa_globals.h" #include "pa_table.h" -#include "pa_threads.h" - -#include "pcre.h" - -//#include "pa_sapi.h" - -// consts -const int MAX_MATCH_COLUMNS=20; - -// String - -String::String(Pool& apool, const char *src, bool tasize_ted) : +String::String(Pool& apool, const char *src, size_t src_size, bool tainted) : Pooled(apool) { last_chunk=&head; head.count=CR_PREALLOCATED_COUNT; @@ -42,14 +32,14 @@ String::String(Pool& apool, const char * fused_rows=fsize=0; if(src) - if(tasize_ted) - APPEND_TAINTED(src, 0, 0, 0); + if(tainted) + APPEND_TAINTED(src, src_size, 0, 0); else - APPEND_CONST(src); + APPEND_CLEAN(src, src_size, 0, 0); } void String::expand() { - size_t new_chunk_count=last_chunk->count+last_chunk->count*CR_GROW_PERCENT/100; + size_t new_chunk_count=last_chunk->count+CR_GROW_COUNT; last_chunk=static_cast( malloc(sizeof(size_t)+sizeof(Chunk::Row)*new_chunk_count+sizeof(Chunk *))); last_chunk->count=new_chunk_count; @@ -192,18 +182,14 @@ int String::cmp(int& partial, const Stri Chunk::Row *b_end=src.append_here; size_t a_countdown=a_chunk->count; size_t b_countdown=b_chunk->count; - bool a_break=false; - bool b_break=false; size_t result; size_t pos=0; - while(true) { - a_break=a_row==a_end; - b_break=b_row==b_end; - if(a_break || b_break) - break; + bool a_break=size()==0; + bool b_break=size()==0; + if(!(a_break || b_break)) while(true) { if(pos+a_row->item.size > this_offset) { - if(lang!=UL_UNKNOWN && a_row->item.lang!=lang) + if(lang!=UL_UNSPECIFIED && a_row->item.lang!=lang) return -1; // wrong lang -- bail out int size_diff= @@ -234,7 +220,10 @@ int String::cmp(int& partial, const Stri pos+=a_row->item.size; a_row++; a_countdown--; a_offset=0; } - + if(b_break=b_row==b_end) { + a_break=a_row==a_end; + break; + } if(!b_countdown) { b_chunk=b_row->link; b_row=b_chunk->rows; @@ -246,6 +235,10 @@ int String::cmp(int& partial, const Stri a_row++; a_countdown--; } + if(a_break=a_row==a_end) { + b_break=b_row==b_end; + break; + } if(!a_countdown) { a_chunk=a_row->link; a_row=a_chunk->rows; @@ -274,16 +267,13 @@ int String::cmp(int& partial, const char size_t b_offset=0; Chunk::Row *a_end=append_here; size_t a_countdown=a_chunk->count; - bool a_break=false; - bool b_break=false; size_t pos=0; - while(true) { - a_break=a_row==a_end; - if(a_break || b_break) - break; + bool a_break=size()==0; + bool b_break=b_size==0; + if(!(a_break || b_break)) while(true) { if(pos+a_row->item.size > this_offset) { - if(lang!=UL_UNKNOWN && a_row->item.lang!=lang) + if(lang!=UL_UNSPECIFIED && a_row->item.lang!=lang) return -1; // wrong lang -- bail out int size_diff= @@ -317,6 +307,9 @@ int String::cmp(int& partial, const char a_row++; a_countdown--; } + a_break=a_row==a_end; + if(a_break || b_break) + break; if(!a_countdown) { a_chunk=a_row->link; a_row=a_chunk->rows; @@ -342,11 +335,15 @@ const Origin& String::origin() const { // determining origin by last appended piece // because first one frequently constant. // ex: ^load[/file] "document_root" + "/file" - return append_here[-1].item.origin; + // when last peice is constant, + // ex: parser_root_auto_path{dynamic} / auto.p{const} + // using first piece + Origin& last_origin=append_here[-1].item.origin; + return last_origin.file ? last_origin : head.rows[0].item.origin; } #endif -String& String::piece(size_t start, size_t finish) const { +String& String::mid(size_t start, size_t finish) const { start=max(0, start); finish=min(size(), finish); if(start==finish) @@ -419,12 +416,12 @@ void String::split(Array& result, int pos_before; // while we have 'delim'... for(; (pos_before=pos(delim, delim_size, pos_after, lang))>=0 && limit; limit--) { - result+=&piece(pos_after, pos_before); + result+=&mid(pos_after, pos_before); pos_after=pos_before+delim_size; } // last piece if(pos_after=0 && limit; limit--) { - result+=&piece(pos_after, pos_before); + result+=&mid(pos_after, pos_before); pos_after=pos_before+delim.size(); } // last piece if(pos_afterkey; o++) + if( + strchr(options, o->key) || + strchr(options, toupper(o->key))) { + *(o->result)&=~o->clear; + *(o->result)|=o->set; + } +} + +/// @todo maybe need speedup: some option to remove pre/match/post string generation +bool String::match(const unsigned char *pcre_tables, + const String *aorigin, const String& regexp, - const String& options, - Table **table) const { - SYNCHRONIZED(true); - static const unsigned char *tables=0; - if(!tables) { - setlocale(LC_CTYPE, "ru"); - tables = pcre_maketables(); - } - const char *pattern=regexp.cstr(); + const String *options, + Table **table, + Row_action row_action, void *info) const { + + if(!regexp.size()) + THROW(0, 0, + aorigin, + "regexp is empty"); + const char *pattern=regexp.cstr(UL_AS_IS); const char *errptr; int erroffset; - pcre *code=pcre_compile(pattern, 0, + int option_bits[2]; regex_options(options?options->cstr():0, option_bits); + pcre *code=pcre_compile(pattern, option_bits[0], &errptr, &erroffset, - tables); + pcre_tables); if(!code) THROW(0, 0, - ®exp.piece(erroffset, regexp.size()), - errptr); + ®exp.mid(erroffset, regexp.size()), + "regular expression syntax error - %s", errptr); - int ovecsize; - int *ovector=(int *)malloc(sizeof(int)*(ovecsize=(1/*.match*/+MAX_MATCH_COLUMNS)*3)); - const char *subject=cstr(); + int info_substrings=pcre_info(code, 0, 0); + if(info_substrings<0) { + (*pcre_free)(code); + THROW(0, 0, + aorigin, + "pcre_info error (%d)", + info_substrings); + } + + int startoffset=0; + const char *subject=cstr(UL_AS_IS); int length=strlen(subject); - int exec_result=pcre_exec(code, 0, - subject, length, 0/*startoffset*/, - 0/*options*/, ovector, ovecsize); + int ovecsize; + int *ovector=(int *)malloc(sizeof(int)* + (ovecsize=(1/*match*/+info_substrings)*3)); - if(exec_result==PCRE_ERROR_NOMATCH) { - *table=0; - return false; + { // create table + Array& columns=*NEW Array(pool()); + columns+=string_pre_match_name; + columns+=string_match_name; + columns+=string_post_match_name; + for(int i=1; i<=info_substrings; i++) { + char *column=(char *)malloc(MAX_NUMBER); + snprintf(column, MAX_NUMBER, "%d", i); + columns+=NEW String(pool(), column); // .i column name + } + *table=NEW Table(pool(), aorigin, &columns); } - if(exec_result<0) + int exec_option_bits=0; + while(true) { + int exec_substrings=pcre_exec(code, 0, + subject, length, startoffset, + exec_option_bits, ovector, ovecsize); + + if(exec_substrings==PCRE_ERROR_NOMATCH) { + (*pcre_free)(code); + (*row_action)(**table, 0/*last time, no row*/, 0, 0, info); + return option_bits[1]!=0; // global=true+table, not global=false + } + + if(exec_substrings<0) { + (*pcre_free)(code); + THROW(0, 0, + aorigin, + "regular expression execute error (%d)", + exec_substrings); + } + + Array& row=*NEW Array(pool()); + row+=&mid(0, ovector[0]); // .prematch column value + row+=&mid(ovector[0], ovector[1]); // .match + row+=&mid(ovector[1], size()); // .postmatch + + for(int i=1; i(kind)); // never + a=b=0; // calm, compiler + break; // never + } + + const Chunk *chunk=&head; + do { + const Chunk::Row *row=chunk->rows; + for(size_t i=0; icount; i++, row++) { + if(row==append_here) + goto break2; + + char *new_cstr=(char *)pool.malloc(row->item.size); + char *dest=new_cstr; + const char *src=row->item.ptr; + for(int size=row->item.size; size--; src++) { + unsigned char c=a[(unsigned char)*src]; + if(b) + c=b[c]; + + *dest++=(char)c; + } + + result.APPEND(new_cstr, row->item.size, + row->item.lang, + row->item.origin.file, row->item.origin.line); + } + chunk=row->link; + } while(chunk); +break2: + + return result; +} + +double String::as_double() { + double result; + const char *cstr=this->cstr(); + char *error_pos=0; + // 0xABC + if(cstr[0]=='0' && (cstr[1]=='x' || cstr[1]=='X')) + result=(double)(unsigned long)strtol(cstr, &error_pos, 0); + else + result=strtod(cstr, &error_pos); + + if(error_pos && *error_pos) THROW(0, 0, - 0, - "pcre_exec failed"); + this, + "invalid number (double)"); - if(exec_result==0) + return result; +} +int String::as_int() { + int result; + const char *cstr=this->cstr(); + char *error_pos=0; + // 0xABC + if(cstr[0]=='0' && (cstr[1]=='x' || cstr[1]=='X')) + result=(int)(unsigned long)strtol(cstr, &error_pos, 0); + else + result=(int)strtol(cstr, &error_pos, 0); + + if(error_pos && *error_pos) THROW(0, 0, - aorigin, - "produced more substrings than maximum handled by Parser, which is %d", - MAX_MATCH_COLUMNS); + this, + "invalid number (int)"); - Array& columns=*NEW Array(pool()); - columns+=string_match_name; // .match column name - Array& row=*NEW Array(pool()); - row+=&piece(ovector[0], ovector[1]); // match column value - - for(int i=1; i