--- parser3/src/main/pa_string.C 2001/04/03 09:58:10 1.62 +++ parser3/src/main/pa_string.C 2001/04/23 13:38:31 1.78 @@ -5,12 +5,12 @@ Author: Alexander Petrosyan (http://design.ru/paf) - $Id: pa_string.C,v 1.62 2001/04/03 09:58:10 paf Exp $ + $Id: pa_string.C,v 1.78 2001/04/23 13:38:31 paf Exp $ */ #include "pa_config_includes.h" -#include +#include "pcre.h" #include "pa_pool.h" #include "pa_string.h" @@ -22,17 +22,11 @@ #include "pa_table.h" #include "pa_threads.h" -#include "pcre.h" - //#include "pa_sapi.h" -// consts - -const int MAX_MATCH_COLUMNS=20; - // String -String::String(Pool& apool, const char *src, bool tasize_ted) : +String::String(Pool& apool, const char *src, size_t src_size, bool tainted) : Pooled(apool) { last_chunk=&head; head.count=CR_PREALLOCATED_COUNT; @@ -42,10 +36,10 @@ String::String(Pool& apool, const char * fused_rows=fsize=0; if(src) - if(tasize_ted) - APPEND_TAINTED(src, 0, 0, 0); + if(tainted) + APPEND_TAINTED(src, src_size, 0, 0); else - APPEND_CONST(src); + APPEND_CLEAN(src, src_size, 0, 0); } void String::expand() { @@ -203,7 +197,7 @@ int String::cmp(int& partial, const Stri break; if(pos+a_row->item.size > this_offset) { - if(lang!=UL_UNKNOWN && a_row->item.lang!=lang) + if(lang!=UL_UNSPECIFIED && a_row->item.lang!=lang) return -1; // wrong lang -- bail out int size_diff= @@ -283,7 +277,7 @@ int String::cmp(int& partial, const char break; if(pos+a_row->item.size > this_offset) { - if(lang!=UL_UNKNOWN && a_row->item.lang!=lang) + if(lang!=UL_UNSPECIFIED && a_row->item.lang!=lang) return -1; // wrong lang -- bail out int size_diff= @@ -346,7 +340,7 @@ const Origin& String::origin() const { } #endif -String& String::piece(size_t start, size_t finish) const { +String& String::mid(size_t start, size_t finish) const { start=max(0, start); finish=min(size(), finish); if(start==finish) @@ -419,12 +413,12 @@ void String::split(Array& result, int pos_before; // while we have 'delim'... for(; (pos_before=pos(delim, delim_size, pos_after, lang))>=0 && limit; limit--) { - result+=&piece(pos_after, pos_before); + result+=&mid(pos_after, pos_before); pos_after=pos_before+delim_size; } // last piece if(pos_after=0 && limit; limit--) { - result+=&piece(pos_after, pos_before); + result+=&mid(pos_after, pos_before); pos_after=pos_before+delim.size(); } // last piece if(pos_afterkey; o++) + if( + strchr(options, o->key) || + strchr(options, toupper(o->key))) { + *(o->result)&=~o->clear; + *(o->result)|=o->set; + } +} + +/** + returns true if fills table. + table format is defined and fixed[can be used by others]: + @verbatim + pre-match/match/post-match/1/2/3/... + @endverbatim +*/ +bool String::match(const unsigned char *pcre_tables, + const String *aorigin, const String& regexp, - const String& options, - Table **table) const { - SYNCHRONIZED(true); - static const unsigned char *tables=0; - if(!tables) { - setlocale(LC_CTYPE, "ru"); - tables = pcre_maketables(); - } - const char *pattern=regexp.cstr(); + const String *options, + Table **table, + Row_action row_action, void *info) const { + + if(!regexp.size()) + THROW(0, 0, + aorigin, + "regexp is empty"); + const char *pattern=regexp.cstr(UL_AS_IS); const char *errptr; int erroffset; - pcre *code=pcre_compile(pattern, 0, + int option_bits[2]; regex_options(options?options->cstr():0, option_bits); + pcre *code=pcre_compile(pattern, option_bits[0], &errptr, &erroffset, - tables); + pcre_tables); if(!code) THROW(0, 0, - ®exp.piece(erroffset, regexp.size()), - errptr); + ®exp.mid(erroffset, regexp.size()), + "regular expression syntax error - %s", errptr); - int ovecsize; - int *ovector=(int *)malloc(sizeof(int)*(ovecsize=(1/*.match*/+MAX_MATCH_COLUMNS)*3)); - const char *subject=cstr(); - int length=strlen(subject); - int exec_result=pcre_exec(code, 0, - subject, length, 0/*startoffset*/, - 0/*options*/, ovector, ovecsize); - - if(exec_result==PCRE_ERROR_NOMATCH) { - *table=0; - return false; + int info_substrings=pcre_info(code, 0, 0); + if(info_substrings<0) { + (*pcre_free)(code); + THROW(0, 0, + aorigin, + "pcre_info error (%d)", + info_substrings); } - if(exec_result<0) - THROW(0, 0, - 0, - "pcre_exec failed"); + int startoffset=0; + const char *subject=cstr(UL_AS_IS); + int length=strlen(subject); + int ovecsize; + int *ovector=(int *)malloc(sizeof(int)* + (ovecsize=(1/*match*/+info_substrings)*3)); - if(exec_result==0) - THROW(0, 0, - aorigin, - "produced more substrings than maximum handled by Parser, which is %d", - MAX_MATCH_COLUMNS); + { // create table + Array& columns=*NEW Array(pool()); + columns+=string_pre_match_name; + columns+=string_match_name; + columns+=string_post_match_name; + for(int i=1; i<=info_substrings; i++) { + char *column=(char *)malloc(MAX_NUMBER); + snprintf(column, MAX_NUMBER, "%d", i); + columns+=NEW String(pool(), column); // .i column name + } + *table=NEW Table(pool(), aorigin, &columns); + } - Array& columns=*NEW Array(pool()); - columns+=string_match_name; // .match column name - Array& row=*NEW Array(pool()); - row+=&piece(ovector[0], ovector[1]); // match column value - - for(int i=1; i