--- parser3/src/main/pa_string.C 2001/04/03 05:23:41 1.60 +++ parser3/src/main/pa_string.C 2001/04/23 13:38:31 1.78 @@ -5,11 +5,13 @@ Author: Alexander Petrosyan (http://design.ru/paf) - $Id: pa_string.C,v 1.60 2001/04/03 05:23:41 paf Exp $ + $Id: pa_string.C,v 1.78 2001/04/23 13:38:31 paf Exp $ */ #include "pa_config_includes.h" +#include "pcre.h" + #include "pa_pool.h" #include "pa_string.h" #include "pa_hash.h" @@ -17,12 +19,14 @@ #include "pa_common.h" #include "pa_array.h" #include "pa_globals.h" +#include "pa_table.h" +#include "pa_threads.h" //#include "pa_sapi.h" // String -String::String(Pool& apool, const char *src, bool tasize_ted) : +String::String(Pool& apool, const char *src, size_t src_size, bool tainted) : Pooled(apool) { last_chunk=&head; head.count=CR_PREALLOCATED_COUNT; @@ -32,10 +36,10 @@ String::String(Pool& apool, const char * fused_rows=fsize=0; if(src) - if(tasize_ted) - APPEND_TAINTED(src, 0, 0, 0); + if(tainted) + APPEND_TAINTED(src, src_size, 0, 0); else - APPEND_CONST(src); + APPEND_CLEAN(src, src_size, 0, 0); } void String::expand() { @@ -193,7 +197,7 @@ int String::cmp(int& partial, const Stri break; if(pos+a_row->item.size > this_offset) { - if(lang!=UL_UNKNOWN && a_row->item.lang!=lang) + if(lang!=UL_UNSPECIFIED && a_row->item.lang!=lang) return -1; // wrong lang -- bail out int size_diff= @@ -273,7 +277,7 @@ int String::cmp(int& partial, const char break; if(pos+a_row->item.size > this_offset) { - if(lang!=UL_UNKNOWN && a_row->item.lang!=lang) + if(lang!=UL_UNSPECIFIED && a_row->item.lang!=lang) return -1; // wrong lang -- bail out int size_diff= @@ -336,7 +340,7 @@ const Origin& String::origin() const { } #endif -String& String::piece(size_t start, size_t finish) const { +String& String::mid(size_t start, size_t finish) const { start=max(0, start); finish=min(size(), finish); if(start==finish) @@ -409,12 +413,12 @@ void String::split(Array& result, int pos_before; // while we have 'delim'... for(; (pos_before=pos(delim, delim_size, pos_after, lang))>=0 && limit; limit--) { - result+=&piece(pos_after, pos_before); + result+=&mid(pos_after, pos_before); pos_after=pos_before+delim_size; } // last piece if(pos_after=0 && limit; limit--) { - result+=&piece(pos_after, pos_before); + result+=&mid(pos_after, pos_before); pos_after=pos_before+delim.size(); } // last piece if(pos_afterkey; o++) + if( + strchr(options, o->key) || + strchr(options, toupper(o->key))) { + *(o->result)&=~o->clear; + *(o->result)|=o->set; + } +} + +/** + returns true if fills table. + table format is defined and fixed[can be used by others]: + @verbatim + pre-match/match/post-match/1/2/3/... + @endverbatim +*/ +bool String::match(const unsigned char *pcre_tables, + const String *aorigin, + const String& regexp, + const String *options, + Table **table, + Row_action row_action, void *info) const { + + if(!regexp.size()) + THROW(0, 0, + aorigin, + "regexp is empty"); + const char *pattern=regexp.cstr(UL_AS_IS); + const char *errptr; + int erroffset; + int option_bits[2]; regex_options(options?options->cstr():0, option_bits); + pcre *code=pcre_compile(pattern, option_bits[0], + &errptr, &erroffset, + pcre_tables); + + if(!code) + THROW(0, 0, + ®exp.mid(erroffset, regexp.size()), + "regular expression syntax error - %s", errptr); + + int info_substrings=pcre_info(code, 0, 0); + if(info_substrings<0) { + (*pcre_free)(code); + THROW(0, 0, + aorigin, + "pcre_info error (%d)", + info_substrings); + } + + int startoffset=0; + const char *subject=cstr(UL_AS_IS); + int length=strlen(subject); + int ovecsize; + int *ovector=(int *)malloc(sizeof(int)* + (ovecsize=(1/*match*/+info_substrings)*3)); + + { // create table + Array& columns=*NEW Array(pool()); + columns+=string_pre_match_name; + columns+=string_match_name; + columns+=string_post_match_name; + for(int i=1; i<=info_substrings; i++) { + char *column=(char *)malloc(MAX_NUMBER); + snprintf(column, MAX_NUMBER, "%d", i); + columns+=NEW String(pool(), column); // .i column name + } + *table=NEW Table(pool(), aorigin, &columns); + } + + int exec_option_bits=0; + while(true) { + int exec_substrings=pcre_exec(code, 0, + subject, length, startoffset, + exec_option_bits, ovector, ovecsize); + + if(exec_substrings==PCRE_ERROR_NOMATCH) { + (*pcre_free)(code); + (*row_action)(**table, 0/*last time, no row*/, 0, 0, info); + return option_bits[1]!=0; // global=true+table, not global=false + } + + if(exec_substrings<0) { + (*pcre_free)(code); + THROW(0, 0, + aorigin, + "regular expression execute error (%d)", + exec_substrings); + } + + Array& row=*NEW Array(pool()); + row+=&mid(0, ovector[0]); // .pre-match column value + row+=&mid(ovector[0], ovector[1]); // .match + row+=&mid(ovector[1], size()); // .post-match + + for(int i=1; i