--- parser3/src/main/pa_string.C	2001/04/03 08:23:08	1.61
+++ parser3/src/main/pa_string.C	2001/04/03 14:39:03	1.63
@@ -5,11 +5,13 @@
 
 	Author: Alexander Petrosyan (http://design.ru/paf)
 
-	$Id: pa_string.C,v 1.61 2001/04/03 08:23:08 paf Exp $
+	$Id: pa_string.C,v 1.63 2001/04/03 14:39:03 paf Exp $
 */
 
 #include "pa_config_includes.h"
 
+#include <locale.h>
+
 #include "pa_pool.h"
 #include "pa_string.h"
 #include "pa_hash.h"
@@ -18,6 +20,9 @@
 #include "pa_array.h"
 #include "pa_globals.h"
 #include "pa_table.h"
+#include "pa_threads.h"
+
+#include "pcre.h"
 
 //#include "pa_sapi.h"
 
@@ -453,9 +458,125 @@ void String::split(Array& result,
 	}
 }
 
-Table& String::match(const String *aorigin,
-						  const String& regexp, const String& options) const {
-	Array *columns=0;
-	Table& result=*NEW Table(pool(), aorigin, columns);
-	return result;
+/**
+	Translate a string of match-option letters into flags.
+
+	@param options	option letters, or 0 for "no options"; every known letter
+					is looked up in both lower and upper case, letters that
+					are not in the table below are ignored
+	@param result	array of at least two ints, overwritten on every call:
+					result[0] - option bits for pcre_compile, starting from
+					PCRE_EXTRA | PCRE_DOTALL;
+					result[1] - 0, or 1 when 'g' was given
+
+	Table entries are applied in table order, so the 'm' entry (which clears
+	PCRE_DOTALL) is applied after the 's' entry (which sets it).
+
+	@test really @b test: s x m [tested: i & g ]
+*/
+static void regex_options(const char *options, int *result){
+	struct Regex_option {
+		char key;		// option letter, 0 terminates the table
+		int clear, set;	// bits removed from, then added to, *result
+		int *result;	// which of the two output ints this letter affects
+	} regex_option[]={
+		{'i', 0, PCRE_CASELESS, result}, // a=A
+		{'s', 0, PCRE_DOTALL, result}, // \n\n$
+		{'x', 0, PCRE_EXTENDED, result}, // whitespace in regex ignored
+		{'m', PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$
+		{'g', 0, 1, result+1}, // many rows
+		{0},
+	};
+	// defaults, used as is when no options are given
+	result[0]=PCRE_EXTRA | PCRE_DOTALL;
+	result[1]=0;
+
+	if(options)
+		for(Regex_option *o=regex_option; o->key; o++)
+			if(
+				strchr(options, o->key) ||
+				strchr(options, toupper(o->key))) {
+				*(o->result)&=~o->clear;
+				*(o->result)|=o->set;
+			}
+}
+
+/// @test setlocale param to auto.p @test pcre_malloc & pcre_free substs
+bool String::match(const String *aorigin,
+				   const String& regexp,
+				   const String *options,
+				   Table **table) const {
+	static const unsigned char *tables=0; { SYNCHRONIZED(true);
+		if(!tables) {
+			setlocale(LC_CTYPE, "ru");
+			tables=pcre_maketables();
+		}
+	}
+	const char *pattern=regexp.cstr();
+	const char *errptr;
+	int erroffset;
+	int option_bits[2];
regex_options(options?options->cstr():0, option_bits); + pcre *code=pcre_compile(pattern, option_bits[0], + &errptr, &erroffset, + tables); + + if(!code) { + THROW(0, 0, + &regexp.piece(erroffset, regexp.size()), + errptr); + } + + int info_substrings=pcre_info(code, 0, 0); + if(info_substrings<0) { + (*pcre_free)(code); + THROW(0, 0, + aorigin, + "pcre_info error #%d", + info_substrings); + } + + int startoffset=0; + const char *subject=cstr(); + int length=strlen(subject); + int ovecsize; + int *ovector=(int *)malloc(sizeof(int)* + (ovecsize=(3/*pre/match/post*/+info_substrings)*3)); + + // create table + Array& columns=*NEW Array(pool()); + columns+=string_pre_match_name; + columns+=string_match_name; + columns+=string_post_match_name; + for(int i=1; i<=info_substrings; i++) { + char *column=(char *)malloc(MAX_NUMBER); + snprintf(column, MAX_NUMBER, "%d", i); + columns+=NEW String(pool(), column); // .i column name + } + *table=NEW Table(pool(), aorigin, &columns); + + while(true) { + int exec_substrings=pcre_exec(code, 0, + subject, length, startoffset, + 0/*option_bits[0]*/, ovector, ovecsize); + + if(exec_substrings==PCRE_ERROR_NOMATCH) { + (*pcre_free)(code); + return option_bits[1]!=0; // global=true+table, not global=false + } + + if(exec_substrings<0) { + (*pcre_free)(code); + THROW(0, 0, + aorigin, + "pcre_exec error #%d", + exec_substrings); + } + + Array& row=*NEW Array(pool()); + row+=&piece(0, ovector[0]); // pre-match + row+=&piece(ovector[0], ovector[1]); // match + row+=&piece(ovector[1], size()); // post-match + + for(int i=1; i