Annotation of parser3/src/types/pa_vregex.C, revision 1.1
1.1 ! misha 1: /** @file
! 2: Parser: @b regex class.
! 3:
! 4: Copyright(c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com)
! 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
! 6: */
! 7:
! 8: static const char * const IDENT_VREGEX_C="$Date: 2009-04-11 09:18:48 $";
! 9:
! 10: #include "pa_vregex.h"
! 11: #include "pa_vint.h"
! 12:
! 13:
! 14: char* get_pcre_exec_error_text(int exec_result){
! 15: switch(exec_result){
! 16: case PCRE_ERROR_BADUTF8:
! 17: case PCRE_ERROR_BADUTF8_OFFSET:
! 18: return "UTF-8 validation failed during pcre_exec (%d).";
! 19: break;
! 20: default:
! 21: return "execution error (%d)";
! 22: }
! 23: }
! 24:
! 25:
! 26: Value& VRegex::as_expr_result(bool/*return_string_as_is=false*/) {
! 27: return *new VInt(as_int());
! 28: }
! 29:
! 30: void VRegex::regex_options(const String* options, int* result){
! 31: struct Regex_option {
! 32: const char* key;
! 33: const char* keyAlt;
! 34: int clear;
! 35: int set;
! 36: int *result;
! 37: } regex_option[]={
! 38: {"i", "I", 0, PCRE_CASELESS, result}, // a=A
! 39: {"s", "S", 0, PCRE_DOTALL, result}, // ^\n\n$ [default]
! 40: {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$
! 41: {"x", 0, 0, PCRE_EXTENDED, result}, // whitespace in regex ignored
! 42: {"U", 0, 0, PCRE_UNGREEDY, result}, // ungreedy patterns (greedy by default)
! 43: {"g", "G", 0, MF_GLOBAL_SEARCH, result+1}, // many rows
! 44: {"'", 0, 0, MF_NEED_PRE_POST_MATCH, result+1},
! 45: {"n", 0, 0, MF_JUST_COUNT_MATCHES, result+1},
! 46: {0, 0, 0, 0, 0}
! 47: };
! 48: result[0]=PCRE_EXTRA /* backslash+non-special char causes error */
! 49: | PCRE_DOTALL /* dot matches all chars including newline char */
! 50: | PCRE_DOLLAR_ENDONLY /* dollar matches only end of string, but not newline chars */;
! 51: result[1]=0;
! 52:
! 53: if(options && !options->is_empty())
! 54: for(Regex_option *o=regex_option; o->key; o++)
! 55: if(
! 56: options->pos(o->key)!=STRING_NOT_FOUND
! 57: || (o->keyAlt && options->pos(o->keyAlt)!=STRING_NOT_FOUND)
! 58: ){
! 59: *o->result &= ~o->clear;
! 60: *o->result |= o->set;
! 61: }
! 62: }
! 63:
! 64:
! 65: void VRegex::set(Charset& acharset, const String* aregex, const String* aoptions){
! 66: if(aregex->is_empty())
! 67: throw Exception(PARSER_RUNTIME,
! 68: 0,
! 69: "regexp is empty");
! 70:
! 71: fcharset=&acharset;
! 72: fpattern=aregex->cstr(String::L_UNSPECIFIED); // fix any tainted with L_REGEX
! 73: fpattern_len=strlen(fpattern);
! 74:
! 75: regex_options(aoptions, foptions);
! 76: }
! 77:
! 78:
! 79: void VRegex::compile(){
! 80: const char* err_ptr;
! 81: int err_offset;
! 82: int options=foptions[0];
! 83:
! 84: // @todo (for UTF-8): check string & pattern and use PCRE_NO_UTF8_CHECK option
! 85: if(fcharset->isUTF8())
! 86: options|=PCRE_UTF8;
! 87:
! 88: fcode=pcre_compile(fpattern, options,
! 89: &err_ptr, &err_offset,
! 90: fcharset->pcre_tables);
! 91:
! 92: if(!fcode){
! 93: throw Exception(PCRE_EXCEPTION_TYPE,
! 94: new String(fpattern+err_offset, fpattern_len-err_offset, true/*tainted*/),
! 95: "regular expression syntax error - %s", err_ptr);
! 96: }
! 97:
! 98: }
! 99:
! 100:
! 101: size_t VRegex::full_info(int type){
! 102: size_t result;
! 103: int fullinfo_result=pcre_fullinfo(fcode, fextra, type, &result);
! 104: if(fullinfo_result<0){
! 105: throw Exception(PCRE_EXCEPTION_TYPE,
! 106: new String(fpattern, strlen(fpattern), true),
! 107: "pcre_full_info error (%d)", fullinfo_result);
! 108: }
! 109:
! 110: return result;
! 111: };
! 112:
! 113:
! 114: size_t VRegex::get_info_size(){
! 115: return full_info(PCRE_INFO_SIZE);
! 116: }
! 117:
! 118:
! 119: size_t VRegex::get_study_size(){
! 120: return full_info(PCRE_INFO_STUDYSIZE);
! 121: }
! 122:
! 123: void VRegex::study(){
! 124: if(fstudied)
! 125: return;
! 126:
! 127: const char* err_ptr;
! 128: fextra=pcre_study(fcode, 0/*options*/, &err_ptr);
! 129:
! 130: if(err_ptr){
! 131: throw Exception(PCRE_EXCEPTION_TYPE,
! 132: new String(fpattern, fpattern_len, true),
! 133: "pcre_study error: %s", err_ptr);
! 134: }
! 135:
! 136: fstudied=true;
! 137: }
! 138:
! 139:
! 140: int VRegex::exec(const char* string, size_t string_len, int* ovector, int ovector_size, int prestart){
! 141: int result=pcre_exec(fcode, fextra,
! 142: string, string_len, prestart,
! 143: 0, ovector, ovector_size);
! 144:
! 145: if(result<0 && result!=PCRE_ERROR_NOMATCH){
! 146: throw Exception(PCRE_EXCEPTION_TYPE,
! 147: new String(fpattern, fpattern_len, true),
! 148: get_pcre_exec_error_text(result), result);
! 149: }
! 150:
! 151: return result;
! 152: }
! 153:
! 154:
E-mail: