Annotation of parser3/src/types/pa_vregex.C, revision 1.5
1.1 misha 1: /** @file
2: Parser: @b regex class.
3:
4: Copyright(c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com)
5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
6: */
7:
// File-local identification string holding the date of this file's last commit.
// NOTE(review): the "$Date: ... $" form looks like CVS/RCS keyword expansion -- confirm before editing by hand.
1.5 ! misha 8: static const char * const IDENT_VREGEX_C="$Date: 2009-05-14 08:10:09 $";
1.1 misha 9:
10: #include "pa_vregex.h"
1.2 misha 11: #include "pa_vint.h"
1.1 misha 12:
13:
14: char* get_pcre_exec_error_text(int exec_result){
15: switch(exec_result){
16: case PCRE_ERROR_BADUTF8:
17: case PCRE_ERROR_BADUTF8_OFFSET:
18: return "UTF-8 validation failed during pcre_exec (%d).";
19: break;
20: default:
21: return "execution error (%d)";
22: }
23: }
24:
25:
26: Value& VRegex::as_expr_result(bool/*return_string_as_is=false*/) {
27: return *new VInt(as_int());
28: }
29:
30: void VRegex::regex_options(const String* options, int* result){
31: struct Regex_option {
32: const char* key;
33: const char* keyAlt;
34: int clear;
35: int set;
36: int *result;
37: } regex_option[]={
38: {"i", "I", 0, PCRE_CASELESS, result}, // a=A
39: {"s", "S", 0, PCRE_DOTALL, result}, // ^\n\n$ [default]
40: {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$
41: {"x", 0, 0, PCRE_EXTENDED, result}, // whitespace in regex ignored
42: {"U", 0, 0, PCRE_UNGREEDY, result}, // ungreedy patterns (greedy by default)
43: {"g", "G", 0, MF_GLOBAL_SEARCH, result+1}, // many rows
44: {"'", 0, 0, MF_NEED_PRE_POST_MATCH, result+1},
45: {"n", 0, 0, MF_JUST_COUNT_MATCHES, result+1},
46: {0, 0, 0, 0, 0}
47: };
48: result[0]=PCRE_EXTRA /* backslash+non-special char causes error */
49: | PCRE_DOTALL /* dot matches all chars including newline char */
50: | PCRE_DOLLAR_ENDONLY /* dollar matches only end of string, but not newline chars */;
51: result[1]=0;
52:
53: if(options && !options->is_empty())
54: for(Regex_option *o=regex_option; o->key; o++)
55: if(
56: options->pos(o->key)!=STRING_NOT_FOUND
57: || (o->keyAlt && options->pos(o->keyAlt)!=STRING_NOT_FOUND)
58: ){
59: *o->result &= ~o->clear;
60: *o->result |= o->set;
61: }
62: }
63:
64:
65: void VRegex::set(Charset& acharset, const String* aregex, const String* aoptions){
66: if(aregex->is_empty())
67: throw Exception(PARSER_RUNTIME,
68: 0,
69: "regexp is empty");
70:
71: fcharset=&acharset;
1.5 ! misha 72:
! 73: // for untainting L_TAINTED strings as L_REGEX
! 74: String& regex=*new String;
! 75: regex.append(*aregex, String::L_REGEX);
! 76:
! 77: fpattern=regex.cstr(String::L_UNSPECIFIED);
1.1 misha 78:
79: regex_options(aoptions, foptions);
80: }
81:
82:
83: void VRegex::compile(){
84: const char* err_ptr;
85: int err_offset;
86: int options=foptions[0];
87:
88: // @todo (for UTF-8): check string & pattern and use PCRE_NO_UTF8_CHECK option
89: if(fcharset->isUTF8())
90: options|=PCRE_UTF8;
91:
92: fcode=pcre_compile(fpattern, options,
93: &err_ptr, &err_offset,
94: fcharset->pcre_tables);
95:
96: if(!fcode){
97: throw Exception(PCRE_EXCEPTION_TYPE,
1.4 misha 98: new String(fpattern+err_offset, String::L_TAINTED),
1.1 misha 99: "regular expression syntax error - %s", err_ptr);
100: }
101:
102: }
103:
104:
105: size_t VRegex::full_info(int type){
106: size_t result;
107: int fullinfo_result=pcre_fullinfo(fcode, fextra, type, &result);
108: if(fullinfo_result<0){
109: throw Exception(PCRE_EXCEPTION_TYPE,
1.4 misha 110: new String(fpattern, String::L_TAINTED),
1.1 misha 111: "pcre_full_info error (%d)", fullinfo_result);
112: }
113:
114: return result;
115: };
116:
117:
118: size_t VRegex::get_info_size(){
119: return full_info(PCRE_INFO_SIZE);
120: }
121:
122:
123: size_t VRegex::get_study_size(){
124: return full_info(PCRE_INFO_STUDYSIZE);
125: }
126:
127: void VRegex::study(){
128: if(fstudied)
129: return;
130:
131: const char* err_ptr;
132: fextra=pcre_study(fcode, 0/*options*/, &err_ptr);
133:
134: if(err_ptr){
135: throw Exception(PCRE_EXCEPTION_TYPE,
1.4 misha 136: new String(fpattern, String::L_TAINTED),
1.1 misha 137: "pcre_study error: %s", err_ptr);
138: }
139:
140: fstudied=true;
141: }
142:
143:
144: int VRegex::exec(const char* string, size_t string_len, int* ovector, int ovector_size, int prestart){
145: int result=pcre_exec(fcode, fextra,
146: string, string_len, prestart,
147: 0, ovector, ovector_size);
148:
149: if(result<0 && result!=PCRE_ERROR_NOMATCH){
150: throw Exception(PCRE_EXCEPTION_TYPE,
1.4 misha 151: new String(fpattern, String::L_TAINTED),
1.1 misha 152: get_pcre_exec_error_text(result), result);
153: }
154:
155: return result;
156: }
157:
158:
E-mail: