Annotation of parser3/src/types/pa_vregex.C, revision 1.11
1.1 misha 1: /** @file
2: Parser: @b regex class.
3:
4: Copyright(c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com)
5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
6: */
7:
1.11 ! moko 8: static const char * const IDENT_VREGEX_C="$Date: 2009-11-06 04:54:34 $";
1.1 misha 9:
10: #include "pa_vregex.h"
1.2 misha 11: #include "pa_vint.h"
1.8 misha 12: #include "pa_vstring.h"
13:
14:
15: // defines
16:
17: #define REGEX_PATTERN_NAME "pattern"
18: #define REGEX_OPTIONS_NAME "options"
1.1 misha 19:
20:
1.11 ! moko 21: const char* get_pcre_exec_error_text(int exec_result){
1.1 misha 22: switch(exec_result){
23: case PCRE_ERROR_BADUTF8:
24: case PCRE_ERROR_BADUTF8_OFFSET:
25: return "UTF-8 validation failed during pcre_exec (%d).";
26: break;
27: default:
28: return "execution error (%d)";
29: }
30: }
31:
32:
33: Value& VRegex::as_expr_result(bool/*return_string_as_is=false*/) {
34: return *new VInt(as_int());
35: }
36:
37: void VRegex::regex_options(const String* options, int* result){
38: struct Regex_option {
39: const char* key;
40: const char* keyAlt;
41: int clear;
42: int set;
43: int *result;
44: } regex_option[]={
45: {"i", "I", 0, PCRE_CASELESS, result}, // a=A
46: {"s", "S", 0, PCRE_DOTALL, result}, // ^\n\n$ [default]
47: {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$
48: {"x", 0, 0, PCRE_EXTENDED, result}, // whitespace in regex ignored
49: {"U", 0, 0, PCRE_UNGREEDY, result}, // ungreedy patterns (greedy by default)
50: {"g", "G", 0, MF_GLOBAL_SEARCH, result+1}, // many rows
51: {"'", 0, 0, MF_NEED_PRE_POST_MATCH, result+1},
52: {"n", 0, 0, MF_JUST_COUNT_MATCHES, result+1},
53: {0, 0, 0, 0, 0}
54: };
55: result[0]=PCRE_EXTRA /* backslash+non-special char causes error */
56: | PCRE_DOTALL /* dot matches all chars including newline char */
57: | PCRE_DOLLAR_ENDONLY /* dollar matches only end of string, but not newline chars */;
58: result[1]=0;
59:
1.11 ! moko 60: if(options && !options->is_empty()){
! 61: int valid_options=0;
! 62: for(Regex_option *o=regex_option; o->key; o++)
1.1 misha 63: if(
64: options->pos(o->key)!=STRING_NOT_FOUND
65: || (o->keyAlt && options->pos(o->keyAlt)!=STRING_NOT_FOUND)
66: ){
67: *o->result &= ~o->clear;
68: *o->result |= o->set;
1.11 ! moko 69: valid_options++;
1.1 misha 70: }
1.11 ! moko 71: if(options->length()!=valid_options)
! 72: throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION);
! 73: }
1.1 misha 74: }
75:
76:
77: void VRegex::set(Charset& acharset, const String* aregex, const String* aoptions){
78: if(aregex->is_empty())
79: throw Exception(PARSER_RUNTIME,
80: 0,
81: "regexp is empty");
82:
83: fcharset=&acharset;
1.5 misha 84:
1.7 misha 85: fpattern=aregex->untaint_cstr(String::L_REGEX);
1.1 misha 86:
1.9 misha 87: foptions_cstr=aoptions?aoptions->cstr():0;
1.8 misha 88:
1.1 misha 89: regex_options(aoptions, foptions);
90: }
91:
92:
93: void VRegex::compile(){
94: const char* err_ptr;
95: int err_offset;
96: int options=foptions[0];
97:
98: // @todo (for UTF-8): check string & pattern and use PCRE_NO_UTF8_CHECK option
99: if(fcharset->isUTF8())
100: options|=PCRE_UTF8;
101:
102: fcode=pcre_compile(fpattern, options,
103: &err_ptr, &err_offset,
104: fcharset->pcre_tables);
105:
106: if(!fcode){
107: throw Exception(PCRE_EXCEPTION_TYPE,
1.4 misha 108: new String(fpattern+err_offset, String::L_TAINTED),
1.1 misha 109: "regular expression syntax error - %s", err_ptr);
110: }
111:
112: }
113:
114:
115: size_t VRegex::full_info(int type){
116: size_t result;
117: int fullinfo_result=pcre_fullinfo(fcode, fextra, type, &result);
118: if(fullinfo_result<0){
119: throw Exception(PCRE_EXCEPTION_TYPE,
1.4 misha 120: new String(fpattern, String::L_TAINTED),
1.1 misha 121: "pcre_full_info error (%d)", fullinfo_result);
122: }
123:
124: return result;
125: };
126:
127:
128: size_t VRegex::get_info_size(){
129: return full_info(PCRE_INFO_SIZE);
130: }
131:
132:
133: size_t VRegex::get_study_size(){
134: return full_info(PCRE_INFO_STUDYSIZE);
135: }
136:
137: void VRegex::study(){
138: if(fstudied)
139: return;
140:
141: const char* err_ptr;
142: fextra=pcre_study(fcode, 0/*options*/, &err_ptr);
143:
144: if(err_ptr){
145: throw Exception(PCRE_EXCEPTION_TYPE,
1.4 misha 146: new String(fpattern, String::L_TAINTED),
1.1 misha 147: "pcre_study error: %s", err_ptr);
148: }
149:
150: fstudied=true;
151: }
152:
153:
154: int VRegex::exec(const char* string, size_t string_len, int* ovector, int ovector_size, int prestart){
155: int result=pcre_exec(fcode, fextra,
156: string, string_len, prestart,
1.10 misha 157: prestart>0 ? PCRE_NO_UTF8_CHECK : 0, ovector, ovector_size);
1.1 misha 158:
159: if(result<0 && result!=PCRE_ERROR_NOMATCH){
160: throw Exception(PCRE_EXCEPTION_TYPE,
1.4 misha 161: new String(fpattern, String::L_TAINTED),
1.1 misha 162: get_pcre_exec_error_text(result), result);
163: }
164:
165: return result;
166: }
167:
168:
1.8 misha 169: Value* VRegex::get_element(const String& aname) {
170: if(aname == REGEX_PATTERN_NAME)
171: return new VString(*new String(fpattern, String::L_TAINTED));
172:
173: if(aname == REGEX_OPTIONS_NAME)
174: return new VString(*new String(foptions_cstr, String::L_TAINTED));
175:
176: // .CLASS, .CLASS_NAME
177: if(Value* result=VStateless_object::get_element(aname))
178: return result;
179:
180: throw Exception(PARSER_RUNTIME,
181: &aname,
182: "reading of invalid field");
183: }
E-mail: