Annotation of parser3/src/types/pa_vregex.C, revision 1.16
1.1 misha 1: /** @file
2: Parser: @b regex class.
3:
1.14 moko 4: Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com)
1.1 misha 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
6: */
7:
8: #include "pa_vregex.h"
1.2 misha 9: #include "pa_vint.h"
1.8 misha 10: #include "pa_vstring.h"
11:
1.16 ! misha 12: volatile const char * IDENT_PA_VREGEX_C="$Id: pa_vregex.C,v 1.15 2012/05/23 16:26:41 moko Exp $" IDENT_PA_VREGEX_H;
1.8 misha 13:
14: // defines
15:
16: #define REGEX_PATTERN_NAME "pattern"
17: #define REGEX_OPTIONS_NAME "options"
1.1 misha 18:
19:
1.11 moko 20: const char* get_pcre_exec_error_text(int exec_result){
1.1 misha 21: switch(exec_result){
22: case PCRE_ERROR_BADUTF8:
23: case PCRE_ERROR_BADUTF8_OFFSET:
24: return "UTF-8 validation failed during pcre_exec (%d).";
25: break;
26: default:
27: return "execution error (%d)";
28: }
29: }
30:
31:
1.15 moko 32: Value& VRegex::as_expr_result() {
1.1 misha 33: return *new VInt(as_int());
34: }
35:
36: void VRegex::regex_options(const String* options, int* result){
37: struct Regex_option {
38: const char* key;
39: const char* keyAlt;
40: int clear;
41: int set;
42: int *result;
43: } regex_option[]={
44: {"i", "I", 0, PCRE_CASELESS, result}, // a=A
45: {"s", "S", 0, PCRE_DOTALL, result}, // ^\n\n$ [default]
46: {"m", "M", PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$
47: {"x", 0, 0, PCRE_EXTENDED, result}, // whitespace in regex ignored
48: {"U", 0, 0, PCRE_UNGREEDY, result}, // ungreedy patterns (greedy by default)
49: {"g", "G", 0, MF_GLOBAL_SEARCH, result+1}, // many rows
50: {"'", 0, 0, MF_NEED_PRE_POST_MATCH, result+1},
51: {"n", 0, 0, MF_JUST_COUNT_MATCHES, result+1},
52: {0, 0, 0, 0, 0}
53: };
54: result[0]=PCRE_EXTRA /* backslash+non-special char causes error */
55: | PCRE_DOTALL /* dot matches all chars including newline char */
56: | PCRE_DOLLAR_ENDONLY /* dollar matches only end of string, but not newline chars */;
57: result[1]=0;
58:
1.11 moko 59: if(options && !options->is_empty()){
1.12 misha 60: size_t valid_options=0;
1.11 moko 61: for(Regex_option *o=regex_option; o->key; o++)
1.1 misha 62: if(
63: options->pos(o->key)!=STRING_NOT_FOUND
64: || (o->keyAlt && options->pos(o->keyAlt)!=STRING_NOT_FOUND)
65: ){
66: *o->result &= ~o->clear;
67: *o->result |= o->set;
1.11 moko 68: valid_options++;
1.1 misha 69: }
1.11 moko 70: if(options->length()!=valid_options)
71: throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION);
72: }
1.1 misha 73: }
74:
75:
76: void VRegex::set(Charset& acharset, const String* aregex, const String* aoptions){
77: if(aregex->is_empty())
78: throw Exception(PARSER_RUNTIME,
79: 0,
80: "regexp is empty");
81:
82: fcharset=&acharset;
1.5 misha 83:
1.7 misha 84: fpattern=aregex->untaint_cstr(String::L_REGEX);
1.1 misha 85:
1.9 misha 86: foptions_cstr=aoptions?aoptions->cstr():0;
1.8 misha 87:
1.1 misha 88: regex_options(aoptions, foptions);
89: }
90:
91:
92: void VRegex::compile(){
93: const char* err_ptr;
94: int err_offset;
95: int options=foptions[0];
96:
97: // @todo (for UTF-8): check string & pattern and use PCRE_NO_UTF8_CHECK option
98: if(fcharset->isUTF8())
1.13 misha 99: options |= (PCRE_UTF8 | PCRE_UCP);
1.1 misha 100:
101: fcode=pcre_compile(fpattern, options,
102: &err_ptr, &err_offset,
103: fcharset->pcre_tables);
104:
105: if(!fcode){
106: throw Exception(PCRE_EXCEPTION_TYPE,
1.4 misha 107: new String(fpattern+err_offset, String::L_TAINTED),
1.1 misha 108: "regular expression syntax error - %s", err_ptr);
109: }
110:
111: }
112:
113:
114: size_t VRegex::full_info(int type){
115: size_t result;
116: int fullinfo_result=pcre_fullinfo(fcode, fextra, type, &result);
117: if(fullinfo_result<0){
118: throw Exception(PCRE_EXCEPTION_TYPE,
1.4 misha 119: new String(fpattern, String::L_TAINTED),
1.1 misha 120: "pcre_full_info error (%d)", fullinfo_result);
121: }
122:
123: return result;
124: };
125:
126:
127: size_t VRegex::get_info_size(){
128: return full_info(PCRE_INFO_SIZE);
129: }
130:
131:
132: size_t VRegex::get_study_size(){
133: return full_info(PCRE_INFO_STUDYSIZE);
134: }
135:
136: void VRegex::study(){
137: if(fstudied)
138: return;
139:
140: const char* err_ptr;
141: fextra=pcre_study(fcode, 0/*options*/, &err_ptr);
142:
143: if(err_ptr){
144: throw Exception(PCRE_EXCEPTION_TYPE,
1.4 misha 145: new String(fpattern, String::L_TAINTED),
1.1 misha 146: "pcre_study error: %s", err_ptr);
147: }
148:
149: fstudied=true;
150: }
151:
152:
153: int VRegex::exec(const char* string, size_t string_len, int* ovector, int ovector_size, int prestart){
154: int result=pcre_exec(fcode, fextra,
155: string, string_len, prestart,
1.10 misha 156: prestart>0 ? PCRE_NO_UTF8_CHECK : 0, ovector, ovector_size);
1.1 misha 157:
158: if(result<0 && result!=PCRE_ERROR_NOMATCH){
159: throw Exception(PCRE_EXCEPTION_TYPE,
1.4 misha 160: new String(fpattern, String::L_TAINTED),
1.1 misha 161: get_pcre_exec_error_text(result), result);
162: }
163:
164: return result;
165: }
166:
167:
1.8 misha 168: Value* VRegex::get_element(const String& aname) {
169: if(aname == REGEX_PATTERN_NAME)
170: return new VString(*new String(fpattern, String::L_TAINTED));
171:
172: if(aname == REGEX_OPTIONS_NAME)
173: return new VString(*new String(foptions_cstr, String::L_TAINTED));
174:
1.16 ! misha 175: // method (if any)
1.8 misha 176: if(Value* result=VStateless_object::get_element(aname))
177: return result;
178:
179: throw Exception(PARSER_RUNTIME,
180: &aname,
181: "reading of invalid field");
182: }
E-mail: