|
|
| version 1.21, 2001/02/13 10:30:22 | version 1.76, 2001/04/10 11:24:00 |
|---|---|
| Line 1 | Line 1 |
| /* | /** @file |
| $Id$ | Parser: string class. @see untaint.C. |
| Copyright (c) 2001 ArtLebedev Group (http://www.artlebedev.com) | |
| Author: Alexander Petrosyan <paf@design.ru> (http://design.ru/paf) | |
| $Id$ | |
| */ | */ |
| #include <string.h> | #include "pa_config_includes.h" |
| #include "pcre.h" | |
| #include "pa_pool.h" | #include "pa_pool.h" |
| #include "pa_string.h" | #include "pa_string.h" |
| #include "pa_hash.h" | #include "pa_hash.h" |
| #include "pa_exception.h" | |
| #include "pa_common.h" | |
| #include "pa_array.h" | |
| #include "pa_globals.h" | |
| #include "pa_table.h" | |
| #include "pa_threads.h" | |
| //#include "pa_sapi.h" | |
| // String | // String |
| String::String(Pool& apool) : | String::String(Pool& apool, const char *src, size_t src_size, bool tainted) : |
| Pooled(apool) { | Pooled(apool) { |
| head.count=curr_chunk_rows=CR_PREALLOCATED_COUNT; | last_chunk=&head; |
| head.count=CR_PREALLOCATED_COUNT; | |
| append_here=head.rows; | append_here=head.rows; |
| head.preallocated_link=0; | head.preallocated_link=0; |
| link_row=&head.rows[curr_chunk_rows]; | link_row=&head.rows[head.count]; |
| fused_rows=fsize=0; | fused_rows=fsize=0; |
| if(src) | |
| if(tainted) | |
| APPEND_TAINTED(src, src_size, 0, 0); | |
| else | |
| APPEND_CLEAN(src, src_size, 0, 0); | |
| } | } |
| void String::expand() { | void String::expand() { |
| curr_chunk_rows+=curr_chunk_rows*CR_GROW_PERCENT/100; | size_t new_chunk_count=last_chunk->count+last_chunk->count*CR_GROW_PERCENT/100; |
| Chunk *chunk=static_cast<Chunk *>( | last_chunk=static_cast<Chunk *>( |
| pool.malloc(sizeof(int)+sizeof(Chunk::Row)*curr_chunk_rows+sizeof(Chunk *))); | malloc(sizeof(size_t)+sizeof(Chunk::Row)*new_chunk_count+sizeof(Chunk *))); |
| chunk->count=curr_chunk_rows; | last_chunk->count=new_chunk_count; |
| link_row->link=chunk; | link_row->link=last_chunk; |
| append_here=chunk->rows; | append_here=last_chunk->rows; |
| link_row=&chunk->rows[curr_chunk_rows]; | link_row=&last_chunk->rows[last_chunk->count]; |
| link_row->link=0; | link_row->link=0; |
| } | } |
| String::String(const String& src) : | String::String(const String& src) : Pooled(src.pool()) { |
| Pooled(src.pool) { | |
| head.count=CR_PREALLOCATED_COUNT; | head.count=CR_PREALLOCATED_COUNT; |
| int src_used_rows=src.used_rows(); | size_t src_used_rows=src.fused_rows; |
| if(src_used_rows<=head.count) { | if(src_used_rows<=head.count) { |
| // all new rows fit into preallocated area | // all new rows fit into preallocated area |
| curr_chunk_rows=head.count; | size_t curr_chunk_rows=head.count; |
| memcpy(head.rows, src.head.rows, sizeof(Chunk::Row)*src_used_rows); | memcpy(head.rows, src.head.rows, sizeof(Chunk::Row)*src_used_rows); |
| append_here=&head.rows[src_used_rows]; | append_here=&head.rows[src_used_rows]; |
| link_row=&head.rows[curr_chunk_rows]; | link_row=&head.rows[curr_chunk_rows]; |
| Line 50 String::String(const String& src) : | Line 72 String::String(const String& src) : |
| // allocating only enough mem to fit src string rows | // allocating only enough mem to fit src string rows |
| // next append would allocate a new chunk | // next append would allocate a new chunk |
| // | // |
| // new rows don't fit into preallocated area: splitting into two chunks | // new rows don't fit into preallocated area: splitting into two chunks |
| // preallocated chunk src to constructing head | // preallocated chunk src to constructing head |
| memcpy(head.rows, src.head.rows, sizeof(Chunk::Row)*head.count); | memcpy(head.rows, src.head.rows, sizeof(Chunk::Row)*head.count); |
| // remaining rows into new_chunk | // remaining rows into new_chunk |
| curr_chunk_rows=src_used_rows-head.count; | size_t curr_chunk_rows=src_used_rows-head.count; |
| Chunk *new_chunk=static_cast<Chunk *>( | Chunk *new_chunk=static_cast<Chunk *>( |
| pool.malloc(sizeof(int)+sizeof(Chunk::Row)*curr_chunk_rows+sizeof(Chunk *))); | malloc(sizeof(size_t)+sizeof(Chunk::Row)*curr_chunk_rows+sizeof(Chunk *))); |
| new_chunk->count=curr_chunk_rows; | new_chunk->count=curr_chunk_rows; |
| head.preallocated_link=new_chunk; | head.preallocated_link=new_chunk; |
| append_here=link_row=&new_chunk->rows[curr_chunk_rows]; | append_here=link_row=&new_chunk->rows[new_chunk->count]; |
| Chunk *old_chunk=src.head.preallocated_link; | Chunk *old_chunk=src.head.preallocated_link; |
| Chunk::Row *new_rows=new_chunk->rows; | Chunk::Row *new_rows=new_chunk->rows; |
| int rows_left_to_copy=curr_chunk_rows; | size_t rows_left_to_copy=new_chunk->count; |
| while(true) { | while(true) { |
| int old_count=old_chunk->count; | size_t old_count=old_chunk->count; |
| Chunk *next_chunk=old_chunk->rows[old_count].link; | Chunk *next_chunk=old_chunk->rows[old_count].link; |
| if(next_chunk) { | if(next_chunk) { |
| // not last source chunk | // not last source chunk |
| Line 88 String::String(const String& src) : | Line 110 String::String(const String& src) : |
| fsize=src.fsize; | fsize=src.fsize; |
| } | } |
| String& String::append(const String& src, Untaint_lang lang, bool forced) { | |
| const Chunk *chunk=&src.head; | |
| do { | |
| const Chunk::Row *row=chunk->rows; | |
| for(size_t i=0; i<chunk->count; i++, row++) { | |
| if(row==src.append_here) | |
| goto break2; | |
| APPEND(row->item.ptr, row->item.size, | |
| (lang!=UL_PASS_APPENDED && (row->item.lang==UL_TAINTED || forced))?lang:row->item.lang, | |
| row->item.origin.file, row->item.origin.line); | |
| } | |
| chunk=row->link; | |
| } while(chunk); | |
| break2: | |
| return *this; | |
| } | |
| String& String::real_append(STRING_APPEND_PARAMS) { | String& String::real_append(STRING_APPEND_PARAMS) { |
| if(!src) | if(!src) |
| return *this; | return *this; |
| int len=strlen(src); | if(!size) |
| if(!len) | size=strlen(src); |
| if(!size) | |
| return *this; | return *this; |
| if(chunk_is_full()) | if(chunk_is_full()) |
| expand(); | expand(); |
| append_here->item.ptr=src; | append_here->item.ptr=src; |
| fsize+=append_here->item.size=len; | fsize+=append_here->item.size=size; |
| append_here->item.lang=lang; | |
| #ifndef NO_STRING_ORIGIN | #ifndef NO_STRING_ORIGIN |
| append_here->item.origin.file=file; | append_here->item.origin.file=file; |
| append_here->item.origin.line=line; | append_here->item.origin.line=line; |
| Line 109 String& String::real_append(STRING_APPEN | Line 151 String& String::real_append(STRING_APPEN |
| return *this; | return *this; |
| } | } |
| char *String::cstr() const { | |
| char *result=static_cast<char *>(pool.malloc(size()+1)); | |
| char *copy_here=result; | |
| const Chunk *chunk=&head; | |
| do { | |
| const Chunk::Row *row=chunk->rows; | |
| for(int i=0; i<chunk->count; i++) { | |
| if(row==append_here) | |
| goto break2; | |
| memcpy(copy_here, row->item.ptr, row->item.size); | |
| copy_here+=row->item.size; | |
| row++; | |
| } | |
| chunk=row->link; | |
| } while(chunk); | |
| break2: | |
| *copy_here=0; | |
| return result; | |
| } | |
| uint String::hash_code() const { | uint String::hash_code() const { |
| uint result=0; | uint result=0; |
| const Chunk *chunk=&head; | const Chunk *chunk=&head; |
| do { | do { |
| const Chunk::Row *row=chunk->rows; | const Chunk::Row *row=chunk->rows; |
| for(int i=0; i<chunk->count; i++) { | for(size_t i=0; i<chunk->count; i++) { |
| if(row==append_here) | if(row==append_here) |
| goto break2; | goto break2; |
| Line 150 break2: | Line 170 break2: |
| return result; | return result; |
| } | } |
| bool String::operator == (const String& src) const { | /// @todo move 'lang' skipping to pos |
| if(size() != src.size()) | int String::cmp(int& partial, const String& src, |
| return false; | size_t this_offset, Untaint_lang lang) const { |
| partial=-1; | |
| this_offset=min(this_offset, size()-1); | |
| const Chunk *a_chunk=&head; | const Chunk *a_chunk=&head; |
| const Chunk *b_chunk=&src.head; | const Chunk *b_chunk=&src.head; |
| const Chunk::Row *a_row=a_chunk->rows; | const Chunk::Row *a_row=a_chunk->rows; |
| const Chunk::Row *b_row=b_chunk->rows; | const Chunk::Row *b_row=b_chunk->rows; |
| int a_offset=0; | size_t a_offset=this_offset; |
| int b_offset=0; | size_t b_offset=0; |
| Chunk::Row *a_end=append_here; | Chunk::Row *a_end=append_here; |
| Chunk::Row *b_end=src.append_here; | Chunk::Row *b_end=src.append_here; |
| int a_countdown=a_chunk->count; | size_t a_countdown=a_chunk->count; |
| int b_countdown=b_chunk->count; | size_t b_countdown=b_chunk->count; |
| bool a_break=false; | bool a_break=false; |
| bool b_break=false; | bool b_break=false; |
| size_t result; | |
| size_t pos=0; | |
| while(true) { | while(true) { |
| int size_diff= | |
| (a_row->item.size-a_offset)- | |
| (b_row->item.size-b_offset); | |
| if(size_diff==0) { // a has same size as b | |
| if(memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, a_row->item.size-a_offset)!=0) | |
| return false; | |
| a_row++; a_countdown--; a_offset=0; | |
| b_row++; b_countdown--; b_offset=0; | |
| } else if (size_diff>0) { // a longer | |
| if(memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, b_row->item.size-b_offset)!=0) | |
| return false; | |
| a_offset+=b_row->item.size-b_offset; | |
| b_row++; b_countdown--; b_offset=0; | |
| } else { // b longer | |
| if(memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, a_row->item.size-a_offset)!=0) | |
| return false; | |
| b_offset+=a_row->item.size-a_offset; | |
| a_row++; a_countdown--; a_offset=0; | |
| } | |
| a_break=a_row==a_end; | a_break=a_row==a_end; |
| b_break=b_row==b_end; | b_break=b_row==b_end; |
| if(a_break || b_break) | if(a_break || b_break) |
| break; | break; |
| if(pos+a_row->item.size > this_offset) { | |
| if(lang!=UL_UNSPECIFIED && a_row->item.lang!=lang) | |
| return -1; // wrong lang -- bail out | |
| int size_diff= | |
| (a_row->item.size-a_offset)- | |
| (b_row->item.size-b_offset); | |
| if(size_diff==0) { // a has same size as b | |
| result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, | |
| a_row->item.size-a_offset); | |
| if(result) | |
| return result; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; a_offset=0; | |
| b_row++; b_countdown--; b_offset=0; | |
| } else if (size_diff>0) { // a longer | |
| result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, | |
| b_row->item.size-b_offset); | |
| if(result) | |
| return result; | |
| a_offset+=b_row->item.size-b_offset; | |
| b_row++; b_countdown--; b_offset=0; | |
| } else { // b longer | |
| result=memcmp(a_row->item.ptr+a_offset, b_row->item.ptr+b_offset, | |
| a_row->item.size-a_offset); | |
| if(result) | |
| return result; | |
| b_offset+=a_row->item.size-a_offset; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; a_offset=0; | |
| } | |
| if(!b_countdown) { | |
| b_chunk=b_row->link; | |
| b_row=b_chunk->rows; | |
| b_countdown=b_chunk->count; | |
| } | |
| } else { | |
| a_offset-=a_row->item.size; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; | |
| } | |
| if(!a_countdown) { | if(!a_countdown) { |
| a_chunk=a_row->link; | a_chunk=a_row->link; |
| a_row=a_chunk->rows; | a_row=a_chunk->rows; |
| a_countdown=a_chunk->count; | a_countdown=a_chunk->count; |
| } | } |
| if(!b_countdown) { | |
| b_chunk=b_row->link; | |
| b_row=b_chunk->rows; | |
| b_countdown=b_chunk->count; | |
| } | |
| } | } |
| return a_break==b_break; | if(a_break==b_break) { // ended simultaneously |
| partial=0; return 0; | |
| } else if(a_break) { // first bytes equal, but a ended before b | |
| partial=1; return -1; | |
| } else { | |
| partial=2; return +1; | |
| } | |
| } | } |
| String& String::append(const String_iterator& begin, const String_iterator& end) { | /// @todo move 'lang' skipping to pos |
| return z; | int String::cmp(int& partial, const char* b_ptr, size_t src_size, |
| } | size_t this_offset, Untaint_lang lang) const { |
| partial=-1; | |
| size_t b_size=src_size?src_size:b_ptr?strlen(b_ptr):0; | |
| this_offset=min(this_offset, size()-1); | |
| const Chunk *a_chunk=&head; | |
| const Chunk::Row *a_row=a_chunk->rows; | |
| size_t a_offset=this_offset; | |
| size_t b_offset=0; | |
| Chunk::Row *a_end=append_here; | |
| size_t a_countdown=a_chunk->count; | |
| bool a_break=false; | |
| bool b_break=false; | |
| size_t pos=0; | |
| while(true) { | |
| a_break=a_row==a_end; | |
| if(a_break || b_break) | |
| break; | |
| // Char_types | if(pos+a_row->item.size > this_offset) { |
| if(lang!=UL_UNSPECIFIED && a_row->item.lang!=lang) | |
| return -1; // wrong lang -- bail out | |
| int size_diff= | |
| (a_row->item.size-a_offset)- | |
| (b_size-b_offset); | |
| if(size_diff==0) { // a has same size as b | |
| if(size_t result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset, | |
| a_row->item.size-a_offset)!=0) | |
| return result; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; a_offset=0; | |
| b_break=true; | |
| } else if (size_diff>0) { // a longer | |
| if(size_t result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset, | |
| b_size-b_offset)!=0) | |
| return result; | |
| a_offset+=b_size-b_offset; | |
| b_break=true; | |
| } else { // b longer | |
| if(size_t result=memcmp(a_row->item.ptr+a_offset, b_ptr+b_offset, | |
| a_row->item.size-a_offset)!=0) | |
| return result; | |
| b_offset+=a_row->item.size-a_offset; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; a_offset=0; | |
| } | |
| } else { | |
| a_offset-=a_row->item.size; | |
| pos+=a_row->item.size; | |
| a_row++; a_countdown--; | |
| } | |
| Char_types::Char_types() { | if(!a_countdown) { |
| memset(types, 0, sizeof(types)); | a_chunk=a_row->link; |
| a_row=a_chunk->rows; | |
| a_countdown=a_chunk->count; | |
| } | |
| } | |
| if(a_break==b_break) { // ended simultaneously | |
| partial=0; return 0; | |
| } else if(a_break) { // first bytes equal, but a ended before b | |
| partial=1; return -1; | |
| } else { | |
| partial=2; return +1; | |
| } | |
| } | } |
| // String_iterator | #ifndef NO_STRING_ORIGIN |
| const Origin& String::origin() const { | |
| if(!fused_rows) | |
| THROW(0, 0, | |
| 0, | |
| "String::origin() of empty string called"); | |
| // determining origin by last appended piece | |
| // because first one frequently constant. | |
| // ex: ^load[/file] "document_root" + "/file" | |
| return append_here[-1].item.origin; | |
| } | |
| #endif | |
| // home string | String& String::mid(size_t start, size_t finish) const { |
| String& string; | start=max(0, start); |
| // the row in which we are | finish=min(size(), finish); |
| Chunk::Row *read_here; | if(start==finish) |
| // position in that row's string fragment | return *empty_string; |
| int offset; | |
| // when read_here reaches this row, move to the next chunk | |
| Chunk::Row *link_row; | |
| bool feof; | String& result=*NEW String(pool()); |
| String_iterator::String_iterator(String& astring) : string(astring) { | size_t pos=0; |
| read_here=string.head.rows; | const Chunk *chunk=&head; |
| position=string.size()==0?0:read_here->item.ptr; | do { |
| link_row=string.preallocated_link; | const Chunk::Row *row=chunk->rows; |
| } | for(size_t i=0; i<chunk->count; pos+=row->item.size, i++, row++) { |
| if(row==append_here) | |
| goto break2; | |
| char String_iterator::operator() { | size_t item_finish=pos+row->item.size; |
| return position?*position:0; | if(item_finish > start) { // started now or already? |
| bool started=result.size()==0; // started now? | |
| bool finished=finish <= item_finish; // finished now? | |
| size_t offset=started?start-pos:0; | |
| size_t size=finished?finish-pos:row->item.size; | |
| result.APPEND( | |
| row->item.ptr+offset, size-offset, | |
| row->item.lang, | |
| row->item.origin.file, row->item.origin.line); | |
| if(finished) | |
| goto break2; | |
| } | |
| } | |
| chunk=row->link; | |
| } while(chunk); | |
| break2: | |
| // SAPI::log(pool(), "piece of '%s' from %d to %d is '%s'", | |
| //cstr(), start, finish, result.cstr()); | |
| return result; | |
| } | } |
| void String_iterator::skip() { | int String::pos(const String& substr, |
| if(!position) | size_t result, Untaint_lang lang) const { |
| return; | for(; result<size(); result++) { |
| int partial; cmp(partial, substr, result, lang); | |
| if(++position== | if( |
| read_here->item.ptr+ | partial==0 || // full match |
| read_here->item.size) { | partial==2) // 'substr' starts 'this'+'result' |
| return result; | |
| } | |
| return -1; | |
| } | |
| // next row | int String::pos(const char *substr, size_t substr_size, |
| if(++read_here==string.append_here) { | size_t result, Untaint_lang lang) const { |
| feof=true; | for(; result<size(); result++) { |
| return; | int partial; cmp(partial, substr, substr_size, result, lang); |
| } | if( |
| if(read_here==link_row) { | partial==0 || // full match |
| Chunk *chunk=link_row->link; | partial==2) // 'substr' starts 'this'+'result' |
| if(!chunk) | return result; |
| string.pool.exception().raise( | } |
| "String_iterator::skip() missed " | |
| "read_here==string.append_here check"); | return -1; |
| } | |
| read_here=chunk->rows; | void String::split(Array& result, |
| link_row=chunk->rows[chunk->count]; | size_t* pos_after_ref, |
| } | const char *delim, size_t delim_size, |
| position=read_here->item.ptr; | Untaint_lang lang, int limit) const { |
| if(delim_size) { | |
| size_t pos_after=pos_after_ref?*pos_after_ref:0; | |
| int pos_before; | |
| // while we have 'delim'... | |
| for(; (pos_before=pos(delim, delim_size, pos_after, lang))>=0 && limit; limit--) { | |
| result+=&mid(pos_after, pos_before); | |
| pos_after=pos_before+delim_size; | |
| } | |
| // last piece | |
| if(pos_after<size() && limit) { | |
| result+=&mid(pos_after, size()); | |
| pos_after=size(); | |
| } | |
| if(pos_after_ref) | |
| *pos_after_ref=pos_after; | |
| } else { // empty delim | |
| result+=this; | |
| if(pos_after_ref) | |
| *pos_after_ref+=size(); | |
| } | } |
| } | } |
| bool String_iterator::skip_to(char c) { | void String::split(Array& result, |
| if(!position) | size_t* pos_after_ref, |
| return false; | const String& delim, Untaint_lang lang, |
| int limit) const { | |
| if(delim.size()) { | |
| size_t pos_after=pos_after_ref?*pos_after_ref:0; | |
| int pos_before; | |
| // while we have 'delim'... | |
| for(; (pos_before=pos(delim, pos_after, lang))>=0 && limit; limit--) { | |
| result+=&mid(pos_after, pos_before); | |
| pos_after=pos_before+delim.size(); | |
| } | |
| // last piece | |
| if(pos_after<size() && limit) { | |
| result+=&mid(pos_after, size()); | |
| pos_after=size(); | |
| } | |
| if(pos_after_ref) | |
| *pos_after_ref=pos_after; | |
| } else { // empty delim | |
| result+=this; | |
| if(pos_after_ref) | |
| *pos_after_ref+=size(); | |
| } | |
| } | |
| while(true) { | /// @test really @b test: s x m [tested: i & g ] |
| if(char *found=static_cast<char *>( | static void regex_options(char *options, int *result){ |
| memchr(ptr, c, read_here->size-(position-read_here->ptr)))) { | struct Regex_option { |
| position=found; | char key; |
| return true; | int clear, set; |
| } | int *result; |
| } regex_option[]={ | |
| {'i', 0, PCRE_CASELESS, result}, // a=A | |
| {'s', 0, PCRE_DOTALL, result}, // \n\n$ | |
| {'x', 0, PCRE_EXTENDED, result}, // whitespace in regex ignored | |
| {'m', PCRE_DOTALL, PCRE_MULTILINE, result}, // ^aaa\n$^bbb\n$ | |
| {'g', 0, true, result+1}, // many rows | |
| {0}, | |
| }; | |
| result[0]=PCRE_EXTRA | PCRE_DOTALL; | |
| result[1]=0; | |
| if(options) | |
| for(Regex_option *o=regex_option; o->key; o++) | |
| if( | |
| strchr(options, o->key) || | |
| strchr(options, toupper(o->key))) { | |
| *(o->result)&=~o->clear; | |
| *(o->result)|=o->set; | |
| } | |
| } | |
| // next row | /** |
| if(++read_here==string.append_here) { | returns true if fills table. |
| position=0; | table format is defined and fixed[can be used by others]: |
| return false; | @verbatim |
| } | pre-match/match/post-match/1/2/3/... |
| if(read_here==link_row) { | @endverbatim |
| Chunk *chunk=link_row->link; | */ |
| if(!chunk) | bool String::match(const String *aorigin, |
| string.pool.exception().raise( | const String& regexp, |
| "String_iterator::skip_to(char) missed " | const String *options, |
| "read_here==string.append_here check"); | Table **table, |
| Row_action row_action, void *info) const { | |
| if(!regexp.size()) | |
| THROW(0, 0, | |
| aorigin, | |
| "regexp is empty"); | |
| const char *pattern=regexp.cstr(UL_AS_IS); | |
| const char *errptr; | |
| int erroffset; | |
| int option_bits[2]; regex_options(options?options->cstr():0, option_bits); | |
| pcre *code=pcre_compile(pattern, option_bits[0], | |
| &errptr, &erroffset, | |
| pcre_tables); | |
| if(!code) | |
| THROW(0, 0, | |
| &regexp.mid(erroffset, regexp.size()), | |
| "regular expression syntax error - %s", errptr); | |
| int info_substrings=pcre_info(code, 0, 0); | |
| if(info_substrings<0) { | |
| (*pcre_free)(code); | |
| THROW(0, 0, | |
| aorigin, | |
| "pcre_info error (%d)", | |
| info_substrings); | |
| } | |
| read_here=chunk->rows; | int startoffset=0; |
| link_row=chunk->rows[chunk->count]; | const char *subject=cstr(UL_AS_IS); |
| int length=strlen(subject); | |
| int ovecsize; | |
| int *ovector=(int *)malloc(sizeof(int)* | |
| (ovecsize=(1/*match*/+info_substrings)*3)); | |
| { // create table | |
| Array& columns=*NEW Array(pool()); | |
| columns+=string_pre_match_name; | |
| columns+=string_match_name; | |
| columns+=string_post_match_name; | |
| for(int i=1; i<=info_substrings; i++) { | |
| char *column=(char *)malloc(MAX_NUMBER); | |
| snprintf(column, MAX_NUMBER, "%d", i); | |
| columns+=NEW String(pool(), column); // .i column name | |
| } | } |
| position=read_here->item.ptr; | *table=NEW Table(pool(), aorigin, &columns); |
| } | } |
| } | |
| int String_iterator::skip_to(Char_type& types) { | |
| if(!position) | |
| return false; | |
| int exec_option_bits=0; | |
| while(true) { | while(true) { |
| int countdown=read_here->size-(position-read_here->ptr))); | int exec_substrings=pcre_exec(code, 0, |
| for(; countdown--; position++) | subject, length, startoffset, |
| if(int type=types.get(*position)) | exec_option_bits, ovector, ovecsize); |
| return type; | |
| if(exec_substrings==PCRE_ERROR_NOMATCH) { | |
| // next row | (*pcre_free)(code); |
| if(++read_here==string.append_here) { | (*row_action)(**table, 0/*last time, no row*/, 0, 0, info); |
| position=0; | return option_bits[1]!=0; // global=true+table, not global=false |
| return 0; | } |
| } | |
| if(read_here==link_row) { | if(exec_substrings<0) { |
| Chunk *chunk=link_row->link; | (*pcre_free)(code); |
| if(!chunk) | THROW(0, 0, |
| string.pool.exception().raise( | aorigin, |
| "String_iterator::skip_to(Char_type) missed " | "regular expression execute error (%d)", |
| "read_here==string.append_here check"); | exec_substrings); |
| } | |
| read_here=chunk->rows; | |
| link_row=chunk->rows[chunk->count]; | Array& row=*NEW Array(pool()); |
| row+=&mid(0, ovector[0]); // .pre-match column value | |
| row+=&mid(ovector[0], ovector[1]); // .match | |
| row+=&mid(ovector[1], size()); // .post-match | |
| for(int i=1; i<exec_substrings; i++) { | |
| // -1:-1 case handled peacefully by mid() itself | |
| row+=&mid(ovector[i*2+0], ovector[i*2+1]); // .i column value | |
| } | |
| (*row_action)(**table, &row, startoffset, ovector[0], info); | |
| if(!option_bits[1] || !(startoffset=ovector[1])) { // not global | going to hang | |
| (*pcre_free)(code); | |
| (*row_action)(**table, 0/*last time, no row*/, 0, 0, info); | |
| return true; | |
| } | } |
| position=read_here->item.ptr; | |
| /* | |
| if(option_bits[0] & PCRE_MULTILINE) | |
| exec_option_bits|=PCRE_NOTBOL; // start of subject+startoffset not BOL | |
| */ | |
| } | } |
| } | } |