--- parser3/src/classes/table.C 2003/11/03 13:20:30 1.186 +++ parser3/src/classes/table.C 2004/04/01 15:08:48 1.201 @@ -1,11 +1,11 @@ /** @file Parser: @b table parser class. - Copyright (c) 2001-2003 ArtLebedev Group (http://www.artlebedev.com) + Copyright (c) 2001-2004 ArtLebedev Group (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ -static const char* IDENT_TABLE_C="$Date: 2003/11/03 13:20:30 $"; +static const char * const IDENT_TABLE_C="$Date: 2004/04/01 15:08:48 $"; #include "classes.h" #include "pa_vmethod_frame.h" @@ -21,7 +21,7 @@ static const char* IDENT_TABLE_C="$Date: class MTable: public Methoded { public: // VStateless_class - Value* create_new_value() { return new VTable(); } + Value* create_new_value(Pool&) { return new VTable(); } public: MTable(); @@ -52,8 +52,8 @@ String table_reverse_name(TABLE_REVERSE_ // local defines -#define COLUMN_SEPARATOR_NAME "column-separator" -#define COLUMN_ENCLOSER_NAME "column-encloser" +#define COLUMN_SEPARATOR_NAME "separator" +#define COLUMN_ENCLOSER_NAME "encloser" // methods @@ -178,34 +178,119 @@ static void _create(Request& r, MethodPa GET_SELF(r, VTable).set_table(table); } -inline char* remove_encloser(char* cstr, char encloser) { - if(cstr[0]!=encloser) - return cstr; +struct lsplit_result { + char* piece; + char delim; - size_t length=strlen(cstr); - if(length<2 || cstr[length-1]!=encloser) - return cstr; - - // 'string' - - cstr[length-1]=0; - cstr++; + operator bool() { return piece!=0; } +}; - // double-encloser stands for encloser - char *read; - char *write; - write=read=cstr; - while(char c=*read++) { - if(c==encloser && *read==encloser) - read++; +inline lsplit_result lsplit(char* string, char delim1, char delim2) { + lsplit_result result; + if(string) { + char delims[]={delim1, delim2, 0}; + if(char* v=strpbrk(string, delims)) { + result.delim=*v; + *v=0; + result.piece=v+1; + return result; + } + } + result.piece=0; + result.delim=0; + return result; +} + +inline lsplit_result lsplit(char* *string_ref, char delim1, char delim2) { + lsplit_result result; + result.piece=*string_ref; + lsplit_result next=lsplit(*string_ref, delim1, delim2); + result.delim=next.delim; + *string_ref=next.piece; + return result; +} + +static lsplit_result lsplit(char** string_ref, char delim1, char delim2, char encloser) { + lsplit_result result; + + if(char* string=*string_ref) { + if(encloser && *string==encloser) { + string++; + + char *read; + char *write; + write=read=string; + char c; + while((c=*read++)) { + if(c==encloser) { + char n=*read; + if(n==encloser) // double-encloser stands for encloser + read++; + else if(n==delim1 || n==delim2) { + result.delim=n; + read++; + break; + } + } - *write++=c; + *write++=c; + } + *write=0; // terminate + *string_ref=c? read: 0; + result.piece=string; + return result; + } else + return lsplit(string_ref, delim1, delim2); + } + result.piece=0; + return result; +} + +static void skip_empty_and_comment_lines( char** data_ref ) { + if(char *data=*data_ref) { + while( char c=*data ) { + if( c== '\n' || c == '#' ) { + /*nowhere=*/getrow(&data); // remove empty&comment lines + if(!(*data_ref=data)) + break; + continue; + } + break; + } } - *write=0; // terminate - - return cstr; } +struct TableSeparators { + char column; + char encloser; + const String* sencloser; + + TableSeparators() { + column='\t'; + encloser=0; + } + void load( HashStringValue& options ) { + if(Value* vseparator=options.get(COLUMN_SEPARATOR_NAME)) { + options.remove(COLUMN_SEPARATOR_NAME); + const String& sseparator=vseparator->as_string(); + if(sseparator.length()!=1) + throw Exception("parser.runtime", + &sseparator, + "separator must be one character long"); + column=sseparator.first_char(); + } + if(Value* vencloser=options.get(COLUMN_ENCLOSER_NAME)) { + options.remove(COLUMN_ENCLOSER_NAME); + sencloser=&vencloser->as_string(); + if(sencloser->length()!=1) + throw Exception("parser.runtime", + sencloser, + "encloser must be one character long"); + encloser=sencloser->first_char(); + } + } +}; + static void _load(Request& r, MethodParams& params) { const String& first_param=params.as_string(0, "file name must be string"); int filename_param_index=0; @@ -215,30 +300,12 @@ static void _load(Request& r, MethodPara size_t options_param_index=filename_param_index+1; HashStringValue *options=0; - char column_separator='\t'; - char column_encloser=0; + TableSeparators separators; if(options_param_indexget(COLUMN_SEPARATOR_NAME)) { - options->remove(COLUMN_SEPARATOR_NAME); - const String& sseparator=vseparator->as_string(); - if(sseparator.length()!=1) - throw Exception("parser.runtime", - &sseparator, - "separator must be one character long"); - column_separator=sseparator.first_char(); - } - if(Value* vencloser=options->get(COLUMN_ENCLOSER_NAME)) { - options->remove(COLUMN_ENCLOSER_NAME); - const String& sencloser=vencloser->as_string(); - if(sencloser.length()!=1) - throw Exception("parser.runtime", - &sencloser, - "encloser must be one character long"); - column_encloser=sencloser.first_char(); - } + separators.load(*options); } // loading text @@ -255,60 +322,104 @@ static void _load(Request& r, MethodPara } else { columns=Table::columns_type(new ArrayString); - while(char *row_chars=getrow(&data)) { - // remove empty&comment lines - if(!*row_chars || *row_chars == '#') - continue; - do { - char *column_chars=lsplit(&row_chars, column_separator); - if(column_encloser) - column_chars=remove_encloser(column_chars, column_encloser); - *columns+=new String(column_chars, 0, true); - } while(row_chars); - - break; + skip_empty_and_comment_lines(&data); + while( lsplit_result sr=lsplit(&data, separators.column, '\n', separators.encloser) ) { + *columns+=new String(sr.piece, 0, true); + if(sr.delim=='\n') + break; } } + + Table& table=*new Table(columns); // parse cells - Table& table=*new Table(columns);//что-то очень плохое с realloc'ом: 1000 сильно помогает - char *row_chars; - int cells=0; - while(row_chars=getrow(&data)) { - // remove empty&comment lines - if(!*row_chars || *row_chars == '#') - continue; - Table::element_type row(new ArrayString); - while(char *cell_chars=lsplit(&row_chars, '\t')) { - if(column_encloser) - cell_chars=remove_encloser(cell_chars, column_encloser); - *row+=new String(cell_chars, 0, true); - cells++; + Table::element_type row(new ArrayString); + skip_empty_and_comment_lines(&data); + while( lsplit_result sr=lsplit(&data, separators.column, '\n', separators.encloser) ) { + if(!*sr.piece && !sr.delim && !row->count()) // append last empty column [if without \n] + break; + *row+=new String(sr.piece, 0, true); + if(sr.delim=='\n') { + table+=row; + row=new ArrayString; + skip_empty_and_comment_lines(&data); } + } + // last line [if without \n] + if(row->count()) table+=row; - }; - + // replace any previous table value GET_SELF(r, VTable).set_table(table); } -/// @todo "x\nx" "xxx""xx" +static void maybe_enclose( String& to, const String& from, char encloser, const String* sencloser ) { + if(encloser) { + to<<*sencloser; + // while we have 'encloser'... + size_t pos_after=0; + for( size_t pos_before; (pos_before=from.pos( encloser, pos_after ))!=STRING_NOT_FOUND; pos_after=pos_before+1) { + to<count()) + throw Exception("parser.runtime", + 0, + "invalid option passed"); + } else + throw Exception("parser.runtime", + 0, + "additional params must be hash (did you spell mode parameter correctly?)"); + + } + if(param_index i(*table.columns()); i.has_next(); ) { - sdata.append(*i.next(), String::L_TABLE); + maybe_enclose( sdata, *i.next(), separators.encloser, separators.sencloser ); if(i.has_next()) sdata.append_know_length("\t", 1, String::L_CLEAN); } - } else { // nameless table + } else { // nameless table [we were asked to output column names] if(int lsize=table.count()?table[0]->count():0) for(int column=0; column i(table); while(i.has_next()) { for(Array_iterator c(*i.next()); c.has_next(); ) { - sdata.append(*c.next(), String::L_TABLE); + maybe_enclose( sdata, *c.next(), separators.encloser, separators.sencloser ); if(c.has_next()) sdata.append_know_length("\t", 1, String::L_CLEAN); } @@ -345,7 +446,7 @@ static void _save(Request& r, MethodPara } // write - file_write(r.absolute(vfile_name.as_string()), + file_write(r.absolute(file_name), sdata.cstr(), sdata.length(), true, do_append); } @@ -529,7 +630,7 @@ static void _hash(Request& r, MethodPara } else throw Exception("parser.runtime", 0, - "value field(s) must be string or self_table" + "value field(s) must be string or table" ); } else { // by all columns, including key if(!(distinct!=D_ILLEGAL && distinct!=D_FIRST)) @@ -539,17 +640,19 @@ static void _hash(Request& r, MethodPara { - Row_info info={0}; - info.r=&r; - info.table=&self_table; Value* key_param=¶ms[0]; - info.key_code=key_param->get_junction()?key_param:0; - info.key_field=info.key_code?-1 - :self_table.column_name2index(key_param->as_string(), true); - info.value_fields=&value_fields; - info.hash=&result.hash(); - info.distinct=distinct; - info.row=0; + Row_info info={ + &r, + &self_table, + /*key_code=*/key_param->get_junction()?key_param:0, + /*key_field=*/0/*filled below*/, + &value_fields, + &result.hash(), + distinct, + /*row=*/0 + }; + info.key_field=(info.key_code?-1 + :self_table.column_name2index(key_param->as_string(), true)); int saved_current=self_table.current(); self_table.for_each(table_row_to_hash, &info); @@ -633,9 +736,8 @@ struct Expression_is_true_info { Value* expression_code; }; #endif -static bool expression_is_true(Table& self, void* ainfo) { - Expression_is_true_info& info=*static_cast(ainfo); - return info.r->process_to_value(*info.expression_code).as_bool(); +static bool expression_is_true(Table&, Expression_is_true_info* info) { + return info->r->process_to_value(*info->expression_code).as_bool(); } static bool _locate_expression(Table& table, Table::Action_options o, Request& r, MethodParams& params) { @@ -647,7 +749,7 @@ static bool _locate_expression(Table& ta return table.table_first_that(expression_is_true, &info, o); } static bool _locate_name_value(Table& table, Table::Action_options o, - Request& r, MethodParams& params) { + Request&, MethodParams& params) { check_option_param(o.defined, params, 2, "locate by locate by name has parameters: name, value and, maybe, options"); const String& name=params.as_string(0, "column name must be string"); @@ -666,7 +768,7 @@ static void _locate(Request& r, MethodPa } -static void _flip(Request& r, MethodParams& params) { +static void _flip(Request& r, MethodParams&) { Table& old_table=GET_SELF(r, VTable).table(); Table& new_table=*new Table(0); if(size_t old_count=old_table.count()) @@ -727,7 +829,7 @@ static void _join(Request& r, MethodPara 0, "source and destination are same table"); - if(Table::columns_type dest_columns=dest.columns()) // dest is named + if(dest.columns()) // dest is named src.table_for_each(join_named_row, &dest, o); else // dest is nameless src.table_for_each(join_nameless_row, &dest, o); @@ -877,7 +979,6 @@ static void _select(Request& r, MethodPa source_table.set_current(row); bool condition=r.process_to_value(vcondition, - /*0/*no name* /,*/ false/*don't intercept string*/).as_bool(); if(condition) // ...condition is true= @@ -904,7 +1005,7 @@ MTable::MTable(): Methoded("table") { // ^table.save[file] // ^table.save[nameless;file] - add_native_method("save", Method::CT_DYNAMIC, _save, 1, 2); + add_native_method("save", Method::CT_DYNAMIC, _save, 1, 3); // ^table.count[] add_native_method("count", Method::CT_DYNAMIC, _count, 0, 0);