--- parser3/src/classes/table.C 2016/09/06 22:19:47 1.319 +++ parser3/src/classes/table.C 2016/09/21 15:14:39 1.331 @@ -22,7 +22,7 @@ #define USE_STRINGSTREAM #endif -volatile const char * IDENT_TABLE_C="$Id: table.C,v 1.319 2016/09/06 22:19:47 moko Exp $"; +volatile const char * IDENT_TABLE_C="$Id: table.C,v 1.331 2016/09/21 15:14:39 moko Exp $"; // class @@ -97,17 +97,23 @@ struct TableControlChars { char separator; const String* sseparator; char encloser; const String* sencloser; + char separators[3]; + TableControlChars(): separator('\t'), sseparator(new String("\t")), encloser(0), sencloser(0) - {} + { + strcpy(separators,"\t\n"); + } + int load( HashStringValue& options ) { int result=0; if(Value* vseparator=options.get(PA_COLUMN_SEPARATOR_NAME)) { sseparator=&vseparator->as_string(); if(sseparator->length()!=1) - throw Exception(PARSER_RUNTIME, sseparator, "separator must be one character long"); + throw Exception(PARSER_RUNTIME, sseparator, "separator must be one byte character"); separator=sseparator->first_char(); + separators[0]=separator; result++; } if(Value* vencloser=options.get(PA_COLUMN_ENCLOSER_NAME)) { @@ -115,9 +121,9 @@ struct TableControlChars { if(sencloser->is_empty()){ encloser=0; } else { - if(sencloser->length()!=1) - throw Exception(PARSER_RUNTIME, sencloser, "encloser must be one character long"); - encloser=sencloser->first_char(); + if(sencloser->length()!=1) + throw Exception(PARSER_RUNTIME, sencloser, "encloser must be empty or one byte character"); + encloser=sencloser->first_char(); } result++; } @@ -126,6 +132,118 @@ struct TableControlChars { }; +struct lsplit_sresult { + String* piece; + char delim; + + lsplit_sresult() : piece(0), delim(0){} + + operator bool() { return piece!=0; } + + void append(String *str){ + if(piece) + *piece << *str; + else + piece = str; + } +}; + +class StringSplitHelper : public String { +public: + char* base; + + StringSplitHelper(String astring) : String(astring), base(cstrm()) {} + + bool 
check_lang(const char *pos){ + return langs.check_lang(L_AS_IS, pos-base, 1); + } + + String *extract(char *pos){ + String *result=new String; + if(size_t len=strlen(pos)){ + // first: their langs + result->langs.append(result->body, langs, pos-base, len); + // next: letters themselves + result->body=Body(pos); + } + return result; + } +}; + +inline lsplit_sresult lsplit(char* *string_ref, const char* delims, StringSplitHelper& helper) { + lsplit_sresult result; + if(char *pos=*string_ref) { + while(pos=strpbrk(pos, delims)) { + if(helper.check_lang(pos)){ + result.delim=*pos; + *pos=0; + result.piece=helper.extract(*string_ref); + *string_ref=pos+1; + return result; + } + pos++; + } + result.piece=helper.extract(*string_ref); + *string_ref=0; + } + return result; +} + +static lsplit_sresult lsplit(char** string_ref, const char* delims, char encloser, StringSplitHelper& helper) { + lsplit_sresult result; + + if(char *pos=*string_ref) { + if(encloser && *pos==encloser && helper.check_lang(pos)) { + *string_ref=++pos; + + // we are enclosed, searching for second encloser + while(1) { + if(pos=strchr(pos, encloser)){ + if(helper.check_lang(pos)){ + *(pos++)=0; + result.append(helper.extract(*string_ref)); + if(*pos==encloser && helper.check_lang(pos)){ // double-encloser stands for encloser + *string_ref=pos++; + } else { + *string_ref=pos; + break; + } + } + } else { + result.append(helper.extract(*string_ref)); + *string_ref=0; + return result; + } + } + + // we are no longer enclosed, searching for delimiter + while(pos=strpbrk(pos, delims)) { + if(helper.check_lang(pos)){ + result.delim=*pos; + if(pos>*string_ref){ + *pos=0; + result.append(helper.extract(*string_ref)); + } + *string_ref=pos+1; + return result; + } + pos++; + } + result.append(helper.extract(*string_ref)); + *string_ref=0; + } else + return lsplit(string_ref, delims, helper); + } + return result; +} + +static void skip_clean_empty_lines(char** data_ref, StringSplitHelper& helper) { + if(*data_ref) 
{ + while(**data_ref == '\n' && helper.check_lang(*data_ref)) + (*data_ref)++; + } +} + static void _create(Request& r, MethodParams& params) { // clone/copy part? if(Table *source=params[0].get_table()) { @@ -153,61 +271,49 @@ static void _create(Request& r, MethodPa TableControlChars control_chars; size_t options_param_index=data_param_index+1; - if( - options_param_indexcount()) + throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); } // data Temp_lang temp_lang(r, String::L_PASS_APPENDED); - const String& data= - r.process_to_string(params.as_junction(data_param_index, "body must be table or code")); + StringSplitHelper sdata(r.process_to_string(params.as_junction(data_param_index, "body must be table or code"))); + char *data=sdata.base; // parse columns - size_t raw_pos_after=0; Table::columns_type columns; - - if(nameless){ - columns=Table::columns_type(0); // nameless + if(nameless) { + columns=0; // nameless } else { - columns=Table::columns_type(new ArrayString); - - ArrayString head; - data.split(head, raw_pos_after, "\n", String::L_AS_IS, 1); - if(head.count()) { - size_t col_pos_after=0; - if(head[0]->is_empty()) - *columns += new String(); - else - head[0]->split(*columns, col_pos_after, *control_chars.sseparator, String::L_AS_IS); + columns=new ArrayString; + while( lsplit_sresult sr=lsplit(&data, control_chars.separators, control_chars.encloser, sdata) ) { + *columns+=sr.piece; + if(sr.delim=='\n') + break; } } - + Table& table=*new Table(columns); - // parse cells + int columns_count=columns ? 
columns->count(): 0; - ArrayString rows; - data.split(rows, raw_pos_after, "\n", String::L_AS_IS); - Array_iterator i(rows); - while(i.has_next()) { - Table::element_type row(new ArrayString); - const String& string=*i.next(); - // remove comment lines - if(string.is_empty()) - continue; - - size_t col_pos_after=0; - string.split(*row, col_pos_after, *control_chars.sseparator, String::L_AS_IS); - table+=row; + // parse cells + Table::element_type row(new ArrayString(columns_count)); + skip_clean_empty_lines(&data, sdata); + while( lsplit_sresult sr=lsplit(&data, control_chars.separators, control_chars.encloser, sdata) ) { + if(sr.piece->is_empty() && !sr.delim && !row->count()) // append last empty column [if without \n] + break; + *row+=sr.piece; + if(sr.delim=='\n') { + table+=row; + row=new ArrayString(columns_count); + skip_clean_empty_lines(&data, sdata); + } } - + // last line [if without \n] + if(row->count()) + table+=row; + // replace any previous table value GET_SELF(r, VTable).set_table(table); } @@ -216,45 +322,36 @@ struct lsplit_result { char* piece; char delim; + lsplit_result(char *apiece=0) : piece(apiece), delim(0){} operator bool() { return piece!=0; } }; -inline lsplit_result lsplit(char* string, char delim1, char delim2) { - lsplit_result result; - if(string) { - char delims[]={delim1, delim2, 0}; - if(char* v=strpbrk(string, delims)) { +inline lsplit_result lsplit(char* *string_ref, const char* delims) { + lsplit_result result(*string_ref); + if(result.piece) { + if(char* v=strpbrk(result.piece, delims)) { result.delim=*v; *v=0; - result.piece=v+1; + *string_ref=v+1; return result; } + *string_ref=0; } - result.piece=0; - result.delim=0; - return result; -} - -inline lsplit_result lsplit(char* *string_ref, char delim1, char delim2) { - lsplit_result result; - result.piece=*string_ref; - lsplit_result next=lsplit(*string_ref, delim1, delim2); - result.delim=next.delim; - *string_ref=next.piece; return result; } -static lsplit_result 
lsplit(char** string_ref, char delim1, char delim2, char encloser) { - lsplit_result result; +static lsplit_result lsplit(char** string_ref, const char* delims, char encloser) { + lsplit_result result(*string_ref); - if(char* string=*string_ref) { - if(encloser && *string==encloser) { - string++; + if(result.piece) { + if(encloser && *result.piece==encloser) { + result.piece++; + char c; char *read; char *write; - write=read=string; - char c; + write=read=result.piece; + // we are enclosed, searching for second encloser while(c=*read++) { if(c==encloser) { @@ -265,50 +362,42 @@ static lsplit_result lsplit(char** strin } *write++=c; } - // we are no longer enclosed, searching for delimiter, skipping extra enclosers + + // we are no longer enclosed, searching for delimiter while(c=*read++) { - if(c==delim1 || c==delim2) { + if(c==delims[0] || c==delims[1]) { result.delim=c; break; - } else if(c!=encloser) + } else *write++=c; } + *write=0; // terminate - *string_ref=c? read: 0; - result.piece=string; + *string_ref=c ? 
read : 0; return result; } else - return lsplit(string_ref, delim1, delim2); + return lsplit(string_ref, delims); } - result.piece=0; return result; } static void skip_empty_and_comment_lines( char** data_ref ) { - if(char *data=*data_ref) { - while( char c=*data ) { - if( c== '\n' || c == '#' ) { - /*nowhere=*/getrow(&data); // remove empty&comment lines - if(!(*data_ref=data)) - break; - continue; - } - break; + while(*data_ref) { + if(**data_ref == '\n'){ + (*data_ref)++; + } else { + if(**data_ref == '#' ) + /*nowhere=*/getrow(data_ref); + else + break; } } } static void skip_empty_lines( char** data_ref ) { - if(char *data=*data_ref) { - while( char c=*data ) { - if( c== '\n' ) { - /*nowhere=*/getrow(&data); // remove empty lines - if(!(*data_ref=data)) - break; - continue; - } - break; - } + if(*data_ref) { + while(**data_ref == '\n') + (*data_ref)++; } } @@ -332,23 +421,19 @@ static void _load(Request& r, MethodPara } // loading text - char *data=file_load_text(r, - r.absolute(params.as_string(filename_param_index, FILE_NAME_MUST_BE_STRING)), - true, - options - ); + char *data=file_load_text(r, r.absolute(params.as_string(filename_param_index, FILE_NAME_MUST_BE_STRING)), true, options); Skip_lines_action skip_lines_action = (control_chars.separator=='#' || control_chars.encloser=='#') ? 
skip_empty_lines : skip_empty_and_comment_lines; // parse columns Table::columns_type columns; if(nameless) { - columns=Table::columns_type(0); // nameless + columns=0; // nameless } else { - columns=Table::columns_type(new ArrayString); + columns=new ArrayString; skip_lines_action(&data); - while( lsplit_result sr=lsplit(&data, control_chars.separator, '\n', control_chars.encloser) ) { + while( lsplit_result sr=lsplit(&data, control_chars.separators, control_chars.encloser) ) { *columns+=new String(sr.piece, String::L_TAINTED); if(sr.delim=='\n') break; @@ -356,12 +441,12 @@ static void _load(Request& r, MethodPara } Table& table=*new Table(columns); - int columns_count=columns? columns->count(): 0; + int columns_count=columns ? columns->count(): 0; // parse cells Table::element_type row(new ArrayString(columns_count)); skip_lines_action(&data); - while( lsplit_result sr=lsplit(&data, control_chars.separator, '\n', control_chars.encloser) ) { + while( lsplit_result sr=lsplit(&data, control_chars.separators, control_chars.encloser) ) { if(!*sr.piece && !sr.delim && !row->count()) // append last empty column [if without \n] break; *row+=new String(sr.piece, String::L_TAINTED); @@ -1063,8 +1148,8 @@ static void _flip(Request& r, MethodPara static void _foreach(Request& r, MethodParams& params) { InCycle temp(r); - const String& rownum_name=params.as_string(0, "rownum-var name must be string"); - const String& value_name=params.as_string(1, "value-var name must be string"); + const String* rownum_var_name=&params.as_string(0, "rownum-var name must be string"); + const String* value_var_name=&params.as_string(1, "value-var name must be string"); Value& body_code=params.as_junction(2, "body must be code"); @@ -1073,8 +1158,8 @@ static void _foreach(Request& r, MethodP Table& table=GET_SELF(r, VTable).table(); size_t saved_current=table.current(); - const String* rownum_var_name=rownum_name.is_empty()? 0 : &rownum_name; - const String* value_var_name=value_name.is_empty()? 
0 : &value_name; + rownum_var_name=rownum_var_name->is_empty()? 0 : rownum_var_name; + value_var_name=value_var_name->is_empty()? 0 : value_var_name; Value* var_context=r.get_method_frame()->caller(); @@ -1136,8 +1221,7 @@ inline Table::element_type row_from_stri // parse cells Table::element_type row=new ArrayString; - size_t pos_after=0; - string.split(*row, pos_after, "\t", String::L_AS_IS); + string.split(*row, 0, "\t", String::L_AS_IS); return row; }