--- parser3/src/classes/table.C 2016/09/07 11:56:20 1.321 +++ parser3/src/classes/table.C 2017/05/17 14:22:11 1.343 @@ -1,7 +1,7 @@ /** @file Parser: @b table parser class. - Copyright (c) 2001-2015 Art. Lebedev Studio (http://www.artlebedev.com) + Copyright (c) 2001-2017 Art. Lebedev Studio (http://www.artlebedev.com) Author: Alexandr Petrosian (http://paf.design.ru) */ @@ -22,7 +22,7 @@ #define USE_STRINGSTREAM #endif -volatile const char * IDENT_TABLE_C="$Id: table.C,v 1.321 2016/09/07 11:56:20 moko Exp $"; +volatile const char * IDENT_TABLE_C="$Id: table.C,v 1.343 2017/05/17 14:22:11 moko Exp $"; // class @@ -74,15 +74,15 @@ static Table::Action_options get_action_ else throw Exception(PARSER_RUNTIME, &soffset, "must be 'cur' string or expression"); } else - result.offset=r.process_to_value(*voffset).as_int(); + result.offset=r.process(*voffset).as_int(); } if(Value* vlimit=options->get(sql_limit_name)) { valid_options++; - result.limit=r.process_to_value(*vlimit).as_int(); + result.limit=r.process(*vlimit).as_int(); } if(Value *vreverse=(Value *)options->get(table_reverse_name)) { valid_options++; - result.reverse=r.process_to_value(*vreverse).as_bool(); + result.reverse=r.process(*vreverse).as_bool(); if(result.reverse && !defined_offset) result.offset=source.count()-1; } @@ -111,7 +111,7 @@ struct TableControlChars { if(Value* vseparator=options.get(PA_COLUMN_SEPARATOR_NAME)) { sseparator=&vseparator->as_string(); if(sseparator->length()!=1) - throw Exception(PARSER_RUNTIME, sseparator, "separator must be one character long"); + throw Exception(PARSER_RUNTIME, sseparator, "separator must be one byte character"); separator=sseparator->first_char(); separators[0]=separator; result++; @@ -121,9 +121,9 @@ struct TableControlChars { if(sencloser->is_empty()){ encloser=0; } else { - if(sencloser->length()!=1) - throw Exception(PARSER_RUNTIME, sencloser, "encloser must be one character long"); - encloser=sencloser->first_char(); + if(sencloser->length()!=1) + throw Exception(PARSER_RUNTIME, sencloser, "encloser must be empty or one byte character"); + encloser=sencloser->first_char(); } result++; } @@ -132,6 +132,119 @@ struct TableControlChars { }; +struct lsplit_sresult { + String* piece; + char delim; + + lsplit_sresult() : piece(0), delim(0){} + + operator bool() { return piece!=0; } + + void append(String *str){ + if(piece) + *piece << *str; + else + piece = str; + } +}; + +class StringSplitHelper : public String { +public: + char* base; + + StringSplitHelper(String astring) : String(astring), base(cstrm()) {} + + bool check_lang(const char *pos){ + return langs.check_lang(L_AS_IS, pos-base, 1); + } + + String *extract(char *pos){ + String *result=new String; + if(size_t len=strlen(pos)){ + // first: their langs + result->langs.append(result->body, langs, pos-base, len); + // next: letters themselves + result->body=Body(pos); + } + return result; + } +}; + +inline lsplit_sresult lsplit(char* *string_ref, const char* delims, StringSplitHelper& helper) { + lsplit_sresult result; + if(char *pos=*string_ref) { + while(pos=strpbrk(pos, delims)) { + if(helper.check_lang(pos)){ + result.delim=*pos; + *pos=0; + result.piece=helper.extract(*string_ref); + *string_ref=pos+1; + return result; + } + pos++; + } + result.piece=helper.extract(*string_ref); + *string_ref=0; + } + return result; +} + +static lsplit_sresult lsplit(char** string_ref, const char* delims, char encloser, StringSplitHelper& helper) { + lsplit_sresult result; + + if(char *pos=*string_ref) { + if(encloser && *pos==encloser && helper.check_lang(pos)) { + *string_ref=++pos; + + // we are enclosed, searching for second encloser + while(1) { + if(pos=strchr(pos, encloser)){ + if(helper.check_lang(pos)){ + *(pos++)=0; + result.append(helper.extract(*string_ref)); + if(*pos==encloser && helper.check_lang(pos)){ // double-encloser stands for encloser + *string_ref=pos; + } else { + *string_ref=pos; + break; + } + } + pos++; + } else { + result.append(helper.extract(*string_ref)); + *string_ref=0; + return result; + } + } + + // we are no longer enclosed, searching for delimiter + while(pos=strpbrk(pos, delims)) { + if(helper.check_lang(pos)){ + result.delim=*pos; + if(pos>*string_ref){ + *pos=0; + result.append(helper.extract(*string_ref)); + } + *string_ref=pos+1; + return result; + } + pos++; + } + result.append(helper.extract(*string_ref)); + *string_ref=0; + } else + return lsplit(string_ref, delims, helper); + } + return result; +} + +static void skip_clean_empty_lines(char** data_ref, StringSplitHelper& helper) { + if(*data_ref) { + while(**data_ref == '\n' && helper.check_lang(*data_ref)) + (*data_ref)++; + } +} + static void _create(Request& r, MethodParams& params) { // clone/copy part? if(Table *source=params[0].get_table()) { @@ -159,61 +272,48 @@ static void _create(Request& r, MethodPa TableControlChars control_chars; size_t options_param_index=data_param_index+1; - if( - options_param_indexcount()) + throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); } // data - Temp_lang temp_lang(r, String::L_PASS_APPENDED); - const String& data= - r.process_to_string(params.as_junction(data_param_index, "body must be table or code")); + StringSplitHelper sdata(r.process_to_string(params.as_junction(data_param_index, "body must be table or code"))); + char *data=sdata.base; // parse columns - size_t raw_pos_after=0; Table::columns_type columns; - - if(nameless){ - columns=Table::columns_type(0); // nameless + if(nameless) { + columns=0; // nameless } else { - columns=Table::columns_type(new ArrayString); - - ArrayString head; - data.split(head, raw_pos_after, "\n", String::L_AS_IS, 1); - if(head.count()) { - size_t col_pos_after=0; - if(head[0]->is_empty()) - *columns += new String(); - else - head[0]->split(*columns, col_pos_after, *control_chars.sseparator, String::L_AS_IS); + columns=new ArrayString; + while( lsplit_sresult sr=lsplit(&data, control_chars.separators, control_chars.encloser, sdata) ) { + *columns+=sr.piece; + if(sr.delim=='\n') + break; } } - + Table& table=*new Table(columns); - // parse cells - - ArrayString rows; - data.split(rows, raw_pos_after, "\n", String::L_AS_IS); - Array_iterator i(rows); - while(i.has_next()) { - Table::element_type row(new ArrayString); - const String& string=*i.next(); - // remove comment lines - if(string.is_empty()) - continue; + int columns_count=columns ? columns->count(): 0; - size_t col_pos_after=0; - string.split(*row, col_pos_after, *control_chars.sseparator, String::L_AS_IS); - table+=row; + // parse cells + Table::element_type row(new ArrayString(columns_count)); + skip_clean_empty_lines(&data, sdata); + while( lsplit_sresult sr=lsplit(&data, control_chars.separators, control_chars.encloser, sdata) ) { + if(sr.piece->is_empty() && !sr.delim && !row->count()) // append last empty column [if without \n] + break; + *row+=sr.piece; + if(sr.delim=='\n') { + table+=row; + row=new ArrayString(columns_count); + skip_clean_empty_lines(&data, sdata); + } } - + // last line [if without \n] + if(row->count()) + table+=row; + // replace any previous table value GET_SELF(r, VTable).set_table(table); } @@ -234,9 +334,8 @@ inline lsplit_result lsplit(char* *strin *v=0; *string_ref=v+1; return result; - } else { - *string_ref=0; } + *string_ref=0; } return result; } @@ -264,12 +363,12 @@ static lsplit_result lsplit(char** strin *write++=c; } - // we are no longer enclosed, searching for delimiter, skipping extra enclosers + // we are no longer enclosed, searching for delimiter while(c=*read++) { if(c==delims[0] || c==delims[1]) { result.delim=c; break; - } else if(c!=encloser) + } else *write++=c; } @@ -283,30 +382,22 @@ static lsplit_result lsplit(char** strin } static void skip_empty_and_comment_lines( char** data_ref ) { - if(char *data=*data_ref) { - while( char c=*data ) { - if( c== '\n' || c == '#' ) { - /*nowhere=*/getrow(&data); // remove empty&comment lines - if(!(*data_ref=data)) - break; - continue; - } - break; + while(*data_ref) { + if(**data_ref == '\n'){ + (*data_ref)++; + } else { + if(**data_ref == '#' ) + /*nowhere=*/getrow(data_ref); + else + break; } } } static void skip_empty_lines( char** data_ref ) { - if(char *data=*data_ref) { - while( char c=*data ) { - if( c== '\n' ) { - /*nowhere=*/getrow(&data); // remove empty lines - if(!(*data_ref=data)) - break; - continue; - } - break; - } + if(*data_ref) { + while(**data_ref == '\n') + (*data_ref)++; } } @@ -337,9 +428,9 @@ static void _load(Request& r, MethodPara // parse columns Table::columns_type columns; if(nameless) { - columns=Table::columns_type(0); // nameless + columns=0; // nameless } else { - columns=Table::columns_type(new ArrayString); + columns=new ArrayString; skip_lines_action(&data); while( lsplit_result sr=lsplit(&data, control_chars.separators, control_chars.encloser) ) { @@ -350,7 +441,7 @@ static void _load(Request& r, MethodPara } Table& table=*new Table(columns); - int columns_count=columns? columns->count(): 0; + int columns_count=columns ? columns->count(): 0; // parse cells Table::element_type row(new ArrayString(columns_count)); @@ -620,13 +711,13 @@ static void _csv_string(Request& r, Meth table_to_csv(ost, table, control_chars, output_column_names); - r.write_no_lang(*new VString(*new String(pa_strdup(ost.str().c_str()), String::L_CLEAN))); + r.write(*new VString(*new String(pa_strdup(ost.str().c_str()), String::L_CLEAN))); #else String sdata; table_to_csv(sdata, table, control_chars, output_column_names); - r.write_no_lang(*new VString(*new String(sdata.cstr(), String::L_CLEAN))); + r.write(*new VString(*new String(sdata.cstr(), String::L_CLEAN))); #endif } @@ -646,12 +737,12 @@ static void _count(Request& r, MethodPar } else result = table.count(); - r.write_no_lang(*new VInt(result)); + r.write(*new VInt(result)); } static void _line(Request& r, MethodParams&) { int result=1+GET_SELF(r, VTable).table().current(); - r.write_no_lang(*new VInt(result)); + r.write(*new VInt(result)); } static void _offset(Request& r, MethodParams& params) { @@ -671,7 +762,7 @@ static void _offset(Request& r, MethodPa int offset=params.as_int(params.count()-1, "offset must be expression", r); table.offset(absolute, offset); } else - r.write_no_lang(*new VInt(table.current())); + r.write(*new VInt(table.current())); } static void _menu(Request& r, MethodParams& params) { @@ -689,20 +780,20 @@ static void _menu(Request& r, MethodPara for(size_t row=0; rowis_empty()) { // we have body if(need_delim) // need delim & iteration produced string? - r.write_pass_lang(r.process(*delim_maybe_code)); + r.write(r.process(*delim_maybe_code)); else need_delim=true; } - r.write_pass_lang(sv_processed); + r.write(sv_processed); - if(lskip==Request::SKIP_BREAK) + if(skip.check_break()) break; } } else { @@ -710,9 +801,8 @@ static void _menu(Request& r, MethodPara table.set_current(row); r.process_write(body_code); - Request::Skip lskip=r.get_skip(); r.set_skip(Request::SKIP_NOTHING); - if(lskip==Request::SKIP_BREAK) + if(r.check_skip_break()) break; } } @@ -737,7 +827,7 @@ static void table_row_to_hash(Table::ele const String* key; if(info->key_code) { info->table->set_current(info->row++); // change context row - StringOrValue sv_processed=info->r->process(*info->key_code); + Value& sv_processed=info->r->process(*info->key_code); key=&sv_processed.as_string(); } else { key=info->key_fieldcount()?row->get(info->key_field):0; @@ -792,7 +882,7 @@ static void table_row_to_hash(Table::ele case C_CODE: { if(!info->key_code) info->table->set_current(info->row++); // change context row - exist=info->hash->put_dont_replace(*key, &info->r->process(*info->value_code).as_value()); + exist=info->hash->put_dont_replace(*key, &info->r->process(*info->value_code)); break; } } @@ -846,7 +936,7 @@ static void _hash(Request& r, MethodPara int valid_options=0; if(Value* vdistinct_code=options->get(sql_distinct_name)) { // $.distinct ? valid_options++; - distinct=get_distinct(r.process_to_value(*vdistinct_code), value_type); + distinct=get_distinct(r.process(*vdistinct_code), value_type); } if(Value* vvalue_type_code=options->get(sql_value_type_name)) { // $.type ? if(value_type==C_TABLE) // $.distinct[tables] already was specified @@ -854,7 +944,7 @@ static void _hash(Request& r, MethodPara if(value_type==C_CODE) throw Exception(PARSER_RUNTIME, 0, "you can't specify $.type[] if value is code"); valid_options++; - value_type=get_value_type(r.process_to_value(*vvalue_type_code)); + value_type=get_value_type(r.process(*vvalue_type_code)); } if(valid_options!=options->count()) @@ -895,8 +985,8 @@ static void _hash(Request& r, MethodPara throw Exception(PARSER_RUNTIME, 0, "value field(s) must be string or table or code"); } - if(value_type==C_STRING && value_fields.count()!=1) - throw Exception(PARSER_RUNTIME, 0, "you can specify only one value field with option $.type[string]"); + if(value_type==C_STRING && value_fields.count()>1) + throw Exception(PARSER_RUNTIME, 0, "you can't specify more then one value field with option $.type[string]"); { Value* key_param=¶ms[0]; @@ -922,7 +1012,7 @@ static void _hash(Request& r, MethodPara } } } - r.write_no_lang(result); + r.write(result); } #ifndef DOXYGEN @@ -953,9 +1043,7 @@ static int sort_cmp_double(const void *a static void _sort(Request& r, MethodParams& params) { Value& key_maker=params.as_junction(0, "key-maker must be code"); - bool reverse=params.count()>1/*..[desc|asc|]*/? - reverse=params.as_no_junction(1, "order must not be code").as_string()=="desc": - false; // default=asc + bool reverse=params.count()>1 /*..[desc|asc|]*/ && params.as_no_junction(1, "order must not be code").as_string()=="desc"; // default=asc Table& old_table=GET_SELF(r, VTable).table(); Table& new_table=*new Table(old_table.columns()); @@ -970,7 +1058,7 @@ static void _sort(Request& r, MethodPara old_table.set_current(i); // calculate key value seq[i].row=old_table[i]; - Value& value=r.process_to_value(key_maker); + Value& value=r.process(key_maker); if(i==0) // determining key values type by first one key_values_are_strings=value.is_string(); @@ -984,7 +1072,7 @@ static void _sort(Request& r, MethodPara if(r.charsets.source().NAME()=="KOI8-R" && key_values_are_strings) { for(i=0; ir->process_to_value(*info->expression_code).as_bool(); + return info->r->process(*info->expression_code).as_bool(); } static bool _locate_expression(Table& table, Request& r, MethodParams& params) { @@ -1033,7 +1121,7 @@ static void _locate(Request& r, MethodPa bool result=params[0].get_junction() || (params.count() == 1) ? _locate_expression(table, r, params) : _locate_name_value(table, r, params); - r.write_no_lang(VBool::get(result)); + r.write(VBool::get(result)); } @@ -1051,14 +1139,14 @@ static void _flip(Request& r, MethodPara new_table+=new_row; } - r.write_no_lang(*new VTable(&new_table)); + r.write(*new VTable(&new_table)); } static void _foreach(Request& r, MethodParams& params) { InCycle temp(r); - const String& rownum_name=params.as_string(0, "rownum-var name must be string"); - const String& value_name=params.as_string(1, "value-var name must be string"); + const String* rownum_var_name=¶ms.as_string(0, "rownum-var name must be string"); + const String* value_var_name=¶ms.as_string(1, "value-var name must be string"); Value& body_code=params.as_junction(2, "body must be code"); @@ -1067,8 +1155,8 @@ static void _foreach(Request& r, MethodP Table& table=GET_SELF(r, VTable).table(); size_t saved_current=table.current(); - const String* rownum_var_name=rownum_name.is_empty()? 0 : &rownum_name; - const String* value_var_name=value_name.is_empty()? 0 : &value_name; + rownum_var_name=rownum_var_name->is_empty()? 0 : rownum_var_name; + value_var_name=value_var_name->is_empty()? 0 : value_var_name; Value* var_context=r.get_method_frame()->caller(); @@ -1082,20 +1170,20 @@ static void _foreach(Request& r, MethodP if(value_var_name) r.put_element(*var_context, *value_var_name, new VTable(&table)); - StringOrValue sv_processed=r.process(body_code); - Request::Skip lskip=r.get_skip(); r.set_skip(Request::SKIP_NOTHING); + Value& sv_processed=r.process(body_code); + TempSkip4Delimiter skip(r); const String* s_processed=sv_processed.get_string(); if(s_processed && !s_processed->is_empty()) { // we have body if(need_delim) // need delim & iteration produced string? - r.write_pass_lang(r.process(*delim_maybe_code)); + r.write(r.process(*delim_maybe_code)); else need_delim=true; } - r.write_pass_lang(sv_processed); + r.write(sv_processed); - if(lskip==Request::SKIP_BREAK) + if(skip.check_break()) break; } } else { @@ -1108,9 +1196,8 @@ static void _foreach(Request& r, MethodP r.put_element(*var_context, *value_var_name, new VTable(&table)); r.process_write(body_code); - Request::Skip lskip=r.get_skip(); r.set_skip(Request::SKIP_NOTHING); - if(lskip==Request::SKIP_BREAK) + if(r.check_skip_break()) break; } } @@ -1125,13 +1212,11 @@ inline Table::element_type row_from_stri if(!param.is_string() && !param.get_junction()) throw Exception(PARSER_RUNTIME, 0, "row must be string, code or hash"); - Temp_lang temp_lang(r, String::L_PASS_APPENDED); const String& string=r.process_to_string(param); // parse cells Table::element_type row=new ArrayString; - size_t pos_after=0; - string.split(*row, pos_after, "\t", String::L_AS_IS); + string.split(*row, 0, "\t", String::L_AS_IS); return row; } @@ -1214,7 +1299,7 @@ public: columns+=new String(str, String::L_TAINTED /* no length as 0x00 can be inside */); return false; } catch(...) { - error=SQL_Error("exception occured in Table_sql_event_handlers::add_column"); + error=SQL_Error("exception occurred in Table_sql_event_handlers::add_column"); return true; } } @@ -1224,7 +1309,7 @@ public: columns_count=columns.count(); return false; } catch(...) { - error=SQL_Error("exception occured in Table_sql_event_handlers::before_rows"); + error=SQL_Error("exception occurred in Table_sql_event_handlers::before_rows"); return true; } } @@ -1233,7 +1318,7 @@ public: *table+=row=new ArrayString(columns_count); return false; } catch(...) { - error=SQL_Error("exception occured in Table_sql_event_handlers::add_row"); + error=SQL_Error("exception occurred in Table_sql_event_handlers::add_row"); return true; } } @@ -1242,7 +1327,7 @@ public: *row+=str?new String(str, String::L_TAINTED /* no length as 0x00 can be inside */):&String::Empty; return false; } catch(...) { - error=SQL_Error("exception occured in Table_sql_event_handlers::add_row_cell"); + error=SQL_Error("exception occurred in Table_sql_event_handlers::add_row_cell"); return true; } } @@ -1301,11 +1386,11 @@ static void _sql(Request& r, MethodParam } if(Value* vlimit=options->get(sql_limit_name)) { valid_options++; - limit=(ulong)r.process_to_value(*vlimit).as_double(); + limit=(ulong)r.process(*vlimit).as_double(); } if(Value* voffset=options->get(sql_offset_name)) { valid_options++; - offset=(ulong)r.process_to_value(*voffset).as_double(); + offset=(ulong)r.process(*voffset).as_double(); } if(valid_options!=options->count()) throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); @@ -1316,9 +1401,8 @@ static void _sql(Request& r, MethodParam if(bind) placeholders_count=marshal_binds(*bind, placeholders); - Temp_lang temp_lang(r, String::L_SQL); const String& statement_string=r.process_to_string(statement); - const char* statement_cstr=statement_string.untaint_cstr(r.flang, r.connection()); + const char* statement_cstr=statement_string.untaint_cstr(String::L_SQL, r.connection()); Table_sql_event_handlers handlers; @@ -1360,7 +1444,7 @@ static void _columns(Request& r, MethodP } } - r.write_no_lang(*new VTable(&result_table)); + r.write(*new VTable(&result_table)); } static void _select(Request& r, MethodParams& params) { @@ -1377,15 +1461,15 @@ static void _select(Request& r, MethodPa int valid_options=0; if(Value* vlimit=options->get(sql_limit_name)) { valid_options++; - limit=r.process_to_value(*vlimit).as_int(); + limit=r.process(*vlimit).as_int(); } if(Value* voffset=options->get(sql_offset_name)) { valid_options++; - offset=r.process_to_value(*voffset).as_int(); + offset=r.process(*voffset).as_int(); } if(Value* vreverse=options->get(table_reverse_name)) { valid_options++; - reverse=r.process_to_value(*vreverse).as_bool(); + reverse=r.process(*vreverse).as_bool(); } if(valid_options!=options->count()) throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); @@ -1404,7 +1488,7 @@ static void _select(Request& r, MethodPa for(size_t row=size-1; result_table.count() < (size_t)limit; row--) { source_table.set_current(row); - bool condition=r.process_to_value(vcondition, false/*don't intercept string*/).as_bool(); + bool condition=r.process(vcondition).as_bool(); if(condition && ++appended > (size_t)offset) // ...condition is true, adding to the result result_table+=source_table[row]; @@ -1414,7 +1498,7 @@ static void _select(Request& r, MethodPa for(size_t row=0; row < size && result_table.count() < (size_t)limit; row++) { source_table.set_current(row); - bool condition=r.process_to_value(vcondition, false/*don't intercept string*/).as_bool(); + bool condition=r.process(vcondition).as_bool(); if(condition && ++appended > (size_t)offset) // ...condition is true, adding to the result result_table+=source_table[row]; @@ -1423,7 +1507,7 @@ static void _select(Request& r, MethodPa source_table.set_current(saved_current); } - r.write_no_lang(*new VTable(&result_table)); + r.write(*new VTable(&result_table)); } // constructor