--- parser3/src/classes/table.C 2001/03/12 20:36:52 1.6 +++ parser3/src/classes/table.C 2016/09/21 15:14:39 1.331 @@ -1,131 +1,1602 @@ -/* - Parser - Copyright (c) 2001 ArtLebedev Group (http://www.artlebedev.com) - Author: Alexander Petrosyan (http://design.ru/paf) +/** @file + Parser: @b table parser class. - $Id: table.C,v 1.6 2001/03/12 20:36:52 paf Exp $ + Copyright (c) 2001-2015 Art. Lebedev Studio (http://www.artlebedev.com) + Author: Alexandr Petrosian (http://paf.design.ru) */ +#include "classes.h" +#include "pa_vmethod_frame.h" + +#include "pa_common.h" #include "pa_request.h" -#include "_table.h" +#include "pa_charsets.h" #include "pa_vtable.h" -#include "pa_common.h" #include "pa_vint.h" +#include "pa_sql_connection.h" +#include "pa_vbool.h" +#include "pa_array.h" + +#if (!defined(NO_STRINGSTREAM) && !defined(FREEBSD4)) +#include +#define USE_STRINGSTREAM +#endif + +volatile const char * IDENT_TABLE_C="$Id: table.C,v 1.331 2016/09/21 15:14:39 moko Exp $"; + +// class + +class MTable: public Methoded { +public: // VStateless_class + Value* create_new_value(Pool&) { return new VTable(); } +public: + MTable(); +}; + +// global variable + +DECLARE_CLASS_VAR(table, new MTable); + +#define TABLE_REVERSE_NAME "reverse" -// global var +// globals -VClass *table_class; +String sql_bind_name(SQL_BIND_NAME); +String sql_limit_name(PA_SQL_LIMIT_NAME); +String sql_offset_name(PA_SQL_OFFSET_NAME); +String sql_default_name(SQL_DEFAULT_NAME); +String sql_distinct_name(SQL_DISTINCT_NAME); +String sql_value_type_name(SQL_VALUE_TYPE_NAME); +String table_reverse_name(TABLE_REVERSE_NAME); // methods +static Table::Action_options get_action_options(Request& r, MethodParams& params, size_t options_index, const Table& source) { + Table::Action_options result; + if(params.count() <= options_index) + return result; + + HashStringValue* options=params.as_hash(options_index); + if(!options) + return result; + + result.defined=true; + bool defined_offset=false; + + int valid_options=0; + if(Value* voffset=options->get(sql_offset_name)) { + valid_options++; + defined_offset=true; + if(voffset->is_string()) { + const String& soffset=*voffset->get_string(); + if(soffset == "cur") + result.offset=source.current(); + else + throw Exception(PARSER_RUNTIME, &soffset, "must be 'cur' string or expression"); + } else + result.offset=r.process_to_value(*voffset).as_int(); + } + if(Value* vlimit=options->get(sql_limit_name)) { + valid_options++; + result.limit=r.process_to_value(*vlimit).as_int(); + } + if(Value *vreverse=(Value *)options->get(table_reverse_name)) { + valid_options++; + result.reverse=r.process_to_value(*vreverse).as_bool(); + if(result.reverse && !defined_offset) + result.offset=source.count()-1; + } + + if(valid_options!=options->count()) + throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); + + return result; +} + +struct TableControlChars { + char separator; const String* sseparator; + char encloser; const String* sencloser; + + char separators[3]; + + TableControlChars(): + separator('\t'), sseparator(new String("\t")), + encloser(0), sencloser(0) + { + strcpy(separators,"\t\n"); + } + + int load( HashStringValue& options ) { + int result=0; + if(Value* vseparator=options.get(PA_COLUMN_SEPARATOR_NAME)) { + sseparator=&vseparator->as_string(); + if(sseparator->length()!=1) + throw Exception(PARSER_RUNTIME, sseparator, "separator must be one byte character"); + separator=sseparator->first_char(); + separators[0]=separator; + result++; + } + if(Value* vencloser=options.get(PA_COLUMN_ENCLOSER_NAME)) { + sencloser=&vencloser->as_string(); + if(sencloser->is_empty()){ + encloser=0; + } else { + if(sencloser->length()!=1) + throw Exception(PARSER_RUNTIME, sencloser, "encloser must be empty or one byte character"); + encloser=sencloser->first_char(); + } + result++; + } + return result; + } +}; + + +struct lsplit_sresult { + String* piece; + char delim; + + lsplit_sresult() : piece(0), delim(0){} + + operator bool() { return piece!=0; } + + void append(String *str){ + if(piece) + *piece << *str; + else + piece = str; + } +}; + +class StringSplitHelper : public String { +public: + char* base; + + StringSplitHelper(String astring) : String(astring), base(cstrm()) {} + + bool check_lang(const char *pos){ + return langs.check_lang(L_AS_IS, pos-base, 1); + } + + String *extract(char *pos){ + String *result=new String; + if(size_t len=strlen(pos)){ + // first: their langs + result->langs.append(result->body, langs, pos-base, len); + // next: letters themselves + result->body=Body(pos); + } + return result; + } +}; + +inline lsplit_sresult lsplit(char* *string_ref, const char* delims, StringSplitHelper& helper) { + lsplit_sresult result; + if(char *pos=*string_ref) { + while(pos=strpbrk(pos, delims)) { + if(helper.check_lang(pos)){ + result.delim=*pos; + *pos=0; + result.piece=helper.extract(*string_ref); + *string_ref=pos+1; + return result; + } + pos++; + } + result.piece=helper.extract(*string_ref); + *string_ref=0; + } + return result; +} + +static lsplit_sresult lsplit(char** string_ref, const char* delims, char encloser, StringSplitHelper& helper) { + lsplit_sresult result; + + if(char *pos=*string_ref) { + if(encloser && *pos==encloser && helper.check_lang(pos)) { + *string_ref=++pos; + + // we are enclosed, searching for second encloser + while(1) { + if(pos=strchr(pos, encloser)){ + if(helper.check_lang(pos)){ + *(pos++)=0; + result.append(helper.extract(*string_ref)); + if(*pos==encloser && helper.check_lang(pos)){ // double-encloser stands for encloser + *string_ref=pos++; + } else { + *string_ref=pos; + break; + } + } + } else { + result.append(helper.extract(*string_ref)); + *string_ref=0; + return result; + } + } + + // we are no longer enclosed, searching for delimiter + while(pos=strpbrk(pos, delims)) { + if(helper.check_lang(pos)){ + result.delim=*pos; + if(pos>*string_ref){ + *pos=0; + result.append(helper.extract(*string_ref)); + } + *string_ref=pos+1; + return result; + } + pos++; + } + result.append(helper.extract(*string_ref)); + *string_ref=0; + } else + return lsplit(string_ref, delims, helper); + } + return result; +} + +static void skip_clean_empty_lines(char** data_ref, StringSplitHelper& helper) { + if(*data_ref) { + while(**data_ref == '\n' && helper.check_lang(*data_ref)) + (*data_ref)++; + } +} -// TODO: проверить ^set в ^menu & co +static void _create(Request& r, MethodParams& params) { + // clone/copy part? + if(Table *source=params[0].get_table()) { + Table::Action_options o=get_action_options(r, params, 1, *source); + if(params.count()>2) + throw Exception(PARSER_RUNTIME, 0, "too many parameters"); + GET_SELF(r, VTable).set_table(*new Table(*source, o)); + return; + } -static void set_or_load( - Request& r, - const String& method_name, Array *params, - bool is_load) { - Pool& pool=r.pool(); - // data is last parameter - Value *vdata=static_cast(params->get(params->size()-1)); - // forcing - // [this param type] - // [this param type] - r.fail_if_junction_(true, *vdata, - method_name, "body must not be a junction"); + size_t data_param_index=0; + bool nameless=false; + + if(params.count()>1) { + if(params[0].is_string()){ // can be nameless only + const String& snameless=params.as_string(0, "called with more then 1 param, first param may be only string 'nameless' or junction"); + if(snameless!="nameless") + throw Exception(PARSER_RUNTIME, &snameless, "table::create called with more then 1 param, first param may be only 'nameless'"); + nameless=true; + data_param_index++; + } + } + + HashStringValue *options=0; + TableControlChars control_chars; + + size_t options_param_index=data_param_index+1; + if( options_param_indexcount()) + throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); + } - // data or file_name - char *data_or_filename=vdata->as_string().cstr(); // data - char *data=is_load?file_read(pool, r.absolute(data_or_filename)):data_or_filename; + Temp_lang temp_lang(r, String::L_PASS_APPENDED); + StringSplitHelper sdata(r.process_to_string(params.as_junction(data_param_index, "body must be table or code"))); + char *data=sdata.base; + + // parse columns + Table::columns_type columns; + if(nameless) { + columns=0; // nameless + } else { + columns=new ArrayString; + while( lsplit_sresult sr=lsplit(&data, control_chars.separators, control_chars.encloser, sdata) ) { + *columns+=sr.piece; + if(sr.delim=='\n') + break; + } + } + + Table& table=*new Table(columns); + int columns_count=columns ? columns->count(): 0; + + // parse cells + Table::element_type row(new ArrayString(columns_count)); + skip_clean_empty_lines(&data, sdata); + while( lsplit_sresult sr=lsplit(&data, control_chars.separators, control_chars.encloser, sdata) ) { + if(sr.piece->is_empty() && !sr.delim && !row->count()) // append last empty column [if without \n] + break; + *row+=sr.piece; + if(sr.delim=='\n') { + table+=row; + row=new ArrayString(columns_count); + skip_clean_empty_lines(&data, sdata); + } + } + // last line [if without \n] + if(row->count()) + table+=row; + + // replace any previous table value + GET_SELF(r, VTable).set_table(table); +} + +struct lsplit_result { + char* piece; + char delim; + + lsplit_result(char *apiece=0) : piece(apiece), delim(0){} + operator bool() { return piece!=0; } +}; + +inline lsplit_result lsplit(char* *string_ref, const char* delims) { + lsplit_result result(*string_ref); + if(result.piece) { + if(char* v=strpbrk(result.piece, delims)) { + result.delim=*v; + *v=0; + *string_ref=v+1; + return result; + } + *string_ref=0; + } + return result; +} + +static lsplit_result lsplit(char** string_ref, const char* delims, char encloser) { + lsplit_result result(*string_ref); + + if(result.piece) { + if(encloser && *result.piece==encloser) { + result.piece++; + + char c; + char *read; + char *write; + write=read=result.piece; + + // we are enclosed, searching for second encloser + while(c=*read++) { + if(c==encloser) { + if(*read==encloser) // double-encloser stands for encloser + read++; + else + break; // note: skipping encloser + } + *write++=c; + } + + // we are no longer enclosed, searching for delimiter + while(c=*read++) { + if(c==delims[0] || c==delims[1]) { + result.delim=c; + break; + } else + *write++=c; + } + + *write=0; // terminate + *string_ref=c ? read : 0; + return result; + } else + return lsplit(string_ref, delims); + } + return result; +} + +static void skip_empty_and_comment_lines( char** data_ref ) { + while(*data_ref) { + if(**data_ref == '\n'){ + (*data_ref)++; + } else { + if(**data_ref == '#' ) + /*nowhere=*/getrow(data_ref); + else + break; + } + } +} + +static void skip_empty_lines( char** data_ref ) { + if(*data_ref) { + while(**data_ref == '\n') + (*data_ref)++; + } +} + +typedef void (*Skip_lines_action)(char** data_ref); + +static void _load(Request& r, MethodParams& params) { + const String& first_param=params.as_string(0, FILE_NAME_MUST_BE_STRING); + int filename_param_index=0; + bool nameless=first_param=="nameless"; + if(nameless) + filename_param_index++; + size_t options_param_index=filename_param_index+1; + + HashStringValue *options=0; + TableControlChars control_chars; + if(options_param_indexsize()==2) { - columns=0; - } else { - columns=new(pool) Array(pool); - - if(char *row_chars=getrow(&data)) - do { - String *name=new(pool) String(pool); - name->APPEND(lsplit(&row_chars, '\t'), 0, file, line++); - *columns+=name; - } while(row_chars); + Table::columns_type columns; + if(nameless) { + columns=0; // nameless + } else { + columns=new ArrayString; + + skip_lines_action(&data); + while( lsplit_result sr=lsplit(&data, control_chars.separators, control_chars.encloser) ) { + *columns+=new String(sr.piece, String::L_TAINTED); + if(sr.delim=='\n') + break; + } } + + Table& table=*new Table(columns); + int columns_count=columns ? columns->count(): 0; // parse cells - Table& table=*new(pool) Table(pool, method_name, columns); - char *row_chars; - while(row_chars=getrow(&data)) { - Array *row=new(pool) Array(pool); - while(char *cell_chars=lsplit(&row_chars, '\t')) { - String *cell=new(pool) String(pool); - cell->APPEND(cell_chars, 0, file, line); - *row+=cell; + Table::element_type row(new ArrayString(columns_count)); + skip_lines_action(&data); + while( lsplit_result sr=lsplit(&data, control_chars.separators, control_chars.encloser) ) { + if(!*sr.piece && !sr.delim && !row->count()) // append last empty column [if without \n] + break; + *row+=new String(sr.piece, String::L_TAINTED); + if(sr.delim=='\n') { + table+=row; + row=new ArrayString(columns_count); + skip_lines_action(&data); } - line++; + } + // last line [if without \n] + if(row->count()) table+=row; - }; + + // replace any previous table value + GET_SELF(r, VTable).set_table(table); +} + +#ifdef USE_STRINGSTREAM +#include "gc_allocator.h" + +typedef std::basic_stringstream, gc_allocator > pa_stringstream; +typedef std::basic_string, gc_allocator > pa_string; + +static void enclose( pa_stringstream& to, const String* from, char encloser ) { + if(from){ + to<pos( encloser, pos_after ))!=STRING_NOT_FOUND; pos_after=pos_before) { + pos_before++; // including first encloser (and skipping it for next pos) + to<mid(pos_after, pos_before).cstr(); + to<length(); + if(pos_aftermid(pos_after, from_length).cstr(); + to< i(*table.columns()); i.has_next(); ) { + enclose( result, i.next(), control_chars.encloser ); + if(i.has_next()) + result< i(*table.columns()); i.has_next(); ) { + result<cstr(); + if(i.has_next()) + result<count():0) + for(int column=0; column i(table); + if(control_chars.encloser){ + while(i.has_next()) { + for(Array_iterator c(*i.next()); c.has_next(); ) { + enclose( result, c.next(), control_chars.encloser ); + if(c.has_next()) + result< c(*i.next()); c.has_next(); ) { + result<cstr(); + if(c.has_next()) + result<pos( encloser, pos_after ))!=STRING_NOT_FOUND; pos_after=pos_before) { + pos_before++; // including first encloser (and skipping it for next pos) + to<mid(pos_after, pos_before); + to<<*sencloser; // doubling encloser + } + // last piece + size_t from_length=from->length(); + if(pos_aftermid(pos_after, from_length); + to<<*sencloser; + } else { + to<<*sencloser<<*sencloser; + } +} + +static void table_to_csv(String& result, Table& table, TableControlChars& control_chars, bool output_column_names) { + if(output_column_names) { + if(table.columns()) { // named table + if(control_chars.encloser) { + for(Array_iterator i(*table.columns()); i.has_next(); ) { + enclose( result, i.next(), control_chars.encloser, control_chars.sencloser ); + if(i.has_next()) + result<<*control_chars.sseparator; + } + } else { + for(Array_iterator i(*table.columns()); i.has_next(); ) { + result<<*i.next(); + if(i.has_next()) + result<<*control_chars.sseparator; + } + } + } else { // nameless table [we were asked to output column names] + if(int lsize=table.count()?table[0]->count():0) + for(int column=0; column i(table); + if(control_chars.encloser){ + while(i.has_next()) { + for(Array_iterator c(*i.next()); c.has_next(); ) { + enclose( result, c.next(), control_chars.encloser, control_chars.sencloser ); + if(c.has_next()) + result<<*control_chars.sseparator; + } + result.append_know_length("\n", 1, String::L_CLEAN); + } + } else { + while(i.has_next()) { + for(Array_iterator c(*i.next()); c.has_next(); ) { + result<<*c.next(); + if(c.has_next()) + result<<*control_chars.sseparator; + } + result.append_know_length("\n", 1, String::L_CLEAN); + } + } +} +#endif // don't use stringstream + + +static void _save(Request& r, MethodParams& params) { + const String& first_arg=params.as_string(0, FIRST_ARG_MUST_NOT_BE_CODE); + size_t param_index=1; + + bool do_append=false; + bool output_column_names=true; + + // mode? + if(first_arg=="append") + do_append=true; + else if(first_arg=="nameless") + output_column_names=false; + else + --param_index; + + const String& file_name=params.as_string(param_index++, FILE_NAME_MUST_NOT_BE_CODE); + String file_spec=r.absolute(file_name); + + if(do_append && file_exist(file_spec)) + output_column_names=false; + + TableControlChars control_chars; + if(param_indexcount()) + throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); + } + + if(param_index0 && params[0].is_string()) { + if(params.as_string(0, FIRST_ARG_MUST_NOT_BE_CODE)=="nameless") { + output_column_names=false; + param_index++; + } else { + throw Exception(PARSER_RUNTIME, 0, "bad mode (must be nameless)"); + } + } + + TableControlChars control_chars; + if(param_indexcount()) + throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); + } + + Table& table=GET_SELF(r, VTable).table(); + +#ifdef USE_STRINGSTREAM + pa_stringstream ost(std::stringstream::out); + + table_to_csv(ost, table, control_chars, output_column_names); + + r.write_no_lang(*new VString(*new String(pa_strdup(ost.str().c_str()), String::L_CLEAN))); +#else + String sdata; + + table_to_csv(sdata, table, control_chars, output_column_names); + + r.write_no_lang(*new VString(*new String(sdata.cstr(), String::L_CLEAN))); +#endif +} + +static void _count(Request& r, MethodParams& params) { + Table& table=GET_SELF(r, VTable).table(); + size_t result=0; + if(params.count()) { + const String& param=params.as_string(0, PARAMETER_MUST_BE_STRING); + if(param == "columns") + result = table.columns() ? table.columns()->count() : table.max_cells(); + else if(param == "cells") + result = table.count() ? table[table.current()]->count() : 0; + else if(param == "rows") // synonim for ^table.count[] + result = table.count(); + else + throw Exception(PARSER_RUNTIME, ¶m, "parameter must be 'columns', 'cells' and 'rows' only"); + } else + result = table.count(); + + r.write_no_lang(*new VInt(result)); +} + +static void _line(Request& r, MethodParams&) { + int result=1+GET_SELF(r, VTable).table().current(); + r.write_no_lang(*new VInt(result)); +} + +static void _offset(Request& r, MethodParams& params) { + Table& table=GET_SELF(r, VTable).table(); + if(params.count()) { + bool absolute=false; + if(params.count()>1) { + const String& whence=params.as_string(0, "whence must be string"); + if(whence=="cur") + absolute=false; + else if(whence=="set") + absolute=true; + else + throw Exception(PARSER_RUNTIME, &whence, "is invalid whence, valid are 'cur' or 'set'"); + } + + int offset=params.as_int(params.count()-1, "offset must be expression", r); + table.offset(absolute, offset); + } else + r.write_no_lang(*new VInt(table.current())); +} + +static void _menu(Request& r, MethodParams& params) { + InCycle temp(r); + + Value& body_code=params.as_junction(0, "body must be code"); + + Value* delim_maybe_code=params.count()>1?¶ms[1]:0; + + Table& table=GET_SELF(r, VTable).table(); + size_t saved_current=table.current(); + + if(delim_maybe_code) { // delimiter set + bool need_delim=false; + for(size_t row=0; rowis_empty()) { // we have body + if(need_delim) // need delim & iteration produced string? + r.write_pass_lang(r.process(*delim_maybe_code)); + else + need_delim=true; + } + + r.write_pass_lang(sv_processed); + + if(lskip==Request::SKIP_BREAK) + break; + } + } else { + for(size_t row=0; row* value_fields; + Value* value_code; + HashStringValue* hash; + Table2hash_distint distinct; + size_t row; + Table2hash_value_type value_type; +}; +#endif +static void table_row_to_hash(Table::element_type row, Row_info *info) { + const String* key; + if(info->key_code) { + info->table->set_current(info->row++); // change context row + StringOrValue sv_processed=info->r->process(*info->key_code); + key=&sv_processed.as_string(); + } else { + key=info->key_fieldcount()?row->get(info->key_field):0; + } + + if(!key) + return; // ignore rows without key [too-short-record_array if-indexed] + + bool exist=false; + switch(info->value_type) { + case C_STRING: { + if(info->value_fields->count()){ + size_t index=info->value_fields->get(0); + exist=info->hash->put_dont_replace(*key, (index < row->count()) ? new VString(*row->get(index)) : VString::empty()); + } else { + exist=info->hash->put_dont_replace(*key, VString::empty()); + } + break; + } + case C_HASH: { + VHash* vhash=new VHash; + HashStringValue& hash=vhash->hash(); + for(Array_iterator i(*info->value_fields); i.has_next(); ) { + size_t value_field=i.next(); + if(value_fieldcount()) + hash.put(*info->table->columns()->get(value_field), new VString(*row->get(value_field))); + } + + exist=info->hash->put_dont_replace(*key, vhash); + break; + } + case C_TABLE: { + VTable* vtable=(VTable*)info->hash->get(*key); // table exist? + Table* table; + if(vtable) { + if(info->distinct==D_ILLEGAL) { + exist=true; + break; + } + table=vtable->get_table(); + } else { + // no? creating table of same structure as source + Table::Action_options table_options(0, 0); + table=new Table(*info->table, table_options/*no rows, just structure*/); + info->hash->put(*key, new VTable(table)); + } + Table::element_type row_copy(new ArrayString(row->count())); + row_copy->append(*row); + *table+=row_copy; + break; + } + case C_CODE: { + if(!info->key_code) + info->table->set_current(info->row++); // change context row + exist=info->hash->put_dont_replace(*key, &info->r->process(*info->value_code).as_value()); + break; + } + } + if(exist && info->distinct==D_ILLEGAL) + throw Exception(PARSER_RUNTIME, key, "duplicate key"); +} + +Table2hash_value_type get_value_type(Value& vvalue_type){ + if(vvalue_type.is_string()) { + const String& svalue_type=*vvalue_type.get_string(); + if(svalue_type == "table"){ + return C_TABLE; + } else if (svalue_type == "string") { + return C_STRING; + } else if (svalue_type == "hash") { + return C_HASH; + } else { + throw Exception(PARSER_RUNTIME, &svalue_type, "must be 'hash', 'table' or 'string'"); + } + } else { + throw Exception(PARSER_RUNTIME, 0, "'type' must be string"); + } +} + +static Table2hash_distint get_distinct(Value& vdistinct, Table2hash_value_type& value_type){ + if(vdistinct.is_string()) { + const String& sdistinct=*vdistinct.get_string(); + if(sdistinct!="tables") + throw Exception(PARSER_RUNTIME, &sdistinct, "must be 'tables' or true/false"); + value_type=C_TABLE; + return D_FIRST; + } + return vdistinct.as_bool() ? D_FIRST : D_ILLEGAL; +} + +static void _hash(Request& r, MethodParams& params) { + Table& self_table=GET_SELF(r, VTable).table(); + VHash& result=*new VHash; + if(Table::columns_type columns=self_table.columns()){ + if(columns->count()>0) { + Table2hash_distint distinct=D_ILLEGAL; + Table2hash_value_type value_type=C_HASH; + int param_index=params.count()-1; + if(param_index>0) { + + if(params[1].get_junction()) + value_type=C_CODE; + + if(HashStringValue* options=params[param_index].get_hash()){ // can't use .as_hash because the 2nd param could be table so .as_hash throws an error + --param_index; + int valid_options=0; + if(Value* vdistinct_code=options->get(sql_distinct_name)) { // $.distinct ? + valid_options++; + distinct=get_distinct(r.process_to_value(*vdistinct_code), value_type); + } + if(Value* vvalue_type_code=options->get(sql_value_type_name)) { // $.type ? + if(value_type==C_TABLE) // $.distinct[tables] already was specified + throw Exception(PARSER_RUNTIME, 0, "you can't specify $.distinct[tables] and $.type[] together"); + if(value_type==C_CODE) + throw Exception(PARSER_RUNTIME, 0, "you can't specify $.type[] if value is code"); + valid_options++; + value_type=get_value_type(r.process_to_value(*vvalue_type_code)); + } + + if(valid_options!=options->count()) + throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); + } + } + + if(param_index==2) // options were specified but not as hash + throw Exception(PARSER_RUNTIME, 0, "options must be hash"); + + Array value_fields; + Value* value_code=0; + + if(param_index==0){ // list of columns wasn't specified + if(value_type==C_STRING) // $.type[string] + throw Exception(PARSER_RUNTIME, 0, "you must specify one value field with option $.type[string]"); + + for(size_t i=0; icount(); i++) // by all columns, including key + value_fields+=i; + + } else { // list of columns or code was specified + if(value_type==C_TABLE) + throw Exception(PARSER_RUNTIME, 0, "you can't specify value field(s) with option $.distinct[tables] or $.type[tables]"); + + Value& value_fields_param=params[1]; + if(value_fields_param.get_junction()){ // code specified + value_code=&value_fields_param; + } else if(value_fields_param.is_string()) { // one column as string was specified + const String &field_name=*value_fields_param.get_string(); + if(!field_name.is_empty()) + value_fields+=self_table.column_name2index(field_name, true); + } else if(Table* value_fields_table=value_fields_param.get_table()) { // list of columns were specified in table + for(Array_iterator i(*value_fields_table); i.has_next(); ) { + const String& value_field_name =*i.next()->get(0); + value_fields +=self_table.column_name2index(value_field_name, true); + } + } else + throw Exception(PARSER_RUNTIME, 0, "value field(s) must be string or table or code"); + } + + if(value_type==C_STRING && value_fields.count()!=1) + throw Exception(PARSER_RUNTIME, 0, "you can specify only one value field with option $.type[string]"); + + { + Value* key_param=¶ms[0]; + Row_info info={ + &r, + &self_table, + /*key_code=*/key_param->get_junction() ? key_param : 0, + /*key_field=*/0/*filled below*/, + &value_fields, + value_code, + &result.hash(), + distinct, + /*row=*/0, + value_type + }; + info.key_field=(info.key_code ? -1 : self_table.column_name2index(key_param->as_string(), true)); + + int saved_current=self_table.current(); + self_table.for_each(table_row_to_hash, &info); + self_table.set_current(saved_current); + + result.extract_default(); + } + } + } + r.write_no_lang(result); +} + +#ifndef DOXYGEN +struct Table_seq_item { + ArrayString* row; + union { + const char *c_str; + double d; + } value; +}; +#endif +static int sort_cmp_string(const void *a, const void *b) { + return strcmp( + static_cast(a)->value.c_str, + static_cast(b)->value.c_str + ); +} +static int sort_cmp_double(const void *a, const void *b) { + double va=static_cast(a)->value.d; + double vb=static_cast(b)->value.d; + if(vavb) + return +1; + else + return 0; +} +static void _sort(Request& r, MethodParams& params) { + Value& key_maker=params.as_junction(0, "key-maker must be code"); + + bool reverse=params.count()>1/*..[desc|asc|]*/? + reverse=params.as_no_junction(1, "order must not be code").as_string()=="desc": + false; // default=asc + + Table& old_table=GET_SELF(r, VTable).table(); + Table& new_table=*new Table(old_table.columns()); + + Table_seq_item* seq=new(PointerFreeGC) Table_seq_item[old_table.count()]; + int i; + + // calculate key values + bool key_values_are_strings=true; + int old_count=old_table.count(); + for(i=0; ias_vtable().set_table(table); + GET_SELF(r, VTable).set_table(new_table); } +#ifndef DOXYGEN +struct Expression_is_true_info { + Request* r; + Value* expression_code; +}; +#endif -static void _set(Request& r, const String& method_name, Array *params) { - set_or_load(r, method_name, params, false); +static bool expression_is_true(Table&, Expression_is_true_info* info) { + return info->r->process_to_value(*info->expression_code).as_bool(); } -static void _load(Request& r, const String& method_name, Array *params) { - set_or_load(r, method_name, params, true); +static bool _locate_expression(Table& table, Request& r, MethodParams& params) { + Value& expression_code=params.as_junction(0, "must be expression"); + Table::Action_options o=get_action_options(r, params, 1, table); + if(params.count()>2) + throw Exception(PARSER_RUNTIME, 0, "locate by expression only has parameters: expression and, maybe, options"); + Expression_is_true_info info={&r, &expression_code}; + return table.table_first_that(expression_is_true, &info, o); } -static void _count(Request& r, const String&, Array *) { - Pool& pool=r.pool(); - Value& value=*new(pool) VInt(pool, r.self->as_vtable().table().size()); - r.wcontext->write(value, String::Untaint_lang::NO /*always object, not string*/); +static bool _locate_name_value(Table& table, Request& r, MethodParams& params) { + const String& name=params.as_string(0, "column name must be string"); + const String& value=params.as_string(1, VALUE_MUST_BE_STRING); + Table::Action_options o=get_action_options(r, params, 2, table); + return table.locate(name, value, o); } -static void _line(Request& r, const String&, Array *) { - Pool& pool=r.pool(); - Value& value=*new(pool) VInt(pool, 1+r.self->as_vtable().table().get_current()); - r.wcontext->write(value, String::Untaint_lang::NO /*always object, not string*/); +static void _locate(Request& r, MethodParams& params) { + Table& table=GET_SELF(r, VTable).table(); + + bool result=params[0].get_junction() || (params.count() == 1) ? + _locate_expression(table, r, params) : + _locate_name_value(table, r, params); + r.write_no_lang(VBool::get(result)); } -static void _offset(Request& r, const String&, Array *params) { - Pool& pool=r.pool(); - Table& table=r.self->as_vtable().table(); - if(params->size()) { - if(int size=table.size()) { - int offset=static_cast( - r.process(*static_cast(params->get(0))).get_double()); - table.set_current((table.get_current()+offset+size)%size); + +static void _flip(Request& r, MethodParams&) { + Table& old_table=GET_SELF(r, VTable).table(); + Table& new_table=*new Table(0); + if(size_t old_count=old_table.count()) + if(size_t old_cols=old_table.columns()?old_table.columns()->count():old_table.max_cells()) + for(size_t column=0; columncount()?old_row->get(column):new String; + } + new_table+=new_row; + } + + r.write_no_lang(*new VTable(&new_table)); +} + +static void _foreach(Request& r, MethodParams& params) { + InCycle temp(r); + + const String* rownum_var_name=¶ms.as_string(0, "rownum-var name must be string"); + const String* value_var_name=¶ms.as_string(1, "value-var name must be string"); + + Value& body_code=params.as_junction(2, "body must be code"); + + Value* delim_maybe_code=params.count()>3?¶ms[3]:0; + + Table& table=GET_SELF(r, VTable).table(); + size_t saved_current=table.current(); + + rownum_var_name=rownum_var_name->is_empty()? 0 : rownum_var_name; + value_var_name=value_var_name->is_empty()? 0 : value_var_name; + + Value* var_context=r.get_method_frame()->caller(); + + if(delim_maybe_code) { // delimiter set + bool need_delim=false; + for(size_t row=0; rowis_empty()) { // we have body + if(need_delim) // need delim & iteration produced string? + r.write_pass_lang(r.process(*delim_maybe_code)); + else + need_delim=true; + } + + r.write_pass_lang(sv_processed); + + if(lskip==Request::SKIP_BREAK) + break; } } else { - Value& value=*new(pool) VInt(pool, table.get_current()); - r.wcontext->write(value, String::Untaint_lang::NO /*always object, not string*/); + for(size_t row=0; rowput_element(String(aname, String::L_CLEAN), avalue); // new not required +} + +inline Table::element_type row_from_string(Request& r, Value ¶m){ + if(!param.is_string() && !param.get_junction()) + throw Exception(PARSER_RUNTIME, 0, "row must be string, code or hash"); + + Temp_lang temp_lang(r, String::L_PASS_APPENDED); + const String& string=r.process_to_string(param); + + // parse cells + Table::element_type row=new ArrayString; + string.split(*row, 0, "\t", String::L_AS_IS); + + return row; +} + +static void _append(Request& r, MethodParams& params) { + VTable vtable=GET_SELF(r, VTable); + Table& table=vtable.table(); + + HashStringValue* hash=params[0].get_hash(); + if(hash){ + table+=new ArrayString(); + size_t saved_current=table.current(); + table.set_current(table.count()-1); + hash->for_each(update_cell, &vtable); + table.set_current(saved_current); + } else { + table+=row_from_string(r, params[0]); + } +} + +static void _insert(Request& r, MethodParams& params) { + VTable vtable=GET_SELF(r, VTable); + Table& table=vtable.table(); + HashStringValue* hash=params[0].get_hash(); + if(hash){ + table.insert(table.current(), new ArrayString()); + hash->for_each(update_cell, &vtable); + } else { + table.insert(table.current(), row_from_string(r, params[0])); + } +} + +static void _delete(Request& r, MethodParams&) { + Table& table=GET_SELF(r, VTable).table(); + table.remove_current(); +} + +static void join_named_row(Table& src, Table* dest) { + Table::columns_type dest_columns=dest->columns(); + size_t dest_columns_count=dest_columns->count(); + Table::element_type dest_row(new ArrayString(dest_columns_count)); + for(size_t dest_column=0; dest_columnget(dest_column)); + *dest_row+=src_item?src_item:new String; + } + *dest+=dest_row; +} +static void join_nameless_row(Table& src, Table* dest) { + *dest+=src[src.current()]; +} +static void _join(Request& r, MethodParams& params) { + Table& src=*params.as_table(0, "source"); + + Table::Action_options o=get_action_options(r, params, 1, src); + + Table& dest=GET_SELF(r, VTable).table(); + if(&src == &dest) + throw Exception(PARSER_RUNTIME, 0, "source and destination are same table"); + + if(dest.columns()) // dest is named + src.table_for_each(join_named_row, &dest, o); + else // dest is nameless + src.table_for_each(join_nameless_row, &dest, o); +} + +#ifndef DOXYGEN +class Table_sql_event_handlers: public SQL_Driver_query_event_handlers { + ArrayString& columns; + int columns_count; + ArrayString* row; +public: + Table* table; +public: + Table_sql_event_handlers() : + columns(*new ArrayString), row(0), table(0) { + } + + bool add_column(SQL_Error& error, const char *str, size_t ) { + try { + columns+=new String(str, String::L_TAINTED /* no length as 0x00 can be inside */); + return false; + } catch(...) { + error=SQL_Error("exception occured in Table_sql_event_handlers::add_column"); + return true; + } + } + bool before_rows(SQL_Error& error) { + try { + table=new Table(&columns); + columns_count=columns.count(); + return false; + } catch(...) { + error=SQL_Error("exception occured in Table_sql_event_handlers::before_rows"); + return true; + } + } + bool add_row(SQL_Error& error) { + try { + *table+=row=new ArrayString(columns_count); + return false; + } catch(...) { + error=SQL_Error("exception occured in Table_sql_event_handlers::add_row"); + return true; + } + } + bool add_row_cell(SQL_Error& error, const char* str, size_t ) { + try { + *row+=str?new String(str, String::L_TAINTED /* no length as 0x00 can be inside */):&String::Empty; + return false; + } catch(...) { + error=SQL_Error("exception occured in Table_sql_event_handlers::add_row_cell"); + return true; + } + } +}; +#endif + +static void marshal_bind( + HashStringValue::key_type aname, + HashStringValue::value_type avalue, + SQL_Driver::Placeholder** pptr) +{ + SQL_Driver::Placeholder& ph=**pptr; + ph.name=aname.cstr(); + ph.value=avalue->as_string().untaint_cstr(String::L_AS_IS); + ph.is_null=avalue->get_class()==void_class; + ph.were_updated=false; + + (*pptr)++; +} + +// not static, used elsewhere +int marshal_binds(HashStringValue& hash, SQL_Driver::Placeholder*& placeholders) { + int hash_count=hash.count(); + placeholders=new SQL_Driver::Placeholder[hash_count]; + SQL_Driver::Placeholder* ptr=placeholders; + hash.for_each(marshal_bind, &ptr); + return hash_count; +} + +// not static, used elsewhere +void unmarshal_bind_updates(HashStringValue& hash, int placeholder_count, SQL_Driver::Placeholder* placeholders) { + SQL_Driver::Placeholder* ph=placeholders; + for(int i=0; iwere_updated) { + Value* value; + if(ph->is_null) + value=VVoid::get(); + else + value=new VString(*new String(ph->value, String::L_TAINTED)); + hash.put(ph->name, value); + } +} + +static void _sql(Request& r, MethodParams& params) { + Value& statement=params.as_junction(0, "statement must be code"); + + HashStringValue* bind=0; + ulong limit=SQL_NO_LIMIT; + ulong offset=0; + if(params.count()>1) + if(HashStringValue* options=params.as_hash(1, "sql options")) { + int valid_options=0; + if(Value* vbind=options->get(sql_bind_name)) { + valid_options++; + bind=vbind->get_hash(); + } + if(Value* vlimit=options->get(sql_limit_name)) { + valid_options++; + limit=(ulong)r.process_to_value(*vlimit).as_double(); + } + if(Value* voffset=options->get(sql_offset_name)) { + valid_options++; + offset=(ulong)r.process_to_value(*voffset).as_double(); + } + if(valid_options!=options->count()) + throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); + } + + SQL_Driver::Placeholder* placeholders=0; + uint placeholders_count=0; + if(bind) + placeholders_count=marshal_binds(*bind, placeholders); + + Temp_lang temp_lang(r, String::L_SQL); + const String& statement_string=r.process_to_string(statement); + const char* statement_cstr=statement_string.untaint_cstr(r.flang, r.connection()); + + Table_sql_event_handlers handlers; + + r.connection()->query( + statement_cstr, + placeholders_count, placeholders, + offset, limit, + handlers, + statement_string); + + if(bind) + unmarshal_bind_updates(*bind, placeholders_count, placeholders); + + Table& result= + handlers.table?*handlers.table: // query resulted in table? return it + *new Table(Table::columns_type(0)); // query returned no table, fake it + + // replace any previous table value + GET_SELF(r, VTable).set_table(result); +} + +static void _columns(Request& r, MethodParams& params) { + const String* column_column_name; + if(params.count()>0) + column_column_name=¶ms.as_string(0, COLUMN_NAME_MUST_BE_STRING); + else + column_column_name=new String("column"); + + Table::columns_type result_columns(new ArrayString); + *result_columns+=column_column_name; + Table& result_table=*new Table(result_columns); + + Table& source_table=GET_SELF(r, VTable).table(); + if(Table::columns_type source_columns=source_table.columns()) { + for(Array_iterator i(*source_columns); i.has_next(); ) { + Table::element_type result_row(new ArrayString); + *result_row+=i.next(); + result_table+=result_row; + } } + + r.write_no_lang(*new VTable(&result_table)); } -void initialize_table_class(Pool& pool, VClass& vclass) { - // ^table.set[data] ^table.set[nameless;data] - vclass.add_native_method("set", _set, 1, 2); +static void _select(Request& r, MethodParams& params) { + Value& vcondition=params.as_expression(0, "condition must be number, bool or expression"); + + Table& source_table=GET_SELF(r, VTable).table(); + + int limit=source_table.count(); + int offset=0; + bool reverse=false; + + if(params.count()>1) + if(HashStringValue* options=params.as_hash(1)) { + int valid_options=0; + if(Value* vlimit=options->get(sql_limit_name)) { + valid_options++; + limit=r.process_to_value(*vlimit).as_int(); + } + if(Value* voffset=options->get(sql_offset_name)) { + valid_options++; + offset=r.process_to_value(*voffset).as_int(); + } + if(Value* vreverse=options->get(table_reverse_name)) { + valid_options++; + reverse=r.process_to_value(*vreverse).as_bool(); + } + if(valid_options!=options->count()) + throw Exception(PARSER_RUNTIME, 0, CALLED_WITH_INVALID_OPTION); + } - // ^table.load[file] ^table.load[nameless;file] - vclass.add_native_method("load", _load, 1, 2); + Table& result_table=*new Table(source_table.columns()); + + size_t size=source_table.count(); + if(offset<0) + offset+=size; + if(size && limit>0 && offset>=0 && (size_t)offset (size_t)offset) // ...condition is true, adding to the result + result_table+=source_table[row]; + if(row==0) break; + } + } else { + for(size_t row=0; row < size && result_table.count() < (size_t)limit; row++) { + source_table.set_current(row); + + bool condition=r.process_to_value(vcondition, false/*don't intercept string*/).as_bool(); + + if(condition && ++appended > (size_t)offset) // ...condition is true, adding to the result + result_table+=source_table[row]; + } + } + source_table.set_current(saved_current); + } + + r.write_no_lang(*new VTable(&result_table)); +} + +// constructor + +MTable::MTable(): Methoded("table") { + // ^table::create{data} + // ^table::create[nameless]{data} + // ^table::create[table] + add_native_method("create", Method::CT_DYNAMIC, _create, 1, 3); + // old name for compatibility with <= v 1.141 2002/01/25 11:33:45 paf + add_native_method("set", Method::CT_DYNAMIC, _create, 1, 3); + + // ^table::load[file] + // ^table::load[nameless;file] + add_native_method("load", Method::CT_DYNAMIC, _load, 1, 3); + + // ^table.save[file] + // ^table.save[nameless;file] + add_native_method("save", Method::CT_DYNAMIC, _save, 1, 3); + + // add_native_method("save_old", Method::CT_DYNAMIC, _save_old, 1, 3); + + // ^table.csv-string[] + // ^table.csv-string[nameless] + // ^table.csv-string[nameless;$.encloser["] $.separator[,]] + add_native_method("csv-string", Method::CT_DYNAMIC, _csv_string, 0, 2); // ^table.count[] - vclass.add_native_method("count", _count, 0, 0); + // ^table.count[rows] + // ^table.count[columns] + // ^table.count[cells] + add_native_method("count", Method::CT_DYNAMIC, _count, 0, 1); // ^table.line[] - vclass.add_native_method("line", _line, 0, 0); + add_native_method("line", Method::CT_DYNAMIC, _line, 0, 0); + + // ^table.offset[] + // ^table.offset(offset) + // ^table.offset[cur|set](offset) + add_native_method("offset", Method::CT_DYNAMIC, _offset, 0, 2); + + // ^table.menu{code} + // ^table.menu{code}[delim] + add_native_method("menu", Method::CT_DYNAMIC, _menu, 1, 2); + + // ^table.hash[key field name] + // ^table.hash[key field name][value field name(s) string/table] + add_native_method("hash", Method::CT_DYNAMIC, _hash, 1, 3); + + // ^table.sort{string-key-maker} ^table.sort{string-key-maker}[desc|asc] + // ^table.sort(numeric-key-maker) ^table.sort(numeric-key-maker)[desc|asc] + add_native_method("sort", Method::CT_DYNAMIC, _sort, 1, 2); + + // ^table.locate[field;value] + add_native_method("locate", Method::CT_DYNAMIC, _locate, 1, 3); + + // ^table.flip[] + add_native_method("flip", Method::CT_DYNAMIC, _flip, 0, 0); - // ^table.offset[] ^table.offset[offset] - vclass.add_native_method("offset", _offset, 0, 1); -} + // ^table.foreach[row-num;value]{code} + // ^table.foreach[row-num;value]{code}[delim] + add_native_method("foreach", Method::CT_DYNAMIC, _foreach, 3, 4); + + // ^table.append{row{tab}data} + add_native_method("append", Method::CT_DYNAMIC, _append, 1, 1); + + // ^table.insert{row{tab}data} before current row + add_native_method("insert", Method::CT_DYNAMIC, _insert, 1, 1); + + // ^table.delete[] current row + add_native_method("delete", Method::CT_DYNAMIC, _delete, 0, 0); + + // ^table.join[table][$.limit(10) $.offset(1) $.offset[cur] ] + add_native_method("join", Method::CT_DYNAMIC, _join, 1, 2); + + + // ^table::sql[query] + // ^table::sql[query][$.limit(1) $.offset(2)] + add_native_method("sql", Method::CT_DYNAMIC, _sql, 1, 2); + + // ^table.columns[[column name]] + add_native_method("columns", Method::CT_DYNAMIC, _columns, 0, 1); + + // ^table.select(expression) = table + add_native_method("select", Method::CT_DYNAMIC, _select, 1, 2); +}