--- parser3/src/main/compile.y 2001/02/20 19:21:13 1.2 +++ parser3/src/main/compile.y 2009/05/24 07:32:40 1.243 @@ -1,526 +1,1592 @@ %{ -#define YYSTYPE void * -#define YYPARSE_PARAM pc -#define YYLEX_PARAM pc -#define YYDEBUG 1 -#define YYERROR_VERBOSE -#define YYPRINT(file, type, value) yyprint (file, type, value) - -#include -#include -#include +/** @file + Parser: compiler(lexical parser and grammar). -#include "compile_tools.h" + Copyright (c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com) + Author: Alexander Petrosyan (http://design.ru/paf) + + $Id: compile.y,v 1.243 2009/05/24 07:32:40 misha Exp $ +*/ + +/** + @todo parser4: + - cache compiled code from request to request. to do that... + -#: make method definitions, @CLASS, @BASE, @USE instructions, + which would be executed afterwards, and actions + now performed at compile time would be delayed to run time. + -#: make cache expiration on time and on disk-change of class source + -#: in apache use subpools for compiled class storage + -#: in iis make up specialized Pool object for that +*/ -int yyerror (char *s); -static void yyprint (FILE *file, int type, YYSTYPE value); -int yylex(YYSTYPE *lvalp, void *pc); +#define YYSTYPE ArrayOperation* +#define YYPARSE_PARAM pc +#define YYLEX_PARAM pc +#define YYDEBUG 1 +#define YYERROR_VERBOSE 1 +#define yyerror(msg) real_yyerror((Parse_control *)pc, msg) +#define YYPRINT(file, type, value) yyprint(file, type, value) +// includes -#define PC ((struct parse_control *)pc) +#include "compile_tools.h" +#include "pa_value.h" +#include "pa_request.h" +#include "pa_vobject.h" +#include "pa_vdouble.h" +#include "pa_globals.h" +#include "pa_vvoid.h" +#include "pa_vmethod_frame.h" + +// defines + +#define USE_CONTROL_METHOD_NAME "USE" +#define OPTIONS_CONTROL_METHOD_NAME "OPTIONS" +#define OPTION_ALL_VARS_LOCAL_NAME "locals" +#define OPTION_PARTIAL_CLASS "partial" +#define REM_OPERATOR_NAME "rem" + +// forwards + +static int real_yyerror(Parse_control* pc, char* s); +static void yyprint(FILE* file, int type, YYSTYPE value); +static int yylex(YYSTYPE* lvalp, void* pc); + +static const VBool vfalse(false); +static const VBool vtrue(true); +static const VVoid vvoid; + +// local convinient inplace typecast & var +#undef PC +#define PC (*(Parse_control *)pc) +#undef POOL +#define POOL (*PC.pool) +#ifndef DOXYGEN %} %pure_parser -%token BREAK -%token STR_LITERAL +%token EON +%token STRING %token BOGUS +%token BAD_STRING_COMPARISON_OPERATOR +%token BAD_HEX_LITERAL +%token BAD_METHOD_DECL_START +%token BAD_METHOD_PARAMETER_NAME_CHARACTER +%token BAD_NONWHITESPACE_CHARACTER_IN_EXPLICIT_RESULT_MODE + +%token LAND "&&" +%token LOR "||" +%token LXOR "!||" +%token NXOR "!|" + +%token NLE "<=" +%token NGE ">=" +%token NEQ "==" +%token NNE "!=" +%token NSL "<<" +%token NSR ">>" + +%token SLT "lt" +%token SGT "gt" +%token SLE "le" +%token SGE "ge" +%token SEQ "eq" +%token SNE "ne" + +%token DEF "def" +%token IN "in" +%token FEXISTS "-f" +%token DEXISTS "-d" +%token IS "is" + +%token LITERAL_TRUE "true" +%token LITERAL_FALSE "false" + +/* logical */ +%left "!||" +%left "||" +%left "&&" +%left '<' '>' "<=" ">=" "lt" "gt" "le" "ge" +%left "==" "!=" "eq" "ne" +%left "is" "def" "in" "-f" "-d" + +/* bitwise */ +%left "!|" +%left '|' +%left '&' +%left "<<" ">>" + +/* numerical */ +%left '+' '-' +%left '*' '/' '\\' '%' +%left NUNARY /* unary - + */ + +/* out-of-group */ +%left '~' /* bitwise */ +%left '!' /* logical */ + %% +all: + one_big_piece { + Method& method=*new Method(Method::CT_ANY, + 0, 0, /*min, max numbered_params_count*/ + 0/*param_names*/, 0/*local_names*/, + $1/*parser_code*/, 0/*native_code*/); + PC.cclass->add_method(PC.alias_method(main_method_name), method); +} +| methods; -result: input { PC->result=$1 }; -input: empty | codes; +methods: method | methods method; +one_big_piece: maybe_codes; -/* codes */ +method: control_method | code_method; -codes: code | codes code { - $$=$1; - P(&$$,$2); +control_method: '@' STRING '\n' + maybe_control_strings { + const String& command=*LA2S(*$2); + YYSTYPE strings_code=$4; + if(strings_code->count()<1*OPERATIONS_PER_OPVALUE) { + strcpy(PC.error, "@"); + strcat(PC.error, command.cstr()); + strcat(PC.error, " is empty"); + YYERROR; + } + if(command==CLASS_NAME) { + if(strings_code->count()==1*OPERATIONS_PER_OPVALUE) { + // new class' name + const String& name=*LA2S(*strings_code); + // creating the class + VStateless_class* cclass=new VClass; + PC.cclass_new=cclass; + PC.cclass_new->set_name(name); + } else { + strcpy(PC.error, "@"CLASS_NAME" must contain only one line with class name (contains more then one)"); + YYERROR; + } + } else if(command==USE_CONTROL_METHOD_NAME) { + for(size_t i=0; icount(); i+=OPERATIONS_PER_OPVALUE) + PC.request.use_file(PC.request.main_class, *LA2S(*strings_code, i)); + } else if(command==BASE_NAME) { + if(PC.append){ + strcpy(PC.error, "can't set base while appending methods to class '"); + strncat(PC.error, PC.cclass->name().cstr(), MAX_STRING/2); + strcat(PC.error, "'"); + YYERROR; + } + PC.class_add(); + if(PC.cclass->base_class()) { // already changed from default? + strcpy(PC.error, "class already have a base '"); + strncat(PC.error, PC.cclass->base_class()->name().cstr(), MAX_STRING/2); + strcat(PC.error, "'"); + YYERROR; + } + if(strings_code->count()==1*OPERATIONS_PER_OPVALUE) { + const String& base_name=*LA2S(*strings_code); + if(Value* base_class_value=PC.request.classes().get(base_name)) { + // @CLASS == @BASE sanity check + if(VStateless_class *base_class=base_class_value->get_class()) { + if(PC.cclass==base_class) { + strcpy(PC.error, "@"CLASS_NAME" equals @"BASE_NAME); + YYERROR; + } + PC.cclass->get_class()->set_base(base_class); + } else { // they asked to derive from a class without methods ['env' & co] + strcpy(PC.error, base_name.cstr()); + strcat(PC.error, ": you can not derive from this class in @"BASE_NAME); + YYERROR; + } + } else { + strcpy(PC.error, base_name.cstr()); + strcat(PC.error, ": undefined class in @"BASE_NAME); + YYERROR; + } + } else { + strcpy(PC.error, "@"BASE_NAME" must contain sole name"); + YYERROR; + } + } else if(command==OPTIONS_CONTROL_METHOD_NAME) { + for(size_t i=0; icount(); i+=OPERATIONS_PER_OPVALUE) { + const String& option=*LA2S(*strings_code, i); + if(option==OPTION_ALL_VARS_LOCAL_NAME){ + PC.set_all_vars_local(); + } else if(option==OPTION_PARTIAL_CLASS){ + if(PC.cclass_new){ + if(VStateless_class* existed=PC.get_existed_class(PC.cclass_new)){ + if(!PC.reuse_existed_class(existed)){ + strcpy(PC.error, "can't append methods to '"); + strncat(PC.error, PC.cclass_new->name().cstr(), MAX_STRING/2); + strcat(PC.error, "' - the class wasn't marked as partial"); + YYERROR; + } + } else { + // mark new class as partial. we can add methods to it later. + PC.cclass_new->set_partial(); + } + } else { + strcpy(PC.error, "'"OPTION_PARTIAL_CLASS"' option should be used straight after @"CLASS_NAME); + YYERROR; + } + } else { + strcpy(PC.error, "'"); + strncat(PC.error, option.cstr(), MAX_STRING/2); + strcat(PC.error, "' invalid option. valid options are " + "'"OPTION_PARTIAL_CLASS"' and '"OPTION_ALL_VARS_LOCAL_NAME"'"); + YYERROR; + } + } + } else { + strcpy(PC.error, "'"); + strncat(PC.error, command.cstr(), MAX_STRING/2); + strcat(PC.error, "' invalid special name. valid names are " + "'"CLASS_NAME"', '"USE_CONTROL_METHOD_NAME"', '"BASE_NAME"' and '"OPTIONS_CONTROL_METHOD_NAME"'."); + YYERROR; + } }; -code: write_str_literal | action; -action: get | put | with | call; +maybe_control_strings: empty | control_strings; +control_strings: control_string | control_strings control_string { $$=$1; P(*$$, *$2) }; +control_string: maybe_string '\n'; +maybe_string: empty | STRING; + +code_method: '@' STRING bracketed_maybe_strings maybe_bracketed_strings maybe_comment '\n' { + PC.class_add(); + PC.explicit_result=false; + + YYSTYPE params_names_code=$3; + ArrayString* params_names=0; + if(int size=params_names_code->count()) { + params_names=new ArrayString; + for(int i=0; icount()) { + locals_names=new ArrayString; + for(int i=0; iis_vars_local()) + all_vars_local=true; -get: '$' any_name { - $$=$2; /* stack: resulting value */ - A(&$$,OP_WRITE); /* value=pop; write(value) */ + Method* method=new Method( + //name, + Method::CT_ANY, + 0, 0/*min,max numbered_params_count*/, + params_names, locals_names, + 0/*to be filled later in next {} */, 0, all_vars_local); + + *reinterpret_cast(&$$)=method; + + // todo: check [][;result;] +} maybe_codes { + Method& method=*reinterpret_cast($7); + // fill in the code + method.parser_code=$8; + + // register in class + const String& name=*LA2S(*$2); + PC.cclass->add_method(PC.alias_method(name), method); }; -any_name: name_without_curly_rdive BREAK | name_in_curly_rdive; +maybe_bracketed_strings: empty | bracketed_maybe_strings; +bracketed_maybe_strings: '[' maybe_strings ']' {$$=$2}; +maybe_strings: empty | strings; +strings: STRING | strings ';' STRING { $$=$1; P(*$$, *$3) }; + +maybe_comment: empty | STRING; +/* codes */ + +maybe_codes: empty | codes; + +codes: code | codes code { $$=$1; P(*$$, *$2) }; +code: write_string | action; +action: get | put | call; + +/* get */ + +get: get_value { + $$=N(); + YYSTYPE code=$1; +#ifdef OPTIMIZE_BYTECODE_GET_ELEMENT + if(!maybe_change_first_opcode(*code, OP::OP_VALUE__GET_ELEMENT, /*=>*/OP::OP_VALUE__GET_ELEMENT__WRITE)) +#endif + { + size_t count=code->count(); + size_t len=6; + +#ifdef OPTIMIZE_BYTECODE_GET_OBJECT_ELEMENT + if( + count==len + && maybe_change_first_opcode(*code, OP::OP_GET_OBJECT_ELEMENT, OP::OP_GET_ELEMENT, /*=>*/OP::OP_GET_OBJECT_ELEMENT__WRITE) + ){ + //P(*$$, *code, 0/*offset*/, count-1/*limit*/); // someday skip last OP_GET_ELEMENT + //break; + } else +#endif + +#ifdef OPTIMIZE_BYTECODE_GET_OBJECT_VAR_ELEMENT + if( + count==len + && maybe_change_first_opcode(*code, OP::OP_GET_OBJECT_VAR_ELEMENT, OP::OP_GET_ELEMENT, /*=>*/OP::OP_GET_OBJECT_VAR_ELEMENT__WRITE) + ){ + //P(*$$, *code, 0/*offset*/, count-1/*limit*/); // someday skip last OP_GET_ELEMENT + //break; + } else +#endif + + changetail_or_append(*code, + OP::OP_GET_ELEMENT, false, /*=>*/OP::OP_GET_ELEMENT__WRITE, + /*or */OP::OP_WRITE_VALUE + ); /* value=pop; wcontext.write(value) */ + } + P(*$$, *code); +}; +get_value: '$' get_name_value { $$=$2 }; +get_name_value: name_without_curly_rdive EON | name_in_curly_rdive; name_in_curly_rdive: '{' name_without_curly_rdive '}' { $$=$2 }; -name_without_curly_rdive: name_rdive { - /* - TODO: подсмотреть в $1, и если там в первом элементе первая буква ":" - то выкинуть её и делать не OP_WITH_READ, а WITH_ROOT - TODO: подсмотреть в $1, и если там первым элементом self, - то выкинуть его и делать не OP_WITH_READ, а WITH_SELF - */ - $$=N(PC->pool); A(&$$, OP_WITH_READ); /* stack: starting context */ - P(&$$,$1); /* diving code; stack: current context */ +name_without_curly_rdive: + name_without_curly_rdive_read +| name_without_curly_rdive_class; +name_without_curly_rdive_read: name_without_curly_rdive_code { + $$=N(); + YYSTYPE diving_code=$1; + const String* first_name=LA2S(*diving_code); + size_t count=diving_code->count(); + // self.xxx... => xxx... + // OP_VALUE+origin+string+OP_GET_ELEMENT+... -> OP_WITH_SELF+... + if(first_name && *first_name==SELF_ELEMENT_NAME) { + O(*$$, OP::OP_WITH_SELF); /* stack: starting context */ + P(*$$, *diving_code, + /* skip over... */ + count>=4?4/*OP_VALUE+origin+string+OP_GET_ELEMENTx*/:3/*OP::OP_+origin+string*/); + } + +#ifdef OPTIMIZE_BYTECODE_GET_OBJECT_ELEMENT + else if(maybe_make_get_object_element(*$$, *diving_code, count)){ + // optimisation for $object.field + ^object.method[ + } +#endif + +#ifdef OPTIMIZE_BYTECODE_GET_OBJECT_VAR_ELEMENT + else if(maybe_make_get_object_var_element(*$$, *diving_code, count)){ + // optimisation for $object.$var + } +#endif + +#ifdef OPTIMIZE_BYTECODE_GET_ELEMENT + else if(count==4){ // optimization + O(*$$, + (PC.in_call_value) + ? OP::OP_VALUE__GET_ELEMENT_OR_OPERATOR // ^object[ : OP_VALUE+origin+string+OP_GET_ELEMENT => OP_VALUE__GET_ELEMENT_OR_OPERATOR+origin+string + : OP::OP_VALUE__GET_ELEMENT // $object : OP_VALUE+origin+string+OP_GET_ELEMENT => OP_VALUE__GET_ELEMENT+origin+string + ); + P(*$$, *diving_code, 1/*offset*/, 2/*limit*/); // copy origin+value + } else { + O(*$$, OP::OP_WITH_READ); /* stack: starting context */ + P(*$$, *diving_code); + } +#else + else { + O(*$$, OP::OP_WITH_READ); /* stack: starting context */ + + // ^if OP_ELEMENT => ^if OP_ELEMENT_OR_OPERATOR + // optimized OP_VALUE+origin+string+OP_GET_ELEMENT. => OP_VALUE+origin+string+OP_GET_ELEMENT_OR_OPERATOR. + if(PC.in_call_value && count==4) + diving_code->put(count-1, OP::OP_GET_ELEMENT_OR_OPERATOR); + P(*$$, *diving_code); + } +#endif + /* diving code; stack: current context */ }; -name_rdive: name_advance2 | name_path name_advance2 { $$=$1; P(&$$,$2) } +name_without_curly_rdive_class: class_prefix name_without_curly_rdive_code { $$=$1; P(*$$, *$2) }; +name_without_curly_rdive_code: name_advance2 | name_path name_advance2 { $$=$1; P(*$$, *$2) }; /* put */ -put: '$' name_expr_dive '(' constructor_value ')' { -/* - TODO: подсмотреть в $3, и если там в первом элементе первая буква ":" - то выкинуть её и делать не OP_WITH_OP_WRITE, а WITH_ROOT - TODO: подсмотреть в $3, и если там первым элементом self, - то выкинуть его и делать не OP_WITH_OP_WRITE, а WITH_SELF - если ничего не осталось - $self(xxx) - обругать -*/ - $$=N(PC->pool); - A(&$$, OP_WITH_WRITE); /* stack: starting context */ - P(&$$,$2); /* diving code; stack: context,name */ - P(&$$,$4); /* stack: context,name,constructor_value */ - A(&$$,OP_CONSTRUCT); /* value=pop; name=pop; context=pop; construct(context,name,value) */ -}; -constructor_value: - constructor_one_param_value -| constructor_two_params_value /* $var(=;2*2) $var(%d;2*2) $var(+;1) */ -; -constructor_one_param_value: - empty_value /* optimized $var() case */ -| STR_LITERAL /* optimized $var(STR_LITERAL) case */ -| complex_constructor_param_value /* $var(something complex) */ -; -empty_value: empty; -complex_constructor_param_value: complex_constructor_param_body { - $$=N(PC->pool); - A(&$$, OP_CREATE_EWPOOL); /* stack: empty write context */ - P(&$$,$1); /* some codes to that context */ - A(&$$,OP_REDUCE_EWPOOL); /* context=pop; stack: context.value() */ -}; -complex_constructor_param_body: - codes__excluding_sole_str_literal -| codes__str__followed_by__excluding_sole_str_literal -; -constructor_two_params_value: STR_LITERAL ';' constructor_one_param_value { - char *operator_or_fmt=string_cstr(LS($1)); - $$=N(PC->pool); - G(&$$, operator_or_fmt);/* stack: ncontext name char*operator_or_fmt */ - P(&$$, $3); /* stack: ncontext name char*operator_or_fmt expr */ - switch(operator_or_fmt[0]) { - case '=': case '%': - A(&$$, OP_EXPRESSION_EVAL); - break; - case '+': case '-': case '*': case '/': - A(&$$, OP_MODIFY_EVAL); - break; - default: - exception(PC->pool, 0,0, LS($1), "invalid modification operator"); +put: '$' name_expr_wdive construct { + $$=N(); +#if defined(OPTIMIZE_BYTECODE_CONSTRUCT) || defined(OPTIMIZE_BYTECODE_CALL_CONSTRUCT) + if(maybe_make_root_or_write_construct(*$$, *$2, *$3)){ + // $a(1), $.a(1), $a[b], $.a[b] + // $a($b), $.a($b), $a[$b], $.a[$b] + // $a($b.c), $.a($b.c), $a[$b.c], $.a[$b.c] + // $a($b.$c), $.a($b.$c), $a[$b.$c], $.a[$b.$c] + } else +#endif + { + P(*$$, *$2); /* stack: context,name */ + P(*$$, *$3); /* stack: context,name,constructor_value */ + } +}; +name_expr_wdive: + name_expr_wdive_root +| name_expr_wdive_write +| name_expr_wdive_class; +name_expr_wdive_root: name_expr_dive_code { + $$=N(); + YYSTYPE diving_code=$1; + const String* first_name=LA2S(*diving_code); + // $self.xxx... => $xxx... + // OP_VALUE+origin+string+OP_GET_ELEMENT+... => OP_WITH_SELF+... + if(first_name && *first_name==SELF_ELEMENT_NAME) { + O(*$$, OP::OP_WITH_SELF); /* stack: starting context */ + P(*$$, *diving_code, + /* skip over... */ + diving_code->count()>=4?4/*OP::OP_VALUE+origin+string+OP::OP_GET_ELEMENTx*/:3/*OP::OP_+origin+string*/); + } else { + O(*$$, OP::OP_WITH_ROOT); /* stack: starting context */ + P(*$$, *diving_code); } - /* stack: ncontext name value */ + /* diving code; stack: current context */ +}; +name_expr_wdive_write: '.' name_expr_dive_code { + $$=N(); + O(*$$, OP::OP_WITH_WRITE); /* stack: starting context */ + P(*$$, *$2); /* diving code; stack: context,name */ }; +name_expr_wdive_class: class_prefix name_expr_dive_code { $$=$1; P(*$$, *$2) }; +construct: + construct_square +| construct_round +| construct_curly +; +construct_square: '[' { + // allow $result_or_other_variable[ letters here any time ] + *reinterpret_cast(&$$)=PC.explicit_result; PC.explicit_result=false; +} any_constructor_code_value { + PC.explicit_result=*reinterpret_cast(&$2); +} ']' { + // stack: context, name + $$=$3; // stack: context, name, value + O(*$$, OP::OP_CONSTRUCT_VALUE); /* value=pop; name=pop; context=pop; construct(context,name,value) */ +} +; +construct_round: '(' expr_value ')' { + $$=N(); + O(*$$, OP::OP_PREPARE_TO_EXPRESSION); + // stack: context, name + P(*$$, *$2); // stack: context, name, value + O(*$$, OP::OP_CONSTRUCT_EXPR); /* value=pop->as_expr_result; name=pop; context=pop; construct(context,name,value) */ +} +; +construct_curly: '{' maybe_codes '}' { + // stack: context, name + $$=N(); + OA(*$$, OP::OP_CURLY_CODE__CONSTRUCT, $2); /* code=pop; name=pop; context=pop; construct(context,name,junction(code)) */ +}; + +any_constructor_code_value: + void_value /* optimized $var[] case */ +| STRING /* optimized $var[STRING] case */ +| constructor_code_value /* $var[something complex] */ +; +constructor_code_value: constructor_code { + $$=N(); + OA(*$$, OP::OP_OBJECT_POOL, $1); /* stack: empty write context */ + /* some code that writes to that context */ + /* context=pop; stack: context.value() */ +}; +constructor_code: codes__excluding_sole_str_literal; +codes__excluding_sole_str_literal: action | code codes { $$=$1; P(*$$, *$2) }; /* call */ -call: '^' name_expr_dive store_params BREAK { /* ^field.$method{vasya} */ -/* - TODO: подсмотреть в $3, и если там в первом элементе первая буква ":" - то выкинуть её и делать не OP_WITH_READ, а WITH_ROOT - TODO: подсмотреть в $3, и если там первым элементом self, - то выкинуть его и делать не OP_WITH_READ, а WITH_SELF - TODO: - если первым в $3 идёт result - то - выкинуть его - если там ещё что-то осталось, - то - не OP_WITH_READ, а WITH_RESULT - иначе // ^result(value) - обругать безобразие -*/ - $$=N(PC->pool); - A(&$$, OP_WITH_READ); /* stack: starting context */ - P(&$$,$2); /* diving code; stack: context,method_name */ - A(&$$,OP_GET_METHOD_FRAME); /* stack: context,method_frame */ - P(&$$,$3); /* filling method_frame.store_params */ - A(&$$,OP_CALL); /* method_frame=pop; ncontext=pop; call(ncontext,method_frame) */ +call: call_value { +#ifdef OPTIMIZE_BYTECODE_CUT_REM_OPERATOR + if((*$1).count()) +#endif + { + $$=$1; /* stack: value */ + changetail_or_append(*$$, + OP::OP_CALL, true, /*=>*/ OP::OP_CALL__WRITE, + /*or */OP::OP_WRITE_VALUE); /* value=pop; wcontext.write(value) */ + } }; +call_value: '^' { + PC.in_call_value=true; + } + call_name { + PC.in_call_value=false; + } + store_params EON { /* ^field.$method{vasya} */ +#ifdef OPTIMIZE_BYTECODE_CUT_REM_OPERATOR +#ifdef OPTIMIZE_BYTECODE_GET_ELEMENT + const String* operator_name=LA2S(*$3, 0, OP::OP_VALUE__GET_ELEMENT_OR_OPERATOR); +#else + const String* operator_name=LA2S(*$3, 1); +#endif + if(operator_name && *operator_name == REM_OPERATOR_NAME){ + $$=N(); + } else +#endif + { + $$=$3; /* with_xxx,diving code; stack: context,method_junction */ + + YYSTYPE params_code=$5; + if(params_code->count()==3) { // probably [] case. [OP::OP_VALUE+origin+Void] + if(Value* value=LA2V(*params_code)) // it is OP_VALUE+origin+value? + if(value->is_void()) // value is VVoid? + params_code=0; // ^zzz[] case. don't append lone empty param. + } + /* stack: context, method_junction */ + OA(*$$, OP::OP_CALL, params_code); // method_frame=make frame(pop junction); ncontext=pop; call(ncontext,method_frame) stack: value + } +}; + +call_name: name_without_curly_rdive; -store_params: store_param | store_params store_param { $$=$1; P(&$$,$2) }; -store_param: store_round_param | store_curly_param; -store_round_param: '(' store_param_parts ')' {$$=$2}; -store_param_parts: store_param_part | store_param_parts ';' store_param_part { $$=$1; P(&$$,$3) }; -store_param_part: constructor_one_param_value { +store_params: store_param | store_params store_param { $$=$1; P(*$$, *$2) }; +store_param: + store_square_param +| store_round_param +| store_curly_param +; +store_square_param: '[' { + // allow ^call[ letters here any time ] + *reinterpret_cast(&$$)=PC.explicit_result; PC.explicit_result=false; +} store_code_param_parts { + PC.explicit_result=*reinterpret_cast(&$2); +} ']' {$$=$3}; +store_round_param: '(' store_expr_param_parts ')' {$$=$2}; +store_curly_param: '{' store_curly_param_parts '}' {$$=$2}; +store_code_param_parts: + store_code_param_part +| store_code_param_parts ';' store_code_param_part { $$=$1; P(*$$, *$3) } +; +store_expr_param_parts: + store_expr_param_part +| store_expr_param_parts ';' store_expr_param_part { $$=$1; P(*$$, *$3) } +; +store_curly_param_parts: + store_curly_param_part +| store_curly_param_parts ';' store_curly_param_part { $$=$1; P(*$$, *$3) } +; +store_code_param_part: code_param_value { $$=$1; - A(&$$,OP_STORE_PARAM); -} -store_curly_param: '{' input '}' { - $$=N(PC->pool); - A(&$$, OP_CODE_ARRAY); - G(&$$,$2); - A(&$$,OP_CREATE_JUNCTION); - A(&$$,OP_STORE_PARAM); }; +store_expr_param_part: expr_value { + YYSTYPE expr_code=$1; + if(expr_code->count()==3 + && (*expr_code)[0].code==OP::OP_VALUE) { // optimizing (double/bool/incidently 'string' too) case. [OP::OP_VALUE+origin+Double]. no evaluating + $$=expr_code; + } else { + YYSTYPE code=N(); + O(*code, OP::OP_PREPARE_TO_EXPRESSION); + P(*code, *expr_code); + O(*code, OP::OP_WRITE_EXPR_RESULT); + $$=N(); + OA(*$$, OP::OP_EXPR_CODE__STORE_PARAM, code); + } +}; +store_curly_param_part: maybe_codes { + $$=N(); + OA(*$$, OP::OP_CURLY_CODE__STORE_PARAM, $1); +}; +code_param_value: + void_value /* optimized [;...] case */ +| STRING /* optimized [STRING] case */ +| constructor_code_value /* [something complex] */ +; /* name */ -name_expr_dive: name_expr_value | name_path name_expr_value { $$=$1; P(&$$,$2) }; +name_expr_dive_code: name_expr_value | name_path name_expr_value { $$=$1; P(*$$, *$2) }; -name_path: name_step | name_path name_step { $$=$1; P(&$$,$2) }; +name_path: name_step | name_path name_step { $$=$1; P(*$$, *$2) }; name_step: name_advance1 '.'; name_advance1: name_expr_value { + // we know that name_advance1 not called from ^xxx context + // so we'll not check for operator call possibility as we do in name_advance2 + /* stack: context */ $$=$1; /* stack: context,name */ - A(&$$,OP_GET_ELEMENT); /* name=pop; context=pop; stack: context.get_element(name) */ + O(*$$, OP::OP_GET_ELEMENT); /* name=pop; context=pop; stack: context.get_element(name) */ }; name_advance2: name_expr_value { /* stack: context */ $$=$1; /* stack: context,name */ - A(&$$,OP_GET_ELEMENT); /* name=pop; context=pop; stack: context.get_element(name) */ + O(*$$, OP::OP_GET_ELEMENT); /* name=pop; context=pop; stack: context.get_element(name) */ } -| STR_LITERAL BOGUS +| STRING BOGUS ; name_expr_value: - STR_LITERAL /* subname_is_const */ + STRING /* subname_is_const */ | name_expr_subvar_value /* $subname_is_var_value */ -| name_expr_with_subvar_value /* xxx$part_of_subname_is_var_value[$...] */ +| name_expr_with_subvar_value /* xxx$part_of_subname_is_var_value */ +| name_square_code_value /* [codes] */ ; name_expr_subvar_value: '$' subvar_ref_name_rdive { $$=$2; - A(&$$,OP_GET_ELEMENT); + O(*$$, OP::OP_GET_ELEMENT); }; -name_expr_with_subvar_value: STR_LITERAL subvar_get_writes { - $$=N(PC->pool); - A(&$$, OP_CREATE_EWPOOL); - P(&$$,$1); - A(&$$,OP_WRITE); - P(&$$,$2); - A(&$$,OP_REDUCE_EWPOOL); +name_expr_with_subvar_value: STRING subvar_get_writes { + YYSTYPE code; + { + change_string_literal_to_write_string_literal(*(code=$1)); + P(*code, *$2); + } + $$=N(); + OA(*$$, OP::OP_STRING_POOL, code); }; -subvar_ref_name_rdive: STR_LITERAL { -/* - TODO: подсмотреть в $1, и если там в первом элементе первая буква ":" - то выкинуть её и делать не OP_WITH_READ, а WITH_ROOT -*/ - $$=N(PC->pool); A(&$$, OP_WITH_READ); - P(&$$,$1); +name_square_code_value: '[' { + // allow $result_or_other_variable[ letters here any time ] + *reinterpret_cast(&$$)=PC.explicit_result; PC.explicit_result=false; +} codes { + PC.explicit_result=*reinterpret_cast(&$2); +} ']' { + $$=N(); + OA(*$$, OP::OP_OBJECT_POOL, $3); /* stack: empty write context */ + /* some code that writes to that context */ + /* context=pop; stack: context.value() */ +}; +subvar_ref_name_rdive: STRING { + $$=N(); + O(*$$, OP::OP_WITH_READ); + P(*$$, *$1); }; -subvar_get_writes: subvar__get_write | subvar_get_writes subvar__get_write { $$=$1; P(&$$,$2) }; +subvar_get_writes: subvar__get_write | subvar_get_writes subvar__get_write { $$=$1; P(*$$, *$2) }; subvar__get_write: '$' subvar_ref_name_rdive { $$=$2; - A(&$$,OP_GET_ELEMENT__WRITE); + O(*$$, OP::OP_GET_ELEMENT__WRITE); }; +class_prefix: + class_static_prefix +| class_constructor_prefix +; +class_static_prefix: STRING ':' { + $$=$1; // stack: class name string + if(*LA2S(*$$) == BASE_NAME) { // pseudo BASE class + if(VStateless_class* base=PC.cclass->base_class()) { + change_string_literal_value(*$$, base->name()); + } else { + strcpy(PC.error, "no base class declared"); + YYERROR; + } + } +#ifdef OPTIMIZE_BYTECODE_GET_CLASS + // optimized OP_VALUE+origin+string+OP_GET_CLASS => OP_VALUE__GET_CLASS+origin+string + maybe_change_first_opcode(*$$, OP::OP_VALUE, OP::OP_VALUE__GET_CLASS, true/*assert if top opcode != OP_VALUE*/) +#else + O(*$$, OP::OP_GET_CLASS); +#endif +}; +class_constructor_prefix: class_static_prefix ':' { + $$=$1; + if(!PC.in_call_value) { + strcpy(PC.error, ":: not allowed here"); + YYERROR; + } + O(*$$, OP::OP_PREPARE_TO_CONSTRUCT_OBJECT); +}; -/* with */ -with: '$' name_without_curly_rdive '{' codes '}' { - $$=$2; - A(&$$,OP_CREATE_RWPOOL); - P(&$$,$4); - A(&$$,OP_REDUCE_RWPOOL); - A(&$$,OP_WRITE); -}; +/* expr */ -/* codes_in_brackets */ - -codes__str__followed_by__excluding_sole_str_literal: - write_str_literal codes__excluding_sole_str_literal { - $$=$1; - P(&$$,$2); -} -; -codes__excluding_sole_str_literal: - action -| codes__excluding_sole_str_literal write_str_literal { - $$=$1; - P(&$$,$2); -} +expr_value: expr; +expr: + double_or_STRING +| true_value +| false_value +| get_value +| call_value +| '"' string_inside_quotes_value '"' { $$ = $2 } +| '\'' string_inside_quotes_value '\'' { $$ = $2 } +| '(' expr ')' { $$ = $2; } +/* stack: operand // stack: @operand */ +| '-' expr %prec NUNARY { $$=$2; O(*$$, OP::OP_NEG) } +| '+' expr %prec NUNARY { $$=$2 } +| '~' expr { $$=$2; O(*$$, OP::OP_INV) } +| '!' expr { $$=$2; O(*$$, OP::OP_NOT) } +| "def" expr { $$=$2; O(*$$, OP::OP_DEF) } +| "in" expr { $$=$2; O(*$$, OP::OP_IN) } +| "-f" expr { $$=$2; O(*$$, OP::OP_FEXISTS) } +| "-d" expr { $$=$2; O(*$$, OP::OP_DEXISTS) } +/* stack: a,b // stack: a@b */ +| expr '-' expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_SUB) } +| expr '+' expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_ADD) } +| expr '*' expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_MUL) } +| expr '/' expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_DIV) } +| expr '%' expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_MOD) } +| expr '\\' expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_INTDIV) } +| expr "<<" expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_BIN_SL) } +| expr ">>" expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_BIN_SR) } +| expr '&' expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_BIN_AND) } +| expr '|' expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_BIN_OR) } +| expr "!|" expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_BIN_XOR) } +| expr "&&" expr { $$=$1; OA(*$$, OP::OP_NESTED_CODE, $3); O(*$$, OP::OP_LOG_AND) } +| expr "||" expr { $$=$1; OA(*$$, OP::OP_NESTED_CODE, $3); O(*$$, OP::OP_LOG_OR) } +| expr "!||" expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_LOG_XOR) } +| expr '<' expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_NUM_LT) } +| expr '>' expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_NUM_GT) } +| expr "<=" expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_NUM_LE) } +| expr ">=" expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_NUM_GE) } +| expr "==" expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_NUM_EQ) } +| expr "!=" expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_NUM_NE) } +| expr "lt" expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_STR_LT) } +| expr "gt" expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_STR_GT) } +| expr "le" expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_STR_LE) } +| expr "ge" expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_STR_GE) } +| expr "eq" expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_STR_EQ) } +| expr "ne" expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_STR_NE) } +| expr "is" expr { $$=$1; P(*$$, *$3); O(*$$, OP::OP_IS) } ; -write_str_literal: STR_LITERAL { - $$=$1; - A(&$$,OP_WRITE); + +double_or_STRING: STRING { + // optimized OP_STRING => OP_VALUE for doubles + maybe_change_string_literal_to_double_literal(*($$=$1)); }; -/* */ +string_inside_quotes_value: maybe_codes { +#ifdef OPTIMIZE_BYTECODE_STRING_POOL + // it brakes ^if(" 09 "){...} + YYSTYPE code=$1; + $$=N(); + if(code->count()==3 && maybe_change_first_opcode(*code, OP::OP_STRING__WRITE, OP::OP_VALUE)){ + // optimized OP_STRING__WRITE+origin+value => OP_VALUE+origin+value without starting OP_STRING_POOL + P(*$$, *code); + } else { + OA(*$$, OP::OP_STRING_POOL, code); /* stack: empty write context */ + } +#else + $$=N(); + OA(*$$, OP::OP_STRING_POOL, $1); /* stack: empty write context */ +#endif + /* some code that writes to that context */ + /* context=pop; stack: context.get_string() */ +}; + +/* basics */ + +write_string: STRING { + // optimized OP_STRING+OP_WRITE_VALUE => OP_STRING__WRITE + change_string_literal_to_write_string_literal(*($$=$1)) +}; -empty: /* empty */ { $$=N(PC->pool) }; +void_value: /* empty */ { $$=VL(/*we know that we will not change it*/const_cast(&vvoid), 0, 0, 0) } +true_value: "true" { $$ = VL(/*we know that we will not change it*/const_cast(&vtrue), 0, 0, 0) } +false_value: "false" { $$ = VL(/*we know that we will not change it*/const_cast(&vfalse), 0, 0, 0) } + +empty: /* empty */ { $$=N() }; %% +#endif /* 000$111(2222)00 000$111{3333}00 - $,^: push, =0 + $,^: push,=0 1:( { break=pop 2:( ) pop 3:{ } pop 000^111(2222)4444{33333}4000 - $,^: push, =0 + $,^: push,=0 1:( { break=pop 2:( )=4 3:{ }=4 4:[^({]=pop */ -int yylex(YYSTYPE *lvalp, void *pc) { - #define lexical_brackets_nestage PC->brackets_nestages[PC->sp] +inline void ungetc(Parse_control& pc, uint last_line_end_col) { + pc.source--; + if(pc.pos.col==0) { + --pc.pos.line; pc.pos.col=last_line_end_col; + } else + --pc.pos.col; + +} +static int yylex(YYSTYPE *lvalp, void *apc) { + register Parse_control& pc=*static_cast(apc); + + #define lexical_brackets_nestage pc.brackets_nestages[pc.ls_sp] + #define RC {result=c; goto break2; } - register int c; - int result; - char *start; - int start_line; + register int c; + int result; - if(PC->pending_state) { - result=PC->pending_state; - PC->pending_state=0; + if(pc.pending_state) { + result=pc.pending_state; + pc.pending_state=0; return result; } - start=PC->source; - start_line=PC->line; - while(1) { - c=*PC->source++; + const char *begin=pc.source; + Pos begin_pos=pc.pos; + const char *end; + int skip_analized=0; + while(true) { + c=*(end=(pc.source++)); +// fprintf(stderr, "\nchar: %c %02X; nestage: %d, sp=%d", c, c, lexical_brackets_nestage, pc.sp); if(c=='\n') - PC->line++; - - /* escaping: ^^ ^$ ^; ^) ^} ^( ^{ */ + pc.pos_next_line(); + else + pc.pos_next_c(c); +// fprintf(stderr, "\nchar: %c file(%d:%d)", c, pc.pos.line, pc.pos.col); + + if(pc.pos.col==0+1 && c=='@') { + if(pc.ls==LS_DEF_SPECIAL_BODY) { + // @SPECIAL + // ... + // @source; + if(pc.ls==LS_METHOD_AFTER) { + // handle after-method situation + pop_LS(pc); + result=EON; + skip_analized=-1; // return to punctuation afterwards to assure it's literality + goto break2; + } + switch(pc.ls) { +case LS_EXPRESSION_VAR_NAME_WITH_COLON: +case LS_EXPRESSION_VAR_NAME_WITHOUT_COLON: +case LS_VAR_NAME_SIMPLE_WITH_COLON: +case LS_VAR_NAME_SIMPLE_WITHOUT_COLON: +case LS_VAR_NAME_CURLY: +case LS_METHOD_NAME: +case LS_USER_COMMENT: +case LS_DEF_COMMENT: + // no literals in names, please + break; +default: + switch(*pc.source) { + // ^escaping some punctuators + case '^': case '$': case ';': case '@': + case '(': case ')': + case '[': case ']': + case '{': case '}': + case '"': case ':': + if(end!=begin) { + if(!pc.string_start) + pc.string_start=begin_pos; + // append piece till ^ + pc.string.append_strdup_know_length(begin, end-begin); + } + // reset piece 'begin' position & line + begin=pc.source; // ->punctuation + begin_pos=pc.pos; + // skip over _ after ^ + pc.source++; pc.pos.col++; + // skip analysis = forced literal + continue; - if(pending_c == '^' || pending_c == '$' || pending_c == ';' || - pending_c == '(' || pending_c == ')' || - pending_c == '{' || pending_c == '}') { - /* append piece till ^ */ - CSTRING_APPEND(PC->string, start, PC->source-start -1/*^*/, PC->file, start_line); - /* reset piece 'start' position & line */ - start=PC->source+1/*^*/; - start_line=PC->line; - /* skip over ^ and _ */ - PC->source+=2; - /* skip analysis = forced literal */ + // converting ^#HH into char(hex(HH)) + case '#': + if(end!=begin) { + if(!pc.string_start) + pc.string_start=begin_pos; + // append piece till ^ + pc.string.append_strdup_know_length(begin, end-begin); + } + // #HH ? + if(pc.source[1] && isxdigit(pc.source[1]) && pc.source[2] && isxdigit(pc.source[2])) { + char c=(char)( + hex_value[(unsigned char)pc.source[1]]*0x10+ + hex_value[(unsigned char)pc.source[2]]); + if(c==0) { + result=BAD_HEX_LITERAL; + goto break2; // wrong hex value[no ^#00 chars allowed]: bail out + } + // append char(hex(HH)) + pc.string.append(c); + // skip over ^#HH + pc.source+=3; + pc.pos.col+=3; + // reset piece 'begin' position & line + begin=pc.source; // ->after ^#HH + begin_pos=pc.pos; + // skip analysis = forced literal + continue; + } + // just escaped char + // reset piece 'begin' position & line + begin=pc.source; + begin_pos=pc.pos; + // skip over _ after ^ + pc.source++; pc.pos.col++; + // skip analysis = forced literal continue; } + break; + } } - switch(PC->ls) { + // #comment start skipping + if(c=='#' && pc.pos.col==1) { + if(end!=begin) { + if(!pc.string_start) + pc.string_start=begin_pos; + // append piece till # + pc.string.append_strdup_know_length(begin, end-begin); + } + // fall into COMMENT lexical state [wait for \n] + push_LS(pc, LS_USER_COMMENT); + continue; + } + switch(pc.ls) { + + // USER'S = NOT OURS case LS_USER: - if(c=='$') { - push_LS(PC); PC->ls=LS_VAR_NAME_SIMPLE; - result=c; - goto break2; + case LS_NAME_SQUARE_PART: // name.[here].xxx + if(pc.trim_bof) + switch(c) { + case '\n': case ' ': case '\t': + begin=pc.source; + begin_pos=pc.pos; + continue; // skip it + default: + pc.trim_bof=false; + } + switch(c) { + case '$': + push_LS(pc, LS_VAR_NAME_SIMPLE_WITH_COLON); + RC; + case '^': + push_LS(pc, LS_METHOD_NAME); + RC; + case ']': + if(pc.ls==LS_NAME_SQUARE_PART) + if(--lexical_brackets_nestage==0) {// $name.[co<]?>de<]?> + pop_LS(pc); // $name.[co<]>de<]!> + RC; + } + break; + case '[': // $name.[co<[>de] + if(pc.ls==LS_NAME_SQUARE_PART) + lexical_brackets_nestage++; + break; + } + if(pc.explicit_result && c) + switch(c) { + case '\n': case ' ': case '\t': + begin=pc.source; + begin_pos=pc.pos; + continue; // skip it + default: + result=BAD_NONWHITESPACE_CHARACTER_IN_EXPLICIT_RESULT_MODE; + goto break2; + } + break; + + // #COMMENT + case LS_USER_COMMENT: + if(c=='\n') { + // skip comment + begin=pc.source; + begin_pos=pc.pos; + + pop_LS(pc); + continue; } - if(c=='^') { - push_LS(PC); PC->ls=LS_METHOD_NAME; - result=c; - goto break2; + break; + + // STRING IN EXPRESSION + case LS_EXPRESSION_STRING_QUOTED: + case LS_EXPRESSION_STRING_APOSTROFED: + switch(c) { + case '"': + case '\'': + if( + pc.ls == LS_EXPRESSION_STRING_QUOTED && c=='"' || + pc.ls == LS_EXPRESSION_STRING_APOSTROFED && c=='\'') { + pop_LS(pc); //"abc". | 'abc'. + RC; + } + break; + case '$': + push_LS(pc, LS_VAR_NAME_SIMPLE_WITH_COLON); + RC; + case '^': + push_LS(pc, LS_METHOD_NAME); + RC; } break; - /* VAR */ - case LS_VAR_NAME_SIMPLE: - if(c==0 || - c==' '|| c=='\t' || c=='\n' || - c==')' || c=='}') { - pop_LS(PC); - PC->source--; - result=BREAK; - goto break2; + // METHOD DEFINITION + case LS_DEF_NAME: + switch(c) { + case '[': + pc.ls=LS_DEF_PARAMS; + RC; + case '\n': + pc.ls=LS_DEF_SPECIAL_BODY; + RC; } - if(PC->source==start && c=='{') { /* ${name}, no need of BREAK, switching LS */ - PC->ls=LS_VAR_NAME_CURLY; - result=c; - goto break2; + break; + + case LS_DEF_PARAMS: + switch(c) { + case '$': // common error + result=BAD_METHOD_PARAMETER_NAME_CHARACTER; + goto break2; + case ';': + RC; + case ']': + pc.ls=*pc.source=='['?LS_DEF_LOCALS:LS_DEF_COMMENT; + RC; + case '\n': // wrong. bailing out + pop_LS(pc); + RC; } - if(c=='(') { - PC->ls=LS_VAR_ROUND; - lexical_brackets_nestage=1; - result=c; - goto break2; + break; + + case LS_DEF_LOCALS: + switch(c) { + case '[': + case ';': + RC; + case ']': + pc.ls=LS_DEF_COMMENT; + RC; + case '\n': // wrong. bailing out + pop_LS(pc); + RC; + } + break; + + case LS_DEF_COMMENT: + if(c=='\n') { + pop_LS(pc); + RC; } - if(c=='{') { - PC->ls=LS_VAR_CURLY; + break; + + case LS_DEF_SPECIAL_BODY: + if(c=='\n') + RC; + break; + + // (EXPRESSION) + case LS_VAR_ROUND: + case LS_METHOD_ROUND: + switch(c) { + case ')': + if(--lexical_brackets_nestage==0) + if(pc.ls==LS_METHOD_ROUND) // method round param ended + pc.ls=LS_METHOD_AFTER; // look for method end + else // pc.ls==LS_VAR_ROUND // variable constructor ended + pop_LS(pc); // return to normal life + RC; + case '#': // comment start skipping + if(end!=begin) { + if(!pc.string_start) + pc.string_start=begin_pos; + // append piece till # + pc.string.append_strdup_know_length(begin, end-begin); + } + // fall into COMMENT lexical state [wait for \n] + push_LS(pc, LS_EXPRESSION_COMMENT); lexical_brackets_nestage=1; - result=c; + continue; + case '$': + push_LS(pc, LS_EXPRESSION_VAR_NAME_WITH_COLON); + RC; + case '^': + push_LS(pc, LS_METHOD_NAME); + RC; + case '(': + lexical_brackets_nestage++; + RC; + case '-': + switch(*pc.source) { + case 'f': // -f + skip_analized=1; + result=FEXISTS; + goto break2; + case 'd': // -d + skip_analized=1; + result=DEXISTS; + goto break2; + default: // minus + result=c; + goto break2; + } goto break2; - } - if(c=='.'/* name part delim */ || c=='$'/* name part subvar */) { - result=c; + case '+': case '*': case '/': case '%': case '\\': + case '~': + case ';': + RC; + case '&': case '|': + if(*pc.source==c) { // && || + result=c=='&'?LAND:LOR; + skip_analized=1; + } else + result=c; goto break2; - } - break; - case LS_VAR_NAME_CURLY: - if(c=='}') { /* ${name} finished, restoring LS */ - pop_LS(PC); - result=c; + case '!': + switch(pc.source[0]) { + case '|': // !| !|| + skip_analized=1; + if(pc.source[1]=='|') { + skip_analized++; + result=LXOR; + } else + result=NXOR; + goto break2; + case '=': // != + skip_analized=1; + result=NNE; + goto break2; + } + RC; + + case '<': // <<, <=, < + switch(*pc.source) { + case '<': // <[<] + skip_analized=1; result=NSL; break; + case '=': // <[=] + skip_analized=1; result=NLE; break; + default: // <[] + result=c; break; + } goto break2; - } - if(c=='.'/* name part delim */ || c=='$'/*name part subvar*/) { - result=c; + case '>': // >>, >=, > + switch(*pc.source) { + case '>': // >[>] + skip_analized=1; result=NSR; break; + case '=': // >[=] + skip_analized=1; result=NGE; break; + default: // >[] + result=c; break; + } + goto break2; + case '=': // == + switch(*pc.source) { + case '=': // =[=] + skip_analized=1; result=NEQ; break; + default: // =[] + result=c; break; // not used now + } goto break2; + + case '"': + push_LS(pc, LS_EXPRESSION_STRING_QUOTED); + RC; + case '\'': + push_LS(pc, LS_EXPRESSION_STRING_APOSTROFED); + RC; + case 'l': case 'g': case 'e': case 'n': + if(end==begin) // right after whitespace + if(isspace(pc.source[1])) { + switch(*pc.source) { + // case '?': // ok [and bad cases, yacc would bark at them] + case 't': // lt gt [et nt] + result=c=='l'?SLT:c=='g'?SGT:BAD_STRING_COMPARISON_OPERATOR; + skip_analized=1; + goto break2; + case 'e': // le ge ne [ee] + result=c=='l'?SLE:c=='g'?SGE:c=='n'?SNE:BAD_STRING_COMPARISON_OPERATOR; + skip_analized=1; + goto break2; + case 'q': // eq [lq gq nq] + result=c=='e'?SEQ:BAD_STRING_COMPARISON_OPERATOR; + skip_analized=1; + goto break2; + } + } + break; + case 'i': + if(end==begin) // right after whitespace + if(isspace(pc.source[1])) { + switch(pc.source[0]) { + case 'n': // in + skip_analized=1; + result=IN; + goto break2; + case 's': // is + skip_analized=1; + result=IS; + goto break2; + } + } + break; + case 'd': + if(end==begin) // right after whitespace + if(pc.source[0]=='e' && pc.source[1]=='f') { // def + switch(pc.source[2]){ + case ' ': case '\t': case '\n': case '"': case '\'': case '^': case '$': // non-quoted string without whitespace after 'def' is not allowed + skip_analized=2; + result=DEF; + goto break2; + } + // error: incorrect char after 'def' + } + break; + case 't': + if(end==begin) // right after whitespace + if(pc.source[0]=='r' && pc.source[1]=='u' && pc.source[2]=='e') { // true + skip_analized=3; + result=LITERAL_TRUE; + goto break2; + } + break; + case 'f': + if(end==begin) // right after whitespace + if(pc.source[0]=='a' && pc.source[1]=='l' && pc.source[2]=='s' && pc.source[3]=='e') { // false + skip_analized=4; + result=LITERAL_FALSE; + goto break2; + } + break; + case ' ': case '\t': case '\n': + if(end!=begin) { // there were a string after previous operator? + result=0; // return that string + goto break2; + } + // that's a leading|traling space or after-operator-space + // ignoring it + // reset piece 'begin' position & line + begin=pc.source; // after whitespace char + begin_pos=pc.pos; + continue; } break; - case LS_VAR_ROUND: - if(c=='$') { - push_LS(PC); PC->ls=LS_VAR_NAME_SIMPLE; - result=c; - goto break2; + case LS_EXPRESSION_COMMENT: + if(c=='(') + lexical_brackets_nestage++; + + switch(*pc.source) { + case '\n': case ')': + if(*pc.source==')') + if(--lexical_brackets_nestage!=0) + continue; + + // skip comment + begin=pc.source; + begin_pos=pc.pos; + + pop_LS(pc); + continue; } - if(c=='^') { - push_LS(PC); PC->ls=LS_METHOD_NAME; - result=c; - goto break2; + break; + + // VARIABLE GET/PUT/WITH + case LS_VAR_NAME_SIMPLE_WITH_COLON: + case LS_VAR_NAME_SIMPLE_WITHOUT_COLON: + case LS_EXPRESSION_VAR_NAME_WITH_COLON: + case LS_EXPRESSION_VAR_NAME_WITHOUT_COLON: + if( + pc.ls==LS_EXPRESSION_VAR_NAME_WITH_COLON || + pc.ls==LS_EXPRESSION_VAR_NAME_WITHOUT_COLON) { + // name in expr ends also before + switch(c) { + // expression minus + case '-': + // expression integer division + case '\\': + pop_LS(pc); + pc.ungetc(); + result=EON; + goto break2; + } } - if(c==')') { - if(--lexical_brackets_nestage==0) { - pop_LS(PC); - result=c; + if( + pc.ls==LS_VAR_NAME_SIMPLE_WITHOUT_COLON || + pc.ls==LS_EXPRESSION_VAR_NAME_WITHOUT_COLON) { + // name already has ':', stop before next + switch(c) { + case ':': + pop_LS(pc); + pc.ungetc(); + result=EON; goto break2; } } - if(c==';'/* operator_or_fmt;value delim */) { - result=c; - goto break2; + switch(c) { + case 0: + case ' ': case '\t': case '\n': + case ';': + case ']': case '}': case ')': + case '"': case '\'': + case '<': case '>': // these stand for HTML brackets AND expression binary ops + case '+': case '*': case '/': case '%': + case '&': case '|': + case '=': case '!': + // common delimiters + case ',': case '?': case '#': + // mysql column separators + case '`': + // before call + case '^': + pop_LS(pc); + pc.ungetc(); + result=EON; + goto break2; + case '[': + // $name.<[>code] + if(pc.pos.col>1/*not first column*/ && ( + end[-1]=='$'/*was start of get*/ || + end[-1]==':'/*was class name delim */ || + end[-1]=='.'/*was name delim */ + )) { + push_LS(pc, LS_NAME_SQUARE_PART); + lexical_brackets_nestage=1; + RC; + } + pc.ls=LS_VAR_SQUARE; + lexical_brackets_nestage=1; + RC; + case '{': + if(begin==end) { // ${name}, no need of EON, switching LS + pc.ls=LS_VAR_NAME_CURLY; + } else { + pc.ls=LS_VAR_CURLY; + lexical_brackets_nestage=1; + } + + RC; + case '(': + pc.ls=LS_VAR_ROUND; + lexical_brackets_nestage=1; + RC; + case '.': // name part delim + case '$': // name part subvar + case ':': // class<:>name + // go to _WITHOUT_COLON state variant... + if(pc.ls==LS_VAR_NAME_SIMPLE_WITH_COLON) + pc.ls=LS_VAR_NAME_SIMPLE_WITHOUT_COLON; + else if(pc.ls==LS_EXPRESSION_VAR_NAME_WITH_COLON) + pc.ls=LS_EXPRESSION_VAR_NAME_WITHOUT_COLON; + // ...stop before next ':' + RC; } - if(c=='(') - lexical_brackets_nestage++; break; - case LS_VAR_CURLY: - if(c=='$') { - push_LS(PC); PC->ls=LS_VAR_NAME_SIMPLE; - result=c; - goto break2; + + case LS_VAR_NAME_CURLY: + switch(c) { + case '[': + // ${name.<[>code]} + push_LS(pc, LS_NAME_SQUARE_PART); + lexical_brackets_nestage=1; + RC; + case '}': // ${name} finished, restoring LS + pop_LS(pc); + RC; + case '.': // name part delim + case '$': // name part subvar + case ':': // ':name' or 'class:name' + RC; } - if(c=='^') { - push_LS(PC); PC->ls=LS_METHOD_NAME; - result=c; - goto break2; + break; + + case LS_VAR_SQUARE: + switch(c) { + case '$': + push_LS(pc, LS_VAR_NAME_SIMPLE_WITH_COLON); + RC; + case '^': + push_LS(pc, LS_METHOD_NAME); + RC; + case ']': + if(--lexical_brackets_nestage==0) { + pop_LS(pc); + RC; + } + break; + case ';': // operator_or_fmt;value delim + RC; + case '[': + lexical_brackets_nestage++; + break; } - if(c=='}') + break; + + case LS_VAR_CURLY: + switch(c) { + case '$': + push_LS(pc, LS_VAR_NAME_SIMPLE_WITH_COLON); + RC; + case '^': + push_LS(pc, LS_METHOD_NAME); + RC; + case '}': if(--lexical_brackets_nestage==0) { - pop_LS(PC); - result=c; - goto break2; + pop_LS(pc); + RC; } - if(c=='{') + break; + case '{': lexical_brackets_nestage++; + break; + } break; - /* METHOD */ + // METHOD CALL case LS_METHOD_NAME: - if(c=='(') { - PC->ls=LS_METHOD_ROUND; + switch(c) { + case '[': + // ^name.<[>code].xxx + if(pc.pos.col>1/*not first column*/ && ( + end[-1]=='^'/*was start of call*/ || // never, ^[ is literal... + end[-1]==':'/*was class name delim */ || + end[-1]=='.'/*was name delim */ + )) { + push_LS(pc, LS_NAME_SQUARE_PART); + lexical_brackets_nestage=1; + RC; + } + pc.ls=LS_METHOD_SQUARE; lexical_brackets_nestage=1; - result=c; - goto break2; - } - if(c=='{') { - PC->ls=LS_METHOD_CURLY; + RC; + case '{': + pc.ls=LS_METHOD_CURLY; lexical_brackets_nestage=1; - result=c; - goto break2; - } - if(c=='.'/* name part delim */ || c=='$'/* name part subvar */) { - result=c; - goto break2; + RC; + case '(': + pc.ls=LS_METHOD_ROUND; + lexical_brackets_nestage=1; + RC; + case '.': // name part delim + case '$': // name part subvar + case ':': // ':name' or 'class:name' + case '^': // ^abc^xxx wrong. bailing out + case ']': case '}': case ')': // ^abc]}) wrong. bailing out + case ' ': // ^if ( wrong. bailing out + RC; } break; - case LS_METHOD_ROUND: - if(c=='$') { - push_LS(PC); PC->ls=LS_VAR_NAME_SIMPLE; - result=c; - goto break2; - } - if(c=='^') { - push_LS(PC); PC->ls=LS_METHOD_NAME; - result=c; - goto break2; - } - if(c==';'/* param delim */) { - result=c; - goto break2; - } - if(c==')') + + case LS_METHOD_SQUARE: + switch(c) { + case '$': + push_LS(pc, LS_VAR_NAME_SIMPLE_WITH_COLON); + RC; + case '^': + push_LS(pc, LS_METHOD_NAME); + RC; + case ';': // param delim + RC; + case ']': if(--lexical_brackets_nestage==0) { - PC->ls=LS_METHOD_AFTER; - result=c; - goto break2; + pc.ls=LS_METHOD_AFTER; + RC; } - if(c=='(') + break; + case '[': lexical_brackets_nestage++; + break; + } break; + case LS_METHOD_CURLY: - if(c=='$') { - push_LS(PC); PC->ls=LS_VAR_NAME_SIMPLE; - result=c; - goto break2; - } - if(c=='^') { - push_LS(PC); PC->ls=LS_METHOD_NAME; - result=c; - goto break2; - } - if(c=='}') + switch(c) { + case '$': + push_LS(pc, LS_VAR_NAME_SIMPLE_WITH_COLON); + RC; + case '^': + push_LS(pc, LS_METHOD_NAME); + RC; + case ';': // param delim + RC; + case '}': if(--lexical_brackets_nestage==0) { - PC->ls=LS_METHOD_AFTER; - result=c; - goto break2; + pc.ls=LS_METHOD_AFTER; + RC; } - if(c=='{') + break; + case '{': lexical_brackets_nestage++; + break; + } + if(pc.explicit_result && c) + switch(c) { + case '\n': case ' ': case '\t': + begin=pc.source; + begin_pos=pc.pos; + continue; // skip it + default: + result=BAD_NONWHITESPACE_CHARACTER_IN_EXPLICIT_RESULT_MODE; + goto break2; + } break; + case LS_METHOD_AFTER: - if(c=='(') {/* )( }( */ - PC->ls=LS_METHOD_ROUND; + if(c=='[') {/* ][ }[ )[ */ + pc.ls=LS_METHOD_SQUARE; lexical_brackets_nestage=1; - result=c; - goto break2; + RC; } - if(c=='{') {/* ){ }{ */ - PC->ls=LS_METHOD_CURLY; + if(c=='{') {/* ]{ }{ ){ */ + pc.ls=LS_METHOD_CURLY; lexical_brackets_nestage=1; - result=c; - goto break2; + RC; } - pop_LS(PC); - PC->source--; - result=BREAK; + if(c=='(') {/* ]( }( )( */ + pc.ls=LS_METHOD_ROUND; + lexical_brackets_nestage=1; + RC; + } + pop_LS(pc); + pc.ungetc(); + result=EON; goto break2; } - if (c == 0) { + if(c==0) { result=-1; break; } } break2: - if(PC->source-1<=start) - return result; - else { - PC->pending_state=result; - /* append last piece */ - CSTRING_APPEND(PC->string, start, PC->source-start-1, PC->file, start_line); - /* create STR_LITERAL value: array of OP_STRING+string */ - *lvalp=L(PC->string); - /* new pieces storage */ - PC->string=string_create(PC->pool); - /* go */ - return STR_LITERAL; + if(end!=begin) { // there is last piece? + if((c=='@' || c==0) && end[-1]=='\n') { // we are before LS_DEF_NAME or EOF? + // strip last \n + end--; + if(end!=begin && end[-1]=='\n') // allow one empty line before LS_DEF_NAME + end--; + } + if(end!=begin && pc.ls!=LS_USER_COMMENT) { // last piece still alive and not comment? + if(!pc.string_start) + pc.string_start=begin_pos; + // append it + pc.string.append_strdup_know_length(begin, end-begin); + } + } + if(!pc.string.is_empty()) { // something accumulated? + // create STRING value: array of OP_VALUE+origin+vstring + *lvalp=VL( + new VString(*new String(pc.string, String::L_CLEAN)), + pc.file_no, pc.string_start.line, pc.string_start.col); + // new pieces storage + pc.string.clear(); + pc.string_start.clear(); + // make current result be pending for next call, return STRING for now + pc.pending_state=result; result=STRING; + } + if(skip_analized) { + pc.source+=skip_analized; pc.pos.col+=skip_analized; } + return result; } -int yyerror (char *s) /* Called by yyparse on error */ - { - printf ("[%s]\n", s); +static int real_yyerror(Parse_control *pc, char *s) { // Called by yyparse on error + strncpy(PC.error, s, MAX_STRING); return 1; - } - -static void - yyprint ( - FILE *file, - int type, - YYSTYPE value) - { - if (type == STR_LITERAL) - fprintf (file, " \"%s\"", string_cstr(LS(value))); - } +} +static void yyprint(FILE *file, int type, YYSTYPE value) { + if(type==STRING) + fprintf(file, " \"%s\"", LA2S(*value)->cstr()); +}