|
|
| version 1.4, 2006/11/01 13:56:34 | version 1.17, 2008/07/23 14:08:26 |
|---|---|
| Line 19 static const char * const IDENT_HTTP_C=" | Line 19 static const char * const IDENT_HTTP_C=" |
| #define HTTP_BODY_NAME "body" | #define HTTP_BODY_NAME "body" |
| #define HTTP_TIMEOUT_NAME "timeout" | #define HTTP_TIMEOUT_NAME "timeout" |
| #define HTTP_HEADERS_NAME "headers" | #define HTTP_HEADERS_NAME "headers" |
| #define HTTP_COOKIES_NAME "cookies" | |
| #define HTTP_ANY_STATUS_NAME "any-status" | #define HTTP_ANY_STATUS_NAME "any-status" |
| #define HTTP_CHARSET_NAME "charset" | #define HTTP_OMIT_POST_CHARSET "omit-post-charset" // ^file::load[...;http://...;$.form[...]$.method[post]] |
| // by default add charset to content-type | |
| #define HTTP_TABLES_NAME "tables" | #define HTTP_TABLES_NAME "tables" |
| #define HTTP_USER "user" | #define HTTP_USER "user" |
| #define HTTP_PASSWORD "password" | #define HTTP_PASSWORD "password" |
| Line 283 struct Http_pass_header_info { | Line 287 struct Http_pass_header_info { |
| Request_charsets* charsets; | Request_charsets* charsets; |
| String* request; | String* request; |
| bool user_agent_specified; | bool user_agent_specified; |
| bool content_type_specified; | |
| }; | }; |
| #endif | #endif |
| static void http_pass_header(HashStringValue::key_type key, | static void http_pass_header(HashStringValue::key_type name, |
| HashStringValue::value_type value, | HashStringValue::value_type value, |
| Http_pass_header_info *info) { | Http_pass_header_info *info) { |
| *info->request <<key<<": " | |
| << attributed_meaning_to_string(*value, String::L_HTTP_HEADER, false) | |
| << CRLF; | |
| if(String(key, String::L_TAINTED).change_case(info->charsets->source(), String::CC_UPPER)=="USER-AGENT") | |
| info->user_agent_specified=true; | |
| } | |
| String aname=String(name, String::L_URI); | |
| static Charset* detect_charset(Charset& source_charset, const String& content_type_value) { | *info->request <<aname<<": " |
| const String::Body CONTENT_TYPE_VALUE= | << attributed_meaning_to_string(*value, String::L_URI, false) |
| content_type_value.change_case(source_charset, String::CC_UPPER); | << CRLF; |
| // content-type: xxx/xxx; source_charset=WE-NEED-THIS | |
| // content-type: xxx/xxx; source_charset="WE-NEED-THIS" | const String::Body name_upper=aname.change_case(info->charsets->source(), String::CC_UPPER); |
| // content-type: xxx/xxx; source_charset="WE-NEED-THIS"; | if(name_upper==HTTP_USER_AGENT) |
| size_t before_charseteq_pos=CONTENT_TYPE_VALUE.pos("CHARSET="); | info->user_agent_specified=true; |
| if(before_charseteq_pos!=STRING_NOT_FOUND) { | if(name_upper==HTTP_CONTENT_TYPE) |
| size_t charset_begin=before_charseteq_pos+8/*CHARSET="*/; | info->content_type_specified=true; |
| size_t open_quote_pos=CONTENT_TYPE_VALUE.pos('"', charset_begin); | } |
| bool quoted=open_quote_pos==charset_begin; | |
| if(quoted) | |
| charset_begin++; // skip opening '"' | |
| size_t charset_end=CONTENT_TYPE_VALUE.length(); | |
| if(quoted) { | |
| size_t close_quote_pos=CONTENT_TYPE_VALUE.pos('"', charset_begin); | |
| if(close_quote_pos!=STRING_NOT_FOUND) | |
| charset_end=close_quote_pos; | |
| } else { | |
| size_t delim_pos=CONTENT_TYPE_VALUE.pos(';', charset_begin); | |
| if(delim_pos!=STRING_NOT_FOUND) | |
| charset_end=delim_pos; | |
| } | |
| const String::Body CHARSET_NAME_BODY= | |
| CONTENT_TYPE_VALUE.mid(charset_begin, charset_end - charset_begin); | |
| return &charsets.get(CHARSET_NAME_BODY); | static void http_pass_cookie(HashStringValue::key_type name, |
| } | HashStringValue::value_type value, |
| Http_pass_header_info *info) { | |
| *info->request << String(name, String::L_HTTP_COOKIE) << "=" | |
| << attributed_meaning_to_string(*value, String::L_HTTP_COOKIE, false) | |
| << "; "; | |
| return 0; | |
| } | } |
| static const String* basic_authorization_field(const char* user, const char* pass) { | static const String* basic_authorization_field(const char* user, const char* pass) { |
| Line 380 static void form_value2string( | Line 368 static void form_value2string( |
| new String(key, String::L_TAINTED), | new String(key, String::L_TAINTED), |
| "is %s, "HTTP_FORM_NAME" option value must either string or table", value->type()); | "is %s, "HTTP_FORM_NAME" option value must either string or table", value->type()); |
| } | } |
| const char* pa_form2string(HashStringValue& form) { | const char* pa_form2string(HashStringValue& form, Request_charsets& charsets) { |
| String string; | String string; |
| form.for_each<String*>(form_value2string, &string); | form.for_each<String*>(form_value2string, &string); |
| return string.cstr(String::L_UNSPECIFIED); | return string.cstr(String::L_UNSPECIFIED, 0, &charsets); |
| } | } |
| static void find_headers_end(char* p, | static void find_headers_end(char* p, |
| char*& headers_end_at, | char*& headers_end_at, |
| Line 408 static void find_headers_end(char* p, | Line 396 static void find_headers_end(char* p, |
| File_read_http_result pa_internal_file_read_http(Request_charsets& charsets, | File_read_http_result pa_internal_file_read_http(Request_charsets& charsets, |
| const String& file_spec, | const String& file_spec, |
| bool as_text, | bool as_text, |
| HashStringValue *options) { | HashStringValue *options, |
| bool transcode_text_result) { | |
| File_read_http_result result; | File_read_http_result result; |
| char host[MAX_STRING]; | char host[MAX_STRING]; |
| const char* uri; | const char* uri; |
| short port; | short port; |
| const char* method="GET"; bool method_is_get; | const char* method="GET"; |
| HashStringValue* form=0; | HashStringValue* form=0; |
| const char* body_cstr=0; | const char* body_cstr=0; |
| int timeout_secs=2; | int timeout_secs=2; |
| bool fail_on_status_ne_200=true; | bool fail_on_status_ne_200=true; |
| bool omit_post_charset=false; | |
| Value* vheaders=0; | Value* vheaders=0; |
| Value* vcookies=0; | |
| Value* vbody=0; | |
| Charset *asked_remote_charset=0; | Charset *asked_remote_charset=0; |
| const char* user_cstr=0; | const char* user_cstr=0; |
| const char* password_cstr=0; | const char* password_cstr=0; |
| Line 434 File_read_http_result pa_internal_file_r | Line 426 File_read_http_result pa_internal_file_r |
| valid_options++; | valid_options++; |
| form=vform->get_hash(); | form=vform->get_hash(); |
| } | } |
| if(Value* vbody=options->get(HTTP_BODY_NAME)) { | if(vbody=options->get(HTTP_BODY_NAME)) { |
| valid_options++; | valid_options++; |
| body_cstr=vbody->as_string().cstr(String::L_UNSPECIFIED); | |
| } | } |
| if(Value* vtimeout=options->get(HTTP_TIMEOUT_NAME)) { | if(Value* vtimeout=options->get(HTTP_TIMEOUT_NAME)) { |
| valid_options++; | valid_options++; |
| timeout_secs=vtimeout->as_int(); | timeout_secs=vtimeout->as_int(); |
| } | } |
| if((vheaders=options->get(HTTP_HEADERS_NAME))) { | if(vheaders=options->get(HTTP_HEADERS_NAME)) { |
| valid_options++; | |
| } | |
| if(vcookies=options->get(HTTP_COOKIES_NAME)) { | |
| valid_options++; | valid_options++; |
| } | } |
| if(Value* vany_status=options->get(HTTP_ANY_STATUS_NAME)) { | if(Value* vany_status=options->get(HTTP_ANY_STATUS_NAME)) { |
| valid_options++; | valid_options++; |
| fail_on_status_ne_200=!vany_status->as_bool(); | fail_on_status_ne_200=!vany_status->as_bool(); |
| } | } |
| if(Value* vcharset_name=options->get(HTTP_CHARSET_NAME)) { | if(Value* vomit_post_charset=options->get(HTTP_OMIT_POST_CHARSET)){ |
| valid_options++; | valid_options++; |
| omit_post_charset=vomit_post_charset->as_bool(); | |
| } | |
| if(Value* vcharset_name=options->get(PA_CHARSET_NAME)) { | |
| asked_remote_charset=&::charsets.get(vcharset_name->as_string(). | asked_remote_charset=&::charsets.get(vcharset_name->as_string(). |
| change_case(charsets.source(), String::CC_UPPER)); | change_case(charsets.source(), String::CC_UPPER)); |
| } | } |
| Line 464 File_read_http_result pa_internal_file_r | Line 461 File_read_http_result pa_internal_file_r |
| } | } |
| if(valid_options!=options->count()) | if(valid_options!=options->count()) |
| throw Exception("parser.runtime", | throw Exception(PARSER_RUNTIME, |
| 0, | 0, |
| "invalid option passed"); | "invalid option passed"); |
| } | } |
| if(!asked_remote_charset) // defaulting to $request:charset | if(!asked_remote_charset) // defaulting to $request:charset |
| asked_remote_charset=&charsets.source(); | asked_remote_charset=&charsets.source(); |
| method_is_get=strcmp(method, "GET")==0; | bool method_is_get=strcmp(method, "GET")==0; |
| if(method_is_get && body_cstr) | if(vbody){ |
| throw Exception("parser.runtime", | if(method_is_get) |
| 0, | throw Exception(PARSER_RUNTIME, |
| "you can not use $."HTTP_BODY_NAME" option with method GET"); | 0, |
| "you can not use $."HTTP_BODY_NAME" option with method GET"); | |
| if(form) | |
| throw Exception(PARSER_RUNTIME, | |
| 0, | |
| "you can not use options $."HTTP_BODY_NAME" and $."HTTP_FORM_NAME" together"); | |
| } | |
| //preparing request | //preparing request |
| String& connect_string=*new String; | String& connect_string=*new String; |
| Line 487 File_read_http_result pa_internal_file_r | Line 491 File_read_http_result pa_internal_file_r |
| // influence URLencoding of tainted pieces to String::L_URI lang | // influence URLencoding of tainted pieces to String::L_URI lang |
| Temp_client_charset temp(charsets, *asked_remote_charset); | Temp_client_charset temp(charsets, *asked_remote_charset); |
| const char* connect_string_cstr=connect_string.cstr(String::L_UNSPECIFIED); | const char* connect_string_cstr=connect_string.cstr(String::L_UNSPECIFIED, 0, &charsets); |
| const char* current=connect_string_cstr; | const char* current=connect_string_cstr; |
| if(strncmp(current, "http://", 7)!=0) | if(strncmp(current, "http://", 7)!=0) |
| Line 505 File_read_http_result pa_internal_file_r | Line 509 File_read_http_result pa_internal_file_r |
| bool uri_has_query_string=strchr(uri, '?')!=0; | bool uri_has_query_string=strchr(uri, '?')!=0; |
| //making request head | // making request head |
| String head; | String head; |
| head << method; | head << method << " " << uri; |
| head << " " << uri; | if(form && method_is_get) |
| if(form) | head << (uri_has_query_string?"&":"?") << pa_form2string(*form, charsets); |
| if(method_is_get) | |
| head << (uri_has_query_string?"&":"?") << pa_form2string(*form); | head <<" HTTP/1.0" CRLF "host: "<< host << CRLF; |
| head <<" HTTP/1.0" CRLF | |
| "host: "<< host << CRLF; | if(form && !method_is_get) { // POST |
| if(form && !method_is_get) { | head << "content-type: " << HTTP_CONTENT_TYPE_FORM_URLENCODED; |
| head << "content-type: application/x-www-form-urlencoded" CRLF; | if(!omit_post_charset) |
| body_cstr = pa_form2string(*form); | head << "; charset=" << asked_remote_charset->NAME_CSTR() << ";"; |
| head << CRLF; | |
| body_cstr=pa_form2string(*form, charsets); | |
| } else if (vbody) { | |
| body_cstr=vbody->as_string().cstr(String::L_UNSPECIFIED, 0, &charsets); | |
| // needed for transcoded $.body[] first of all | |
| body_cstr=Charset::transcode( | |
| String::C(body_cstr, strlen(body_cstr)), | |
| charsets.source(), | |
| *asked_remote_charset | |
| ); | |
| } | } |
| // http://www.ietf.org/rfc/rfc2617.txt | // http://www.ietf.org/rfc/rfc2617.txt |
| Line 524 File_read_http_result pa_internal_file_r | Line 538 File_read_http_result pa_internal_file_r |
| head<<"authorization: "<<*authorization_field_value<<CRLF; | head<<"authorization: "<<*authorization_field_value<<CRLF; |
| bool user_agent_specified=false; | bool user_agent_specified=false; |
| bool content_type_specified=false; | |
| if(vheaders && !vheaders->is_string()) { // allow empty | if(vheaders && !vheaders->is_string()) { // allow empty |
| if(HashStringValue *headers=vheaders->get_hash()) { | if(HashStringValue *headers=vheaders->get_hash()) { |
| Http_pass_header_info info={&charsets, &head, false}; | Http_pass_header_info info={&charsets, &head, false}; |
| headers->for_each<Http_pass_header_info*>(http_pass_header, &info); | headers->for_each<Http_pass_header_info*>(http_pass_header, &info); |
| user_agent_specified=info.user_agent_specified; | user_agent_specified=info.user_agent_specified; |
| content_type_specified=info.content_type_specified; | |
| } else | } else |
| throw Exception("parser.runtime", | throw Exception(PARSER_RUNTIME, |
| &connect_string, | &connect_string, |
| "headers param must be hash"); | "headers param must be hash"); |
| }; | }; |
| if(!user_agent_specified) // defaulting | if(!user_agent_specified) // defaulting |
| head << "user-agent: " DEFAULT_USER_AGENT CRLF; | head << "user-agent: " DEFAULT_USER_AGENT CRLF; |
| if(body_cstr) { | if(form && !method_is_get && content_type_specified) // POST + form + content-type was specified |
| // recode those pieces which are not in String::L_URI lang | throw Exception(PARSER_RUNTIME, |
| // [those violating HTTP standard, but widly used] | &connect_string, |
| body_cstr=Charset::transcode( | "$.content-type can't be specified with method POST"); |
| String::C(body_cstr, strlen(body_cstr)), | |
| charsets.source(), | if(vcookies && !vcookies->is_string()){ // allow empty |
| *asked_remote_charset); | if(HashStringValue* cookies=vcookies->get_hash()) { |
| head << "cookie: "; | |
| Http_pass_header_info info={&charsets, &head, false}; | |
| cookies->for_each<Http_pass_header_info*>(http_pass_cookie, &info); | |
| head << CRLF; | |
| } else | |
| throw Exception(PARSER_RUNTIME, | |
| &connect_string, | |
| "cookies param must be hash"); | |
| } | |
| if(body_cstr) { | |
| head << "content-length: " << format(strlen(body_cstr), "%u") << CRLF; | head << "content-length: " << format(strlen(body_cstr), "%u") << CRLF; |
| } | } |
| const char* head_cstr=head.cstr(String::L_UNSPECIFIED); | const char* head_cstr=head.cstr(String::L_UNSPECIFIED, 0, &charsets); |
| // recode those pieces which are not in String::L_URI lang | |
| // [those violating HTTP standard, but widly used] | |
| head_cstr=Charset::transcode( | |
| String::C(head_cstr, strlen(head_cstr)), | |
| charsets.source(), | |
| *asked_remote_charset); | |
| // head + end of header | // head + end of header |
| request_head_and_body << head_cstr << CRLF; | request_head_and_body << head_cstr << CRLF; |
| // body | // body |
| if(body_cstr) | if(body_cstr) |
| request_head_and_body << body_cstr; | request_head_and_body << body_cstr; |
| Line 603 File_read_http_result pa_internal_file_r | Line 623 File_read_http_result pa_internal_file_r |
| throw Exception("http.response", | throw Exception("http.response", |
| &connect_string, | &connect_string, |
| "bad response from host - bad header \"%s\"", line.cstr()); | "bad response from host - bad header \"%s\"", line.cstr()); |
| const String::Body HEADER_NAME= | const String::Body HEADER_NAME=line.mid(0, pos).change_case(charsets.source(), String::CC_UPPER); |
| line.mid(0, pos).change_case(charsets.source(), String::CC_UPPER); | const String& HEADER_VALUE=line.mid(pos+1, line.length()).trim(String::TRIM_BOTH, " \t\r"); |
| const String& header_value=line.mid(pos+1, line.length()).trim(String::TRIM_BOTH, " \t\r"); | if(as_text && HEADER_NAME==HTTP_CONTENT_TYPE) |
| if(as_text && HEADER_NAME=="CONTENT-TYPE") | real_remote_charset=detect_charset(charsets.source(), HEADER_VALUE); |
| real_remote_charset=detect_charset(charsets.source(), header_value); | |
| // tables | // tables |
| { | { |
| Line 619 File_read_http_result pa_internal_file_r | Line 638 File_read_http_result pa_internal_file_r |
| table=valready->get_table(); | table=valready->get_table(); |
| } else { | } else { |
| // first appearence | // first appearence |
| Table::columns_type columns =new ArrayString(1); | Table::columns_type columns=new ArrayString(1); |
| *columns+=new String("value"); | *columns+=new String("value"); |
| table=new Table(columns); | table=new Table(columns); |
| } | } |
| // this string becomes next row | // this string becomes next row |
| ArrayString& row=*new ArrayString(1); | ArrayString& row=*new ArrayString(1); |
| row+=&header_value; | row+=&HEADER_VALUE; |
| *table+=&row; | *table+=&row; |
| // not existed before? add it | // not existed before? add it |
| if(!existed) | if(!existed) |
| tables.put(HEADER_NAME, new VTable(table)); | tables.put(HEADER_NAME, new VTable(table)); |
| } | } |
| result.headers->put(HEADER_NAME, new VString(header_value)); | result.headers->put(HEADER_NAME, new VString(HEADER_VALUE)); |
| } | } |
| } | } |
| if(as_text && raw_body_size>=3 && strncmp(raw_body, "\xEF\xBB\xBF", 3)==0){ | |
| // skip UTF-8 signature: EF BB BF (BOM code) | |
| raw_body+=3; | |
| raw_body_size-=3; | |
| } | |
| // output response | // output response |
| String::C real_body=String::C(raw_body, raw_body_size); | String::C real_body=String::C(raw_body, raw_body_size); |
| if(as_text && raw_body_size) { // must be checked because transcode returns CONST string in case length==0, which contradicts hacking few lines below | |
| if(as_text && transcode_text_result && raw_body_size) { // raw_body_size must be checked because transcode returns CONST string in case length==0, which contradicts hacking few lines below | |
| // defaulting to used-asked charset [it's never empty!] | // defaulting to used-asked charset [it's never empty!] |
| if(!real_remote_charset) | if(!real_remote_charset) |
| real_remote_charset=asked_remote_charset; | real_remote_charset=asked_remote_charset; |
| real_body=Charset::transcode(real_body, *real_remote_charset, charsets.source()); | real_body=Charset::transcode(real_body, *real_remote_charset, charsets.source()); |
| } | } |
| result.str=const_cast<char *>(real_body.str); // hacking a little | result.str=const_cast<char *>(real_body.str); // hacking a little |
| result.length=real_body.length; | result.length=real_body.length; |
| result.headers->put(file_status_name, new VInt(status_code)); | result.headers->put(file_status_name, new VInt(status_code)); |
| return result; | return result; |
| } | } |