| version 1.50, 2010/12/29 12:17:58 | version 1.65, 2015/04/30 17:37:43 |
|---|---|
| Line 1 | Line 1 |
| /** @file | /** @file |
| Parser: http support functions. | Parser: http support functions. |
| Copyright(c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com) | Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com) |
| Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru) | Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru) |
| */ | */ |
| static const char * const IDENT_HTTP_C="$Date$"; | |
| #include "pa_http.h" | #include "pa_http.h" |
| #include "pa_common.h" | #include "pa_common.h" |
| #include "pa_charsets.h" | #include "pa_charsets.h" |
| Line 15 static const char * const IDENT_HTTP_C=" | Line 13 static const char * const IDENT_HTTP_C=" |
| #include "pa_vfile.h" | #include "pa_vfile.h" |
| #include "pa_random.h" | #include "pa_random.h" |
| volatile const char * IDENT_PA_HTTP_C="$Id$" IDENT_PA_HTTP_H; | |
| #ifdef _MSC_VER | |
| #include <windows.h> | |
| #else | |
| #define closesocket close | |
| #endif | |
| // defines | // defines |
| #define HTTP_METHOD_NAME "method" | #define HTTP_METHOD_NAME "method" |
| Line 22 static const char * const IDENT_HTTP_C=" | Line 28 static const char * const IDENT_HTTP_C=" |
| #define HTTP_BODY_NAME "body" | #define HTTP_BODY_NAME "body" |
| #define HTTP_TIMEOUT_NAME "timeout" | #define HTTP_TIMEOUT_NAME "timeout" |
| #define HTTP_HEADERS_NAME "headers" | #define HTTP_HEADERS_NAME "headers" |
| #define HTTP_COOKIES_NAME "cookies" | |
| #define HTTP_FORM_ENCTYPE_NAME "enctype" | #define HTTP_FORM_ENCTYPE_NAME "enctype" |
| #define HTTP_ANY_STATUS_NAME "any-status" | #define HTTP_ANY_STATUS_NAME "any-status" |
| #define HTTP_OMIT_POST_CHARSET_NAME "omit-post-charset" // ^file::load[...;http://...;$.form[...]$.method[post]] | #define HTTP_OMIT_POST_CHARSET_NAME "omit-post-charset" // ^file::load[...;http://...;$.method[post]] by default adds charset to content-type |
| // by default add charset to content-type | |
| #define HTTP_TABLES_NAME "tables" | #define HTTP_TABLES_NAME "tables" |
| Line 35 static const char * const IDENT_HTTP_C=" | Line 39 static const char * const IDENT_HTTP_C=" |
| #define DEFAULT_USER_AGENT "parser3" | #define DEFAULT_USER_AGENT "parser3" |
| # ifndef INADDR_NONE | #ifndef INADDR_NONE |
| # define INADDR_NONE ((ulong) -1) | #define INADDR_NONE ((ulong) -1) |
| # endif | #endif |
| #undef CRLF | #undef CRLF |
| #define CRLF "\r\n" | #define CRLF "\r\n" |
| // helpers | |
| class Cookies_table_template_columns: public ArrayString { | |
| public: | |
| Cookies_table_template_columns() { | |
| *this+=new String("name"); | |
| *this+=new String("value"); | |
| *this+=new String("expires"); | |
| *this+=new String("max-age"); | |
| *this+=new String("domain"); | |
| *this+=new String("path"); | |
| *this+=new String("httponly"); | |
| *this+=new String("secure"); | |
| } | |
| }; | |
| static bool set_addr(struct sockaddr_in *addr, const char* host, const short port){ | static bool set_addr(struct sockaddr_in *addr, const char* host, const short port){ |
| memset(addr, 0, sizeof(*addr)); | memset(addr, 0, sizeof(*addr)); |
| addr->sin_family=AF_INET; | addr->sin_family=AF_INET; |
| addr->sin_port=htons(port); | addr->sin_port=htons(port); |
| if(host) { | if(host) { |
| ulong packed_ip=inet_addr(host); | struct hostent *hostIP=gethostbyname(host); |
| if(packed_ip!=INADDR_NONE) | if(hostIP && hostIP->h_addrtype == AF_INET){ |
| memcpy(&addr->sin_addr, &packed_ip, sizeof(packed_ip)); | memcpy(&addr->sin_addr, hostIP->h_addr, hostIP->h_length); |
| else { | return true; |
| struct hostent *hostIP=gethostbyname(host); | } |
| if(hostIP) | } |
| memcpy(&addr->sin_addr, hostIP->h_addr, hostIP->h_length); | return false; |
| else | |
| return false; | |
| } | |
| } else | |
| addr->sin_addr.s_addr=INADDR_ANY; | |
| return true; | |
| } | } |
| size_t guess_content_length(char* buf) { | size_t guess_content_length(char* buf) { |
| Line 299 struct Http_pass_header_info { | Line 314 struct Http_pass_header_info { |
| char *pa_http_safe_header_name(const char *name) { | char *pa_http_safe_header_name(const char *name) { |
| char *result=pa_strdup(name); | char *result=pa_strdup(name); |
| char *n=result; | char *n=result; |
| if(isdigit(*n)) | if(!pa_isalpha((unsigned char)*n)) |
| *n++ = '_'; | *n++ = '_'; |
| for(; *n; ++n) { | for(; *n; ++n) { |
| if (!isalnum(*n) && *n != '_') | if (!pa_isalnum((unsigned char)*n) && *n != '-' && *n != '_') |
| *n = '_'; | *n = '_'; |
| } | } |
| return result; | return result; |
| Line 326 static void http_pass_header(HashStringV | Line 341 static void http_pass_header(HashStringV |
| *info->user_agent_specified=true; | *info->user_agent_specified=true; |
| if(strcasecmp(name_cstr, HTTP_CONTENT_TYPE)==0){ | if(strcasecmp(name_cstr, HTTP_CONTENT_TYPE)==0){ |
| *info->content_type_specified=true; | *info->content_type_specified=true; |
| *info->content_type_url_encoded=StrStartFromNC(value.cstr(), HTTP_CONTENT_TYPE_FORM_URLENCODED); | *info->content_type_url_encoded=pa_strncasecmp(value.cstr(), HTTP_CONTENT_TYPE_FORM_URLENCODED)==0; |
| } | } |
| } | } |
| Line 389 static void form_value2string( | Line 404 static void form_value2string( |
| } else | } else |
| throw Exception(PARSER_RUNTIME, | throw Exception(PARSER_RUNTIME, |
| new String(key, String::L_TAINTED), | new String(key, String::L_TAINTED), |
| "is %s, "HTTP_FORM_NAME" option value can be string or table only (file is allowed for $."HTTP_METHOD_NAME"[POST] + $."HTTP_FORM_ENCTYPE_NAME"["HTTP_CONTENT_TYPE_MULTIPART_FORMDATA"])", value->type()); | "is %s, " HTTP_FORM_NAME " option value can be string or table only (file is allowed for $." HTTP_METHOD_NAME "[POST] + $." HTTP_FORM_ENCTYPE_NAME "[" HTTP_CONTENT_TYPE_MULTIPART_FORMDATA "])", value->type()); |
| } | } |
| const char* pa_form2string(HashStringValue& form, Request_charsets& charsets) { | const char* pa_form2string(HashStringValue& form, Request_charsets& charsets) { |
| Line 495 static void form_value2part( | Line 510 static void form_value2part( |
| } else | } else |
| throw Exception(PARSER_RUNTIME, | throw Exception(PARSER_RUNTIME, |
| new String(key, String::L_TAINTED), | new String(key, String::L_TAINTED), |
| "is %s, "HTTP_FORM_NAME" option value can be string, table or file only", value->type()); | "is %s, " HTTP_FORM_NAME " option value can be string, table or file only", value->type()); |
| } | } |
| const char* pa_form2string_multipart(HashStringValue& form, Request& r, const char* boundary, size_t& post_size){ | const char* pa_form2string_multipart(HashStringValue& form, Request& r, const char* boundary, size_t& post_size){ |
| Line 525 static void find_headers_end(char* p, | Line 540 static void find_headers_end(char* p, |
| headers_end_at=0; | headers_end_at=0; |
| } | } |
| // Set-Cookie: name=value; Domain=docs.foo.com; Path=/accounts; Expires=Wed, 13-Jan-2021 22:23:01 GMT; Secure; HttpOnly | |
| static ArrayString* parse_cookie(Request& r, const String& cookie) { | |
| char *current=pa_strdup(cookie.cstr()); | |
| const String* name=0; | |
| const String* value=&String::Empty; | |
| const String* expires=&String::Empty; | |
| const String* max_age=&String::Empty; | |
| const String* path=&String::Empty; | |
| const String* domain=&String::Empty; | |
| const String* httponly=&String::Empty; | |
| const String* secure=&String::Empty; | |
| bool first_pair=true; | |
| do { | |
| if(char *meaning=search_stop(current, ';')) | |
| if(char *attribute=search_stop(meaning, '=')) { | |
| const String* sname=new String(unescape_chars(attribute, strlen(attribute), &r.charsets.source(), true/*don't convert '"' to space*/), String::L_TAINTED); | |
| const String* smeaning=0; | |
| if(meaning) | |
| smeaning=new String(unescape_chars(meaning, strlen(meaning), &r.charsets.source(), true/*don't convert '"' to space*/), String::L_TAINTED); | |
| if(first_pair) { | |
| // name + value | |
| name=sname; | |
| value=smeaning; | |
| first_pair=false; | |
| } else { | |
| const String& slower=sname->change_case(r.charsets.source(), String::CC_LOWER); | |
| if(slower == "expires") | |
| expires=smeaning; | |
| else if(slower == "max-age") | |
| max_age=smeaning; | |
| else if(slower == "domain") | |
| domain=smeaning; | |
| else if(slower == "path") | |
| path=smeaning; | |
| else if(slower == "httponly") | |
| httponly=new String("1", String::L_CLEAN); | |
| else if(slower == "secure") | |
| secure=new String("1", String::L_CLEAN); | |
| else { | |
| // todo@ ? | |
| } | |
| } | |
| } | |
| } while(current); | |
| if(!name) | |
| return 0; | |
| ArrayString* result=new ArrayString(8); | |
| *result+=name; | |
| *result+=value; | |
| *result+=expires; | |
| *result+=max_age; | |
| *result+=domain; | |
| *result+=path; | |
| *result+=httponly; | |
| *result+=secure; | |
| return result; | |
| } | |
| Table* parse_cookies(Request& r, Table *cookies){ | |
| Table& result=*new Table(new Cookies_table_template_columns); | |
| for(Array_iterator<Table::element_type> i(*cookies); i.has_next(); ) | |
| if(ArrayString* row=parse_cookie(r, *i.next()->get(0))) | |
| result+=row; | |
| return &result; | |
| } | |
| /// @todo build .cookies field. use ^file.tables.SET-COOKIES.menu{ for now | /// @todo build .cookies field. use ^file.tables.SET-COOKIES.menu{ for now |
| File_read_http_result pa_internal_file_read_http(Request& r, | File_read_http_result pa_internal_file_read_http(Request& r, |
| const String& file_spec, | const String& file_spec, |
| Line 545 File_read_http_result pa_internal_file_r | Line 636 File_read_http_result pa_internal_file_r |
| Value* vcookies=0; | Value* vcookies=0; |
| Value* vbody=0; | Value* vbody=0; |
| Charset *asked_remote_charset=0; | Charset *asked_remote_charset=0; |
| Charset* real_remote_charset=0; | |
| const char* user_cstr=0; | const char* user_cstr=0; |
| const char* password_cstr=0; | const char* password_cstr=0; |
| const char* encode=0; | const char* encode=0; |
| Line 588 File_read_http_result pa_internal_file_r | Line 680 File_read_http_result pa_internal_file_r |
| omit_post_charset=vomit_post_charset->as_bool(); | omit_post_charset=vomit_post_charset->as_bool(); |
| } | } |
| if(Value* vcharset_name=options->get(PA_CHARSET_NAME)) { | if(Value* vcharset_name=options->get(PA_CHARSET_NAME)) { |
| asked_remote_charset=&charsets.get(vcharset_name->as_string(). | asked_remote_charset=&charsets.get(vcharset_name->as_string().change_case(r.charsets.source(), String::CC_UPPER)); |
| change_case(r.charsets.source(), String::CC_UPPER)); | } |
| if(Value* vresponse_charset_name=options->get(PA_RESPONSE_CHARSET_NAME)) { | |
| valid_options++; | |
| real_remote_charset=&charsets.get(vresponse_charset_name->as_string().change_case(r.charsets.source(), String::CC_UPPER)); | |
| } | } |
| if(Value* vuser=options->get(HTTP_USER)) { | if(Value* vuser=options->get(HTTP_USER)) { |
| valid_options++; | valid_options++; |
| Line 610 File_read_http_result pa_internal_file_r | Line 705 File_read_http_result pa_internal_file_r |
| if(method_is_get) | if(method_is_get) |
| throw Exception(PARSER_RUNTIME, | throw Exception(PARSER_RUNTIME, |
| 0, | 0, |
| "you can not use $."HTTP_FORM_ENCTYPE_NAME" option with method GET"); | "you can not use $." HTTP_FORM_ENCTYPE_NAME " option with method GET"); |
| multipart=strcasecmp(encode, HTTP_CONTENT_TYPE_MULTIPART_FORMDATA)==0; | multipart=strcasecmp(encode, HTTP_CONTENT_TYPE_MULTIPART_FORMDATA)==0; |
| if(!multipart && strcasecmp(encode, HTTP_CONTENT_TYPE_FORM_URLENCODED)!=0) | if(!multipart && strcasecmp(encode, HTTP_CONTENT_TYPE_FORM_URLENCODED)!=0) |
| throw Exception(PARSER_RUNTIME, | throw Exception(PARSER_RUNTIME, |
| 0, | 0, |
| "$."HTTP_FORM_ENCTYPE_NAME" option value can be "HTTP_CONTENT_TYPE_FORM_URLENCODED" or "HTTP_CONTENT_TYPE_MULTIPART_FORMDATA" only"); | "$." HTTP_FORM_ENCTYPE_NAME " option value can be " HTTP_CONTENT_TYPE_FORM_URLENCODED " or " HTTP_CONTENT_TYPE_MULTIPART_FORMDATA " only"); |
| } | } |
| if(vbody){ | if(vbody){ |
| if(method_is_get) | if(method_is_get) |
| throw Exception(PARSER_RUNTIME, | throw Exception(PARSER_RUNTIME, |
| 0, | 0, |
| "you can not use $."HTTP_BODY_NAME" option with method GET"); | "you can not use $." HTTP_BODY_NAME " option with method GET"); |
| if(form) | if(form) |
| throw Exception(PARSER_RUNTIME, | throw Exception(PARSER_RUNTIME, |
| 0, | 0, |
| "you can not use options $."HTTP_BODY_NAME" and $."HTTP_FORM_NAME" together"); | "you can not use options $." HTTP_BODY_NAME " and $." HTTP_FORM_NAME " together"); |
| } | } |
| //preparing request | //preparing request |
| Line 789 File_read_http_result pa_internal_file_r | Line 884 File_read_http_result pa_internal_file_r |
| // sending request | // sending request |
| int status_code=http_request(response, response_size, | int status_code=http_request(response, response_size, |
| host, port, request, request_size, | pa_idna_encode(host, r.charsets.source()), port, request, request_size, |
| timeout_secs, fail_on_status_ne_200); | timeout_secs, fail_on_status_ne_200); |
| // processing results | // processing results |
| Line 803 File_read_http_result pa_internal_file_r | Line 898 File_read_http_result pa_internal_file_r |
| result.headers=new HashStringValue; | result.headers=new HashStringValue; |
| VHash* vtables=new VHash; | VHash* vtables=new VHash; |
| result.headers->put(HTTP_TABLES_NAME, vtables); | result.headers->put(HTTP_TABLES_NAME, vtables); |
| Charset* real_remote_charset=0; // undetected, yet | |
| if(headers_end_at) { | if(headers_end_at) { |
| *headers_end_at=0; | *headers_end_at=0; |
| Line 826 File_read_http_result pa_internal_file_r | Line 920 File_read_http_result pa_internal_file_r |
| "bad response from host - bad header \"%s\"", line.cstr()); | "bad response from host - bad header \"%s\"", line.cstr()); |
| const String::Body HEADER_NAME=line.mid(0, pos).change_case(r.charsets.source(), String::CC_UPPER); | const String::Body HEADER_NAME=line.mid(0, pos).change_case(r.charsets.source(), String::CC_UPPER); |
| const String& HEADER_VALUE=line.mid(pos+1, line.length()).trim(String::TRIM_BOTH, " \t\r"); | const String& HEADER_VALUE=line.mid(pos+1, line.length()).trim(String::TRIM_BOTH, " \t\r"); |
| if(as_text && HEADER_NAME==HTTP_CONTENT_TYPE_UPPER) | if(as_text && HEADER_NAME==HTTP_CONTENT_TYPE_UPPER && !real_remote_charset) |
| real_remote_charset=detect_charset(HEADER_VALUE.cstr()); | real_remote_charset=detect_charset(HEADER_VALUE.cstr()); |
| // tables | // tables |
| Line 854 File_read_http_result pa_internal_file_r | Line 948 File_read_http_result pa_internal_file_r |
| result.headers->put(HEADER_NAME, new VString(HEADER_VALUE)); | result.headers->put(HEADER_NAME, new VString(HEADER_VALUE)); |
| } | } |
| // filling $.cookies | |
| if(Value *vcookies=(Value *)tables.get("SET-COOKIE")) | |
| result.headers->put(HTTP_COOKIES_NAME, new VTable(parse_cookies(r, vcookies->get_table()))); | |
| } | } |
| if(as_text && raw_body_size>=3 && strncmp(raw_body, "\xEF\xBB\xBF", 3)==0){ | if(as_text && raw_body_size>=3 && strncmp(raw_body, "\xEF\xBB\xBF", 3)==0){ |