--- parser3/src/classes/curl.C 2013/04/21 21:24:53 1.26 +++ parser3/src/classes/curl.C 2016/07/27 22:34:49 1.39 @@ -1,7 +1,7 @@ /** @file Parser: @b curl parser class. - Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com) + Copyright (c) 2001-2015 Art. Lebedev Studio (http://www.artlebedev.com) */ #include "pa_config_includes.h" @@ -11,12 +11,13 @@ #include "pa_vfile.h" #include "pa_charsets.h" #include "pa_vstring.h" +#include "pa_vdate.h" #include "pa_vtable.h" #include "pa_common.h" #include "pa_http.h" #include "ltdl.h" -volatile const char * IDENT_CURL_C="$Id: curl.C,v 1.26 2013/04/21 21:24:53 moko Exp $"; +volatile const char * IDENT_CURL_C="$Id: curl.C,v 1.39 2016/07/27 22:34:49 moko Exp $"; class MCurl: public Methoded { public: @@ -25,7 +26,7 @@ public: // global variables -DECLARE_CLASS_VAR(curl, new MCurl, 0); +DECLARE_CLASS_VAR(curl, new MCurl); #include "curl.h" @@ -44,8 +45,7 @@ typedef void (*t_curl_formfree)(struct c #define DLINK(name) GLINK(name) if(!f_##name) return "function " #name " was not found"; static const char *dlink(const char *dlopen_file_spec) { - if(lt_dlinit()) - return lt_dlerror(); + pa_dlinit(); lt_dlhandle handle=lt_dlopen(dlopen_file_spec); @@ -71,14 +71,18 @@ static const char *dlink(const char *dlo class ParserOptions { public: + // real options const String *filename; const String *content_type; bool is_text; Charset *charset, *response_charset; + + // stuff for internal use + const char *url; struct curl_httppost *f_post; FILE *f_stderr; - ParserOptions() : filename(0), content_type(0), is_text(true), charset(0), response_charset(0), f_post(0), f_stderr(0){} + ParserOptions() : filename(0), content_type(0), is_text(true), charset(0), response_charset(0), url(0), f_post(0), f_stderr(0){} ~ParserOptions() { f_curl_formfree(f_post); if(f_stderr) @@ -112,6 +116,7 @@ public: fcurl = f_curl_easy_init(); foptions = new ParserOptions(); f_curl_easy_setopt(fcurl, CURLOPT_POSTFIELDSIZE, 0); // fix libcurl bug + f_curl_easy_setopt(fcurl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4); // avoid ipv6 by default } ~Temp_curl() { f_curl_easy_cleanup(fcurl); @@ -122,11 +127,7 @@ public: }; bool curl_linked = false; -#ifdef WIN32 -const char *curl_library="libcurl.dll"; -#else -const char *curl_library="libcurl.so"; -#endif +const char *curl_library="libcurl" LT_MODULE_EXT; const char *curl_status = 0; @@ -161,20 +162,6 @@ static void _curl_version(Request& r, Me fcurl ? _curl_version_action(r, params) : temp_curl(_curl_version_action, r, params); } -static char *str_lower(const char *str){ - char *result=pa_strdup(str); - for(char* c=result; *c; c++) - *c=(char)tolower((unsigned char)*c); - return result; -} - -static char *str_upper(const char *str){ - char *result=pa_strdup(str); - for(char* c=result; *c; c++) - *c=(char)toupper((unsigned char)*c); - return result; -} - class CurlOption { public: @@ -230,6 +217,7 @@ public: CURL_OPT(CURL_INT, FOLLOWLOCATION); CURL_OPT(CURL_INT, UNRESTRICTED_AUTH); + CURL_OPT(CURL_INT, IPRESOLVE); CURL_OPT(CURL_INT, POST); CURL_OPT(CURL_INT, HTTPGET); @@ -301,6 +289,54 @@ public: } *curl_options=0; +class CurlInfo { +public: + + enum OptionType { + CURL_STRING, + CURL_INT, + CURL_DOUBLE + }; + + CURLINFO id; + OptionType type; + CurlInfo(CURLINFO aid, OptionType atype): id(aid), type(atype) {} +}; + +class CurlInfoHash: public HashString { +public: + CurlInfoHash() { +#define CURL_INF(type, name) put(str_lower(#name),new CurlInfo(CURLINFO_##name, CurlInfo::type)); + CURL_INF(CURL_DOUBLE, APPCONNECT_TIME); + CURL_INF(CURL_DOUBLE, CONNECT_TIME); + CURL_INF(CURL_DOUBLE, CONTENT_LENGTH_DOWNLOAD); + CURL_INF(CURL_DOUBLE, CONTENT_LENGTH_UPLOAD); + CURL_INF(CURL_STRING, CONTENT_TYPE); + CURL_INF(CURL_STRING, EFFECTIVE_URL); + CURL_INF(CURL_INT, HEADER_SIZE); + CURL_INF(CURL_INT, HTTPAUTH_AVAIL); + CURL_INF(CURL_DOUBLE, NAMELOOKUP_TIME); + CURL_INF(CURL_INT, NUM_CONNECTS); + CURL_INF(CURL_INT, OS_ERRNO); + CURL_INF(CURL_DOUBLE, PRETRANSFER_TIME); + CURL_INF(CURL_STRING, PRIMARY_IP); + CURL_INF(CURL_INT, PROXYAUTH_AVAIL); + CURL_INF(CURL_INT, REDIRECT_COUNT); + CURL_INF(CURL_DOUBLE, REDIRECT_TIME); + CURL_INF(CURL_STRING, REDIRECT_URL); + CURL_INF(CURL_INT, REQUEST_SIZE); + CURL_INF(CURL_INT, RESPONSE_CODE); + CURL_INF(CURL_DOUBLE, SIZE_DOWNLOAD); + CURL_INF(CURL_DOUBLE, SIZE_UPLOAD); + CURL_INF(CURL_DOUBLE, SPEED_DOWNLOAD); + CURL_INF(CURL_DOUBLE, SPEED_UPLOAD); + CURL_INF(CURL_INT, SSL_VERIFYRESULT); + CURL_INF(CURL_DOUBLE, STARTTRANSFER_TIME); + CURL_INF(CURL_DOUBLE, TOTAL_TIME); + } + +} *curl_infos=0; + static const char *curl_urlencode(const String &s, Request& r){ if(options().charset){ Temp_client_charset temp(r.charsets, *options().charset); @@ -395,8 +431,8 @@ static void curl_setopt(HashStringValue: const String url = v.as_string(); if(!url.starts_with("http://") && !url.starts_with("https://")) throw Exception("curl", 0, "failed to set option '%s': invalid url scheme '%s'", key.cstr(), url.cstr()); - const char *value_str=curl_urlencode(url, r); - res=f_curl_easy_setopt(curl(), opt->id, value_str); + options().url=curl_urlencode(url, r); + res=f_curl_easy_setopt(curl(), opt->id, options().url); break; } case CurlOption::CURL_INT:{ @@ -477,8 +513,7 @@ static void curl_setopt(HashStringValue: break; } case CurlOption::PARSER_CHARSET:{ - // 'charset' parser option - options().charset=&::charsets.get(v.as_string().change_case(r.charsets.source(), String::CC_UPPER)); + // 'charset' parser option should be processed before other options break; } case CurlOption::PARSER_RESPONSE_CHARSET:{ @@ -496,8 +531,59 @@ static void _curl_options(Request& r, Me if(curl_options==0) curl_options=new CurlOptionHash(); - if(HashStringValue* options=params.as_hash(0)) - options->for_each(curl_setopt, r); + if(HashStringValue* options_hash=params.as_hash(0)){ + if(Value* value=options_hash->get("charset")){ + // charset should be handled first as params may require transcode + Value &v=r.process_to_value(*value); + options().charset=&::charsets.get(v.as_string().change_case(r.charsets.source(), String::CC_UPPER)); + } + options_hash->for_each(curl_setopt, r); + } +} + +#define CURL_GETINFO(arg) \ + if((res=f_curl_easy_getinfo(curl(), info->id, &arg)) != CURLE_OK){ \ + throw Exception("curl", 0, "failed to get %s info: %s", key.cstr(), f_curl_easy_strerror(res)); \ + } + +static Value *curl_getinfo(const String::Body &key, CurlInfo *info=0) { + if(info==0 && !(info=curl_infos->get(key))) + throw Exception("curl", 0, "called with invalid parameter '%s'", key.cstr()); + + CURLcode res; + switch (info->type){ + case CurlInfo::CURL_STRING:{ + char *str=0; + CURL_GETINFO(str); + return new VString(str ? *new String(pa_strdup(str), String::L_TAINTED) : String::Empty); + } + case CurlInfo::CURL_INT:{ + long l=0; + CURL_GETINFO(l); + return new VInt(l); + } + case CurlInfo::CURL_DOUBLE:{ + double d=0; + CURL_GETINFO(d); + return new VDouble(d); + } + } + return VVoid::get(); +} + +static void _curl_info(Request& r, MethodParams& params){ + if(curl_infos==0) + curl_infos=new CurlInfoHash(); + if(params.count()==1){ + const String &name=params.as_string(0, "name must be string"); + r.write_assign_lang(*curl_getinfo(name)); + } else { + VHash& result=*new VHash; + for(CurlInfoHash::Iterator i(*curl_infos); i; i.next() ){ + result.get_hash()->put(i.key(), curl_getinfo(i.key(), i.value())); + } + r.write_no_lang(result); + } } @@ -526,27 +612,13 @@ static int curl_writer(char *data, size_ return size; } -class Curl_response { -public: - HASH_STRING headers; - Array cookies; -}; - -static int curl_header(char *data, size_t size, size_t nmemb, Curl_response *result){ +static int curl_header(char *data, size_t size, size_t nmemb, ResponseHeaders *result){ if(result == 0) return 0; size=size*nmemb; if(size>0){ - char *line=pa_strdup(data, size); - char *value=lsplit(line,':'); - if(value && *line){ - // we need only headers, not the response code - const char* HEADER_NAME=str_upper(line); - result->headers.put(HEADER_NAME, value); - if(strcmp(HEADER_NAME, "SET-COOKIE")==0) - result->cookies+=value; - } + result->add_header(pa_strdup(data, size)); } return size; } @@ -567,7 +639,7 @@ static void _curl_load_action(Request& r CURL_SETOPT(CURLOPT_WRITEDATA, &body, "curl write buffer"); // we need a container for headers as VFile fields can be put only after VFile.set - Curl_response response; + ResponseHeaders response; CURL_SETOPT(CURLOPT_HEADERFUNCTION, curl_header, "curl header function"); CURL_SETOPT(CURLOPT_WRITEHEADER, &response, "curl header buffer"); @@ -588,8 +660,9 @@ static void _curl_load_action(Request& r case CURLE_SSL_CACERT: case CURLE_SSL_ENGINE_INITFAILED: ex_type = "curl.ssl"; break; + default: break; } - throw Exception( ex_type ? ex_type : "curl.fail", 0, "%s", f_curl_easy_strerror(res)); + throw Exception( PA_DEFAULT(ex_type, "curl.fail"), 0, "%s", f_curl_easy_strerror(res)); } // assure trailing zero @@ -597,12 +670,15 @@ static void _curl_load_action(Request& r VFile& result=*new VFile; - String::Body ct_header = response.headers.get(HTTP_CONTENT_TYPE_UPPER); Charset *asked_charset = options().response_charset; - if (asked_charset == 0){ - Charset *remote_charset = ct_header.is_empty() ? 0 : detect_charset(ct_header.trim(String::TRIM_BOTH, " \t\n\r").cstr()); - asked_charset = remote_charset ? remote_charset : options().charset; - } + if (!asked_charset && !response.content_type.is_empty()) + asked_charset=detect_charset(response.content_type.cstr()); + + if(options().is_text) + asked_charset=charsets.checkBOM(body.buf, body.length, asked_charset); + + if (!asked_charset) + asked_charset = options().charset; if(options().is_text && asked_charset != 0){ String::C c=Charset::transcode(String::C(body.buf, body.length), *asked_charset, r.charsets.source()); @@ -610,42 +686,34 @@ static void _curl_load_action(Request& r body.length=c.length; } - result.set(true/*tainted*/, options().is_text, body.buf, body.length, options().filename - , options().content_type ? new VString(*options().content_type) : 0, &r); + const String *content_type = PA_DEFAULT(options().content_type, response.content_type.is_empty() ? 0 : new String(response.content_type, String::L_TAINTED)); + const String *filename = PA_DEFAULT(options().filename, new String(options().url)); + + result.set(true/*tainted*/, options().is_text, body.buf, body.length, filename, content_type ? new VString(*content_type) : 0, &r); + long http_status = 0; if(f_curl_easy_getinfo(curl(), CURLINFO_RESPONSE_CODE, &http_status) == CURLE_OK){ result.fields().put("status", new VInt(http_status)); } - for(HASH_STRING::Iterator i(response.headers); i; i.next() ){ - String::Body HEADER_NAME=i.key(); - String::Body value=i.value(); - if(asked_charset){ - HEADER_NAME=Charset::transcode(HEADER_NAME, *asked_charset, r.charsets.source()); - value=Charset::transcode(value, *asked_charset, r.charsets.source()); - } - result.fields().put(HEADER_NAME, new VString(*new String(value.trim(String::TRIM_BOTH, " \t\n\r"), String::L_TAINTED))); - } + VHash* vtables=new VHash; + result.fields().put("tables", vtables); - // filling $.cookies - Table* tcookies=0; + for(Array_iterator i(response.headers); i.has_next(); ){ + ResponseHeaders::Header header=i.next(); - for(Array_iterator i(response.cookies); i.has_next(); ){ - if(!tcookies){ - Table::columns_type columns=new ArrayString(1); - *columns+=new String("value"); - tcookies=new Table(columns); - } - String::Body value=i.next(); if(asked_charset) - value=Charset::transcode(value, *asked_charset, r.charsets.source()); - ArrayString& row=*new ArrayString(1); - row+=new String(value.trim(String::TRIM_BOTH, " \t\n\r"), String::L_TAINTED); - *tcookies+=&row; + header.transcode(*asked_charset, r.charsets.source()); + + String &header_value=*new String(header.value, String::L_TAINTED); + + tables_update(vtables->hash(), header.name, header_value); + result.fields().put(header.name, new VString(header_value)); } - if(tcookies) - result.fields().put(HTTP_COOKIES_NAME, new VTable(parse_cookies(r, tcookies))); + // filling $.cookies + if(Value *vcookies=vtables->hash().get("SET-COOKIE")) + result.fields().put(HTTP_COOKIES_NAME, new VTable(parse_cookies(r, vcookies->get_table()))); r.write_no_lang(result); } @@ -659,5 +727,6 @@ MCurl::MCurl(): Methoded("curl") { add_native_method("session", Method::CT_STATIC, _curl_session, 1, 1); add_native_method("version", Method::CT_STATIC, _curl_version, 0, 0); add_native_method("options", Method::CT_STATIC, _curl_options, 1, 1); + add_native_method("info", Method::CT_STATIC, _curl_info, 0, 1); add_native_method("load", Method::CT_STATIC, _curl_load, 0, 1); }