--- parser3/src/classes/curl.C 2010/05/16 00:23:00 1.4 +++ parser3/src/classes/curl.C 2026/04/25 13:38:46 1.78 @@ -1,38 +1,33 @@ /** @file Parser: @b curl parser class. - Copyright(c) 2001-2009 ArtLebedev Group(http://www.artlebedev.com) + Copyright (c) 2001-2026 Art. Lebedev Studio (https://www.artlebedev.com) + Authors: Konstantin Morshnev */ #include "pa_config_includes.h" -#ifdef HAVE_CURL - -static const char * const IDENT_INET_C="$Date: 2010/05/16 00:23:00 $"; - #include "pa_vmethod_frame.h" #include "pa_request.h" #include "pa_vfile.h" #include "pa_charsets.h" #include "pa_vstring.h" +#include "pa_vdate.h" #include "pa_vtable.h" #include "pa_common.h" -#include "ltdl.h" +#include "pa_http.h" +#include "ltdl.h" + +volatile const char * IDENT_CURL_C="$Id: curl.C,v 1.78 2026/04/25 13:38:46 moko Exp $"; class MCurl: public Methoded { public: MCurl(); - -public: // Methoded - bool used_directly() { return true; } }; // global variables -DECLARE_CLASS_VAR(curl, new MCurl, 0); - -// from file.C -extern bool is_text_mode(const String& mode); +DECLARE_CLASS_VAR(curl, new MCurl); #include "curl.h" @@ -50,16 +45,20 @@ typedef void (*t_curl_formfree)(struct c #define GLINK(name) f_##name=(t_##name)lt_dlsym(handle, #name); #define DLINK(name) GLINK(name) if(!f_##name) return "function " #name " was not found"; -const char *dlink(const char *dlopen_file_spec) { - if(lt_dlinit()) - return lt_dlerror(); +static const char *dlink(char *dlopen_file_spec) { + pa_dlinit(); - lt_dlhandle handle=lt_dlopen(dlopen_file_spec); + lt_dlhandle handle; + do { + char *next=lsplit(dlopen_file_spec, ','); + handle=lt_dlopen(dlopen_file_spec); + dlopen_file_spec=next; + } while (!handle && dlopen_file_spec); if(!handle){ if(const char* result=lt_dlerror()) return result; - return "can not open the dynamic link module"; + return "cannot open the dynamic link module"; } DLINK(curl_easy_init); @@ -76,28 +75,36 @@ const char *dlink(const char *dlopen_fil } -class ParserOptions { -public: - 
const char *filename; +struct ParserOptions : public PA_Allocated { + // real options + const String *filename; const String *content_type; bool is_text; Charset *charset, *response_charset; + + // stuff for internal use + const char *url; struct curl_httppost *f_post; + FILE *f_stderr; - ParserOptions() : filename(0), content_type(0), is_text(true), charset(0), response_charset(0), f_post(0){} + // if response content-length check required + bool no_body; + // stuff to walkaround curl request content-length bugs + bool is_post; + bool has_content_length; + + ParserOptions() : filename(0), content_type(0), is_text(true), charset(0), response_charset(0), url(0), f_post(0), f_stderr(0), no_body(false), is_post(false), has_content_length(false){} ~ParserOptions() { f_curl_formfree(f_post); + if(f_stderr) + fclose(f_stderr); } - + }; -// using thread local variables instead of keeping them in request -// not necessary for cgi version -#ifdef WIN32 -#define __thread __declspec(thread) -#endif -__thread CURL *fcurl = 0; -__thread ParserOptions *foptions = 0; +// using TLS instead of keeping variables in request +THREAD_LOCAL CURL *fcurl = 0; +THREAD_LOCAL ParserOptions *foptions = 0; static CURL *curl(){ if(!fcurl) @@ -115,12 +122,17 @@ static ParserOptions &options(){ class Temp_curl { CURL *saved_curl; ParserOptions *saved_options; + + // every TLS should be referenced elsewhere, or GC will collect it + CURL *thread_curl; + ParserOptions *thread_options; public: Temp_curl() : saved_curl(fcurl), saved_options(foptions){ - fcurl = f_curl_easy_init(); - foptions = new ParserOptions(); - f_curl_easy_setopt(fcurl, CURLOPT_POSTFIELDSIZE, 0); // fix libcurl bug + thread_curl = fcurl = f_curl_easy_init(); + thread_options = foptions = new ParserOptions(); + f_curl_easy_setopt(fcurl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4); // avoid ipv6 by default } + ~Temp_curl() { f_curl_easy_cleanup(fcurl); fcurl = saved_curl; @@ -129,26 +141,28 @@ public: } }; -bool curl_linked = false; 
#ifdef WIN32 -const char *curl_library="libcurl.dll"; +#define CURL_LIBRARY "libcurl" LT_MODULE_EXT #else -const char *curl_library="libcurl.so"; +#define CURL_LIBRARY "libcurl" LT_MODULE_EXT ",libcurl" LT_MODULE_EXT ".4" #endif +bool curl_linked = false; const char *curl_status = 0; +const char *curl_library=CURL_LIBRARY; + static void temp_curl(void (*action)(Request&, MethodParams&), Request& r, MethodParams& params){ - if(!curl_linked){ - curl_linked=true; - curl_status=dlink(curl_library); - } + if(!curl_linked) + curl_status=dlink(pa_strdup(curl_library)); if(curl_status == 0){ + curl_linked=true; Temp_curl temp_curl; action(r,params); } else { - throw Exception("curl", 0, "failed to load curl library %s: %s", curl_library, curl_status); + const char *hint=strcmp(curl_library, CURL_LIBRARY) ? "" : " (before use, call ^curl:options[ $.library[correct.libcurl" LT_MODULE_EXT ".name] ])"; + throw Exception("curl", 0, "failed to load curl library %s%s", curl_status, hint); } } @@ -161,40 +175,29 @@ static void _curl_session(Request& r, Me temp_curl(_curl_session_action, r, params); } -static void _curl_version_action(Request& r, MethodParams& params){ - r.write_no_lang(*new VString(*new String(f_curl_version(), String::L_TAINTED))); +static void _curl_version_action(Request& r, MethodParams& ){ + r.write(*new VString(f_curl_version())); } static void _curl_version(Request& r, MethodParams& params){ fcurl ? 
_curl_version_action(r, params) : temp_curl(_curl_version_action, r, params); } -static char *str_lower(const char *str){ - char *result=pa_strdup(str); - for(char* c=result; *c; c++) - *c=(char)tolower((unsigned char)*c); - return result; -} - -static char *str_upper(const char *str){ - char *result=pa_strdup(str); - for(char* c=result; *c; c++) - *c=(char)toupper((unsigned char)*c); - return result; -} - -class CurlOption { -public: +struct CurlOption : public PA_Allocated{ enum OptionType { CURL_STRING, CURL_URLENCODE, // url-encoded string CURL_URL, CURL_INT, + CURL_NO_BODY, CURL_POST, + CURL_POSTFIELDS, CURL_FORM, CURL_HEADERS, CURL_FILE, + CURL_STDERR, + CURL_HTTP_VERSION, PARSER_LIBRARY, PARSER_NAME, PARSER_CONTENT_TYPE, @@ -218,28 +221,34 @@ public: CURL_OPT(CURL_INT, LOCALPORT); CURL_OPT(CURL_INT, PORT); + CURL_OPT(CURL_INT, VERBOSE); + CURL_OPT(CURL_STDERR, STDERR); + CURL_OPT(CURL_INT, MAXFILESIZE); + CURL_OPT(CURL_INT, HTTPAUTH); CURL_OPT(CURL_STRING, USERPWD); -#ifdef CURLOPT_USERNAME CURL_OPT(CURL_STRING, USERNAME); CURL_OPT(CURL_STRING, PASSWORD); -#endif CURL_OPT(CURL_URLENCODE, USERAGENT); CURL_OPT(CURL_URLENCODE, REFERER); CURL_OPT(CURL_INT, AUTOREFERER); + CURL_OPT(CURL_STRING, ENCODING); // gzip or deflate + CURL_OPT(CURL_STRING, ACCEPT_ENCODING); // gzip or deflate + CURL_OPT(CURL_INT, FOLLOWLOCATION); CURL_OPT(CURL_INT, UNRESTRICTED_AUTH); + CURL_OPT(CURL_INT, IPRESOLVE); - CURL_OPT(CURL_INT, POST); + CURL_OPT(CURL_POST, POST); CURL_OPT(CURL_INT, HTTPGET); - CURL_OPT(CURL_INT, NOBODY); + CURL_OPT(CURL_NO_BODY, NOBODY); CURL_OPT(CURL_STRING, CUSTOMREQUEST); - CURL_OPT(CURL_POST, POSTFIELDS); // hopefully is safe too - CURL_OPT(CURL_POST, COPYPOSTFIELDS); + CURL_OPT(CURL_POSTFIELDS, POSTFIELDS); // hopefully is safe too + CURL_OPT(CURL_POSTFIELDS, COPYPOSTFIELDS); CURL_OPT(CURL_FORM, HTTPPOST); CURL_OPT(CURL_HEADERS, HTTPHEADER); @@ -283,20 +292,17 @@ public: CURL_OPT(CURL_STRING, SSLENGINE); CURL_OPT(CURL_STRING, SSLENGINE_DEFAULT); -#ifdef 
CURLOPT_ISSUERCERT CURL_OPT(CURL_FILE, ISSUERCERT); -#endif - -#ifdef CURLOPT_CRLFILE CURL_OPT(CURL_FILE, CRLFILE); -#endif CURL_OPT(CURL_STRING, CAINFO); - CURL_OPT(CURL_STRING, CAPATH); + CURL_OPT(CURL_FILE, CAPATH); CURL_OPT(CURL_INT, SSL_VERIFYPEER); CURL_OPT(CURL_INT, SSL_VERIFYHOST); CURL_OPT(CURL_STRING, SSL_CIPHER_LIST); CURL_OPT(CURL_INT, SSL_SESSIONID_CACHE); + CURL_OPT(CURL_INT, SSLVERSION); + CURL_OPT(CURL_HTTP_VERSION, HTTP_VERSION); PARSER_OPT(PARSER_LIBRARY, "library"); PARSER_OPT(PARSER_NAME, "name"); @@ -308,6 +314,59 @@ public: } *curl_options=0; +struct CurlInfo : public PA_Allocated{ + + enum OptionType { + CURL_STRING, + CURL_INT, + CURL_DOUBLE, + CURL_HTTP_VERSION + }; + + CURLINFO id; + OptionType type; + CurlInfo(CURLINFO aid, OptionType atype): id(aid), type(atype) {} +}; + +class CurlInfoHash: public OrderedHashString<CurlInfo*> { +public: + CurlInfoHash() { +#define CURL_INF(type, name) put(str_lower(#name),new CurlInfo(CURLINFO_##name, CurlInfo::type)); + CURL_INF(CURL_STRING, SCHEME); + CURL_INF(CURL_HTTP_VERSION, HTTP_VERSION); + CURL_INF(CURL_STRING, EFFECTIVE_URL); + CURL_INF(CURL_STRING, CONTENT_TYPE); + CURL_INF(CURL_INT, RESPONSE_CODE); + CURL_INF(CURL_INT, OS_ERRNO); + + CURL_INF(CURL_DOUBLE, NAMELOOKUP_TIME); + CURL_INF(CURL_DOUBLE, APPCONNECT_TIME); + CURL_INF(CURL_DOUBLE, PRETRANSFER_TIME); + CURL_INF(CURL_DOUBLE, STARTTRANSFER_TIME); + CURL_INF(CURL_DOUBLE, CONNECT_TIME); + CURL_INF(CURL_DOUBLE, TOTAL_TIME); + + CURL_INF(CURL_DOUBLE, CONTENT_LENGTH_DOWNLOAD); + CURL_INF(CURL_DOUBLE, CONTENT_LENGTH_UPLOAD); + CURL_INF(CURL_INT, HEADER_SIZE); + CURL_INF(CURL_INT, REQUEST_SIZE); + CURL_INF(CURL_DOUBLE, SIZE_DOWNLOAD); + CURL_INF(CURL_DOUBLE, SIZE_UPLOAD); + CURL_INF(CURL_DOUBLE, SPEED_DOWNLOAD); + CURL_INF(CURL_DOUBLE, SPEED_UPLOAD); + + CURL_INF(CURL_INT, NUM_CONNECTS); + CURL_INF(CURL_STRING, PRIMARY_IP); + CURL_INF(CURL_INT, HTTPAUTH_AVAIL); + CURL_INF(CURL_INT, PROXYAUTH_AVAIL); + CURL_INF(CURL_INT, REDIRECT_COUNT); +
CURL_INF(CURL_DOUBLE, REDIRECT_TIME); + CURL_INF(CURL_STRING, REDIRECT_URL); + CURL_INF(CURL_INT, SSL_VERIFYRESULT); + } + +} *curl_infos=0; + static const char *curl_urlencode(const String &s, Request& r){ if(options().charset){ Temp_client_charset temp(r.charsets, *options().charset); @@ -321,9 +380,9 @@ static struct curl_slist *curl_headers(H for(HashStringValue::Iterator i(*value_hash); i; i.next() ){ String header = - String(capitalize(i.key().cstr()), String::L_URI) + String(pa_http_safe_header_name(capitalize(i.key().cstr())), String::L_AS_IS) << ": " - << String(i.value()->as_string(), String::L_URI); + << String(i.value()->as_string(), String::L_HTTP_HEADER); slist=f_curl_slist_append(slist, curl_urlencode(header, r)); } @@ -352,7 +411,7 @@ static void curl_form(HashStringValue *v CURLFORM_PTRCONTENTS, curl_transcode(String(tvalue->get(t)->get(0)->cstr()), r), CURLFORM_END); } - } else if(VFile* fvalue=static_cast<VFile*>(i.value()->as("file"))){ + } else if(VFile* fvalue=dynamic_cast<VFile*>(i.value())){ // file f_curl_formadd(&options().f_post, &f_last, CURLFORM_PTRNAME, key, @@ -362,11 +421,41 @@ static void curl_form(HashStringValue *v CURLFORM_CONTENTTYPE, fvalue->fields().get("content-type")->as_string().taint_cstr(String::L_URI), CURLFORM_END); } else { - throw Exception("curl", new String(i.key(), String::L_TAINTED), "is %s, form option value can be string, table or file only", i.value()->type()); + throw Exception("curl", new String(i.key(), String::L_TAINTED), "is %s, form option value can be string, table or file only", i.value()->type()); } } } +static const char *curl_check_file(const String &file_spec){ + const char *file_spec_cstr=file_spec.taint_cstr(String::L_FILE_SPEC); + struct stat finfo; + if(pa_stat(file_spec_cstr, &finfo)==0) + check_safe_mode(finfo, file_spec, file_spec_cstr); + return file_spec_cstr; +} + +static long curl_http_version(const String &name){ + if(name.is_empty()) return CURL_HTTP_VERSION_NONE; + + if(name == "1.0") return
CURL_HTTP_VERSION_1_0; + if(name == "1.1") return CURL_HTTP_VERSION_1_1; + if(name == "2") return CURL_HTTP_VERSION_2; + if(name == "2.0") return CURL_HTTP_VERSION_2_0; + + const char *sname = str_upper(name.cstr()); + if(!strcmp(sname,"2TLS")) return CURL_HTTP_VERSION_2TLS; + if(!strcmp(sname,"2ONLY")) return CURL_HTTP_VERSION_2_PRIOR_KNOWLEDGE; + throw Exception("curl", &name, "invalid http_version option value"); +} + +static const char *curl_http_version_name(long value){ + if(value == CURL_HTTP_VERSION_NONE) return "none"; + if(value == CURL_HTTP_VERSION_1_0) return "1.0"; + if(value == CURL_HTTP_VERSION_1_1) return "1.1"; + if(value == CURL_HTTP_VERSION_2) return "2"; + throw Exception("curl", 0, "invalid http version '%ld' in info", value); /* value is long, so the conversion must be %ld */ +} + static void curl_setopt(HashStringValue::key_type key, HashStringValue::value_type value, Request& r) { CurlOption *opt=curl_options->get(key); @@ -374,7 +463,7 @@ static void curl_setopt(HashStringValue: throw Exception("curl", 0, "called with invalid option '%s'", key.cstr()); CURLcode res = CURLE_OK; - Value &v=r.process_to_value(*value); + Value &v=r.process(*value); switch (opt->type){ case CurlOption::CURL_STRING:{ @@ -391,11 +480,11 @@ static void curl_setopt(HashStringValue: } case CurlOption::CURL_URL:{ // url-encoded string curl_url option - const String url = v.as_string(); - if(!url.starts_with("http://") && !url.starts_with("https://")) - throw Exception("curl", 0, "failed to set option '%s': invalid url scheme '%s'", key.cstr(), url.cstr()); - const char *value_str=curl_urlencode(url, r); - res=f_curl_easy_setopt(curl(), opt->id, value_str); + const String url = v.as_string(); + if(!url.starts_with("http://") && !url.starts_with("https://")) + throw Exception("curl", 0, "failed to set option '%s': invalid url scheme '%s'", key.cstr(), url.cstr()); + options().url=curl_urlencode(url, r); + res=f_curl_easy_setopt(curl(), opt->id, options().url); break; } case CurlOption::CURL_INT:{ @@ -404,54 +493,88 @@
static void curl_setopt(HashStringValue: res=f_curl_easy_setopt(curl(), opt->id, value_int); break; } + case CurlOption::CURL_NO_BODY:{ + // integer curl option + long value_int=(long)v.as_double(); + res=f_curl_easy_setopt(curl(), opt->id, value_int); + options().no_body=value_int != 0; + break; + } case CurlOption::CURL_POST:{ + // integer curl option + long value_int=(long)v.as_double(); + res=f_curl_easy_setopt(curl(), opt->id, value_int); + options().is_post=value_int != 0; + break; + } + case CurlOption::CURL_POSTFIELDS:{ // http post curl option if(v.get_string()){ if( (res=f_curl_easy_setopt(curl(), CURLOPT_POSTFIELDSIZE, -1L)) == CURLE_OK ) res=f_curl_easy_setopt(curl(), opt->id, curl_urlencode(v.as_string(), r)); } else { - VFile *file=v.as_vfile(String::L_AS_IS); + VFile *file=v.as_vfile(); if( (res=f_curl_easy_setopt(curl(), CURLOPT_POSTFIELDSIZE, (long)file->value_size())) == CURLE_OK ) res=f_curl_easy_setopt(curl(), opt->id, file->value_ptr()); } + options().has_content_length=true; break; } case CurlOption::CURL_FORM:{ - HashStringValue *value_hash = v.get_hash(); + HashStringValue *value_hash = v.as_hash("failed to set option 'httppost': value"); if(value_hash){ curl_form(value_hash, r); - } else if(v.get_string()->is_empty()){ - f_curl_formfree(options().f_post); - options().f_post = 0; } else { - throw Exception("curl", 0, "%s must be a hash", key.cstr()); - } + if(options().f_post) + f_curl_formfree(options().f_post); + options().f_post = 0; + } res=f_curl_easy_setopt(curl(), CURLOPT_HTTPPOST, foptions->f_post); + options().has_content_length=true; break; } case CurlOption::CURL_HEADERS:{ // http headers curl option - HashStringValue *value_hash=v.get_hash(); + HashStringValue *value_hash=v.as_hash("failed to set option 'httpheader': value"); res=f_curl_easy_setopt(curl(), opt->id, value_hash ? 
curl_headers(value_hash, r) : 0); break; } case CurlOption::CURL_FILE:{ // file-spec curl option - const char *value_str=r.absolute(v.as_string()).taint_cstr(String::L_FILE_SPEC); - res=f_curl_easy_setopt(curl(), opt->id, value_str); + const char *file_spec_cstr=curl_check_file(r.full_disk_path(v.as_string())); + res=f_curl_easy_setopt(curl(), opt->id, file_spec_cstr); + break; + } + case CurlOption::CURL_STDERR:{ + // verbose output redirection from stderr to file curl option + const char *file_spec_cstr=curl_check_file(r.full_disk_path(v.as_string())); + FILE *f_stderr=options().f_stderr=pa_fopen(file_spec_cstr, "wt"); + if (f_stderr){ + res=f_curl_easy_setopt(curl(), opt->id, f_stderr); + } else { + throw Exception("curl", 0, "failed to set option '%s': unable to open file '%s'", key.cstr(), file_spec_cstr); + } + break; + } + case CurlOption::CURL_HTTP_VERSION:{ + // http protocol version name curl option + long value_int=curl_http_version(v.as_string()); + res=f_curl_easy_setopt(curl(), opt->id, value_int); break; } case CurlOption::PARSER_LIBRARY:{ // 'library' parser option - if(fcurl==0){ + if(!curl_linked){ curl_library=v.as_string().taint_cstr(String::L_FILE_SPEC); + if(!curl_library[0]) + curl_library=CURL_LIBRARY; } else - throw Exception("curl", 0, "failed to set option '%s': %s", key.cstr(), "already loaded"); + throw Exception("curl", 0, "failed to set option '%s': already loaded", key.cstr()); break; } case CurlOption::PARSER_NAME:{ // 'name' parser option - options().filename=v.as_string().taint_cstr(String::L_FILE_SPEC); + options().filename=&v.as_string(); break; } case CurlOption::PARSER_CONTENT_TYPE:{ @@ -461,17 +584,16 @@ static void curl_setopt(HashStringValue: } case CurlOption::PARSER_MODE:{ // 'mode' parser option - options().is_text=is_text_mode(v.as_string()); + options().is_text=VFile::is_text_mode(v.as_string()); break; } case CurlOption::PARSER_CHARSET:{ - // 'charset' parser option - 
options().charset=&::charsets.get(v.as_string().change_case(r.charsets.source(), String::CC_UPPER)); + // 'charset' parser option should be processed before other options break; } case CurlOption::PARSER_RESPONSE_CHARSET:{ - // 'charset' parser option - options().response_charset=&::charsets.get(v.as_string().change_case(r.charsets.source(), String::CC_UPPER)); + // 'response-charset' parser option + options().response_charset=&pa_charsets.get(v.as_string()); break; } } @@ -484,20 +606,83 @@ static void _curl_options(Request& r, Me if(curl_options==0) curl_options=new CurlOptionHash(); - if(HashStringValue* options=params.as_no_junction(0, OPTIONS_MUST_NOT_BE_CODE).get_hash()){ - options->for_each(curl_setopt, r); - } else - throw Exception("curl", 0, "options must be hash"); + if(HashStringValue* options_hash=params.as_hash(0)){ + if(Value* value=options_hash->get("charset")){ + // charset should be handled first as params may require transcode + Value &v=r.process(*value); + options().charset=&pa_charsets.get(v.as_string()); + } + options_hash->for_each(curl_setopt, r); + } } +#define CURL_GETINFO(arg) \ + if((res=f_curl_easy_getinfo(curl(), info->id, &arg)) != CURLE_OK){ \ + if (fail_on_error) \ + throw Exception("curl", 0, "failed to get %s info: %s", key.cstr(), f_curl_easy_strerror(res)); \ + return 0; \ + } + +static Value *curl_getinfo(const String::Body &key, CurlInfo *info, bool fail_on_error=false) { + CURLcode res; + switch (info->type){ + case CurlInfo::CURL_STRING:{ + char *str=0; + CURL_GETINFO(str); + return new VString(str ? 
*new String(pa_strdup(str), String::L_TAINTED) : String::Empty); + } + case CurlInfo::CURL_INT:{ + long l=0; + CURL_GETINFO(l); + return new VInt(l); + } + case CurlInfo::CURL_DOUBLE:{ + double d=0; + CURL_GETINFO(d); + return new VDouble(d); + } + case CurlInfo::CURL_HTTP_VERSION:{ + long l=0; + CURL_GETINFO(l); + return new VString(curl_http_version_name(l)); + } + } + return VVoid::get(); +} + +static void _curl_info(Request& r, MethodParams& params){ + if(curl_infos==0) + curl_infos=new CurlInfoHash(); + if(params.count()==1){ + const String &name=params.as_string(0, "name must be string"); + CurlInfo *info=curl_infos->get(name); + if(info==0) + throw Exception("curl", 0, "called with invalid parameter '%s'", name.cstr()); + r.write(*curl_getinfo(name, info, true)); + } else { + VHash& result=*new VHash; + for(CurlInfoHash::Iterator i(*curl_infos); i; i.next() ){ + Value *value=curl_getinfo(i.key(), i.value()); + if(value) + result.get_hash()->put(i.key(), value); + } + r.write(result); + } +} class Curl_buffer{ public: char *buf; size_t length; size_t buf_size; + HTTP_Headers& headers; + + Curl_buffer(HTTP_Headers& aheaders) : buf((char *)pa_malloc_atomic(MAX_STRING)), length(0), buf_size(MAX_STRING-1), headers(aheaders){} - Curl_buffer() : buf((char *)pa_malloc(MAX_STRING+1)), length(0), buf_size(MAX_STRING){} + void resize(size_t size){ + buf_size=size; + buf=(char *)pa_realloc(buf, size+1); + } }; static int curl_writer(char *data, size_t size, size_t nmemb, Curl_buffer *result){ @@ -506,27 +691,29 @@ static int curl_writer(char *data, size_ size=size*nmemb; if(size>0){ - if(result->length + size >= result->buf_size){ - result->buf_size = result->buf_size*2 + size; - result->buf = (char *)pa_realloc(result->buf, result->buf_size+1); - } + size_t buf_required = result->length + size; + if(buf_required > result->buf_size) + result->resize(buf_required <= result->headers.content_length ? 
(size_t)result->headers.content_length : result->buf_size*2 + size); memcpy(result->buf+result->length, data, size); result->length += size; } return size; } -static int curl_header(char *data, size_t size, size_t nmemb, HASH_STRING *result){ +static int curl_header(char *data, size_t size, size_t nmemb, HTTP_Headers *result){ if(result == 0) return 0; size=size*nmemb; if(size>0){ - char *line=pa_strdup(data, size); - char *value=lsplit(line,':'); - if(value && *line){ - // we need only headers, not the response code - result->put(str_upper(line), value); + char *header=pa_strdup(data, size); + if(!pa_strncasecmp(header, "HTTP/") && !strchr(header, ':')){ + // response code, clearing possible headers from previous requests + result->clear(); + } else { + result->add_header(header); + if(result->content_length>pa_file_size_limit && !options().no_body) + return 0; } } return size; @@ -543,17 +730,24 @@ static void _curl_load_action(Request& r CURLcode res; - Curl_buffer body; + // we need a container for headers as VFile fields can be put only after VFile.set + HTTP_Headers response; + CURL_SETOPT(CURLOPT_HEADERFUNCTION, curl_header, "curl header function"); + CURL_SETOPT(CURLOPT_WRITEHEADER, &response, "curl header buffer"); + + Curl_buffer body(response); CURL_SETOPT(CURLOPT_WRITEFUNCTION, curl_writer, "curl writer function"); CURL_SETOPT(CURLOPT_WRITEDATA, &body, "curl write buffer"); - // we need a container for headers as VFile fields can be put only after VFile.set - HASH_STRING headers; - CURL_SETOPT(CURLOPT_HEADERFUNCTION, curl_header, "curl header function"); - CURL_SETOPT(CURLOPT_WRITEHEADER, &headers, "curl header buffer"); + if(options().is_post && !options().has_content_length){ + // libcurl bug workaround. Prior to 7.38 (Debian Jessie) curl passed Content-length: -1; + // after that no Content-length header is passed, which hangs requests to nginx.
+ CURL_SETOPT(CURLOPT_POSTFIELDSIZE, 0L, "post content-length"); /* size must be a long: the setopt call is variadic */ + } - if((res=f_curl_easy_perform(curl())) != CURLE_OK){ - char *ex_type = 0; + ALTER_EXCEPTION_SOURCE(res=f_curl_easy_perform(curl()), new String(options().url)); + if(res != CURLE_OK){ + const char *ex_type = 0; switch(res){ case CURLE_OPERATION_TIMEDOUT: ex_type = "curl.timeout"; break; @@ -569,22 +763,27 @@ static void _curl_load_action(Request& r case CURLE_SSL_CACERT: case CURLE_SSL_ENGINE_INITFAILED: ex_type = "curl.ssl"; break; + case CURLE_WRITE_ERROR: + check_file_size(response.content_length, new String(options().url)); break; + default: break; } - throw Exception( ex_type ? ex_type : "curl.fail", 0, "%s", f_curl_easy_strerror(res)); + throw Exception( PA_DEFAULT(ex_type, "curl.fail"), new String(options().url), "%s", f_curl_easy_strerror(res)); } // assure trailing zero body.buf[body.length]=0; - Value* vcontent_type= - options().content_type ? new VString(*options().content_type) : - options().filename ? new VString(r.mime_type_of(options().filename)) : 0; - VFile& result=*new VFile; - String::Body ct_header = headers.get(HTTP_CONTENT_TYPE_UPPER); - Charset *remote_charset = ct_header.is_empty() ? 0 : detect_charset(ct_header.trim(String::TRIM_BOTH, " \t\n\r").cstr()); - Charset *asked_charset = options().response_charset ? options().response_charset : (remote_charset ?
remote_charset : options().charset); + Charset *asked_charset = options().response_charset; + if (!asked_charset && !response.content_type.is_empty()) + asked_charset=detect_charset(response.content_type.cstr()); + + if(options().is_text) + asked_charset=pa_charsets.checkBOM(body.buf, body.length, asked_charset); + + if (!asked_charset) + asked_charset = options().charset; if(options().is_text && asked_charset != 0){ String::C c=Charset::transcode(String::C(body.buf, body.length), *asked_charset, r.charsets.source()); @@ -592,25 +791,36 @@ static void _curl_load_action(Request& r body.length=c.length; } - result.set(true /*tainted*/, body.buf, body.length, options().filename, vcontent_type); - result.set_mode(options().is_text); + const String *content_type = PA_DEFAULT(options().content_type, response.content_type.is_empty() ? 0 : new String(response.content_type, String::L_TAINTED)); + const String *filename = PA_DEFAULT(options().filename, new String(options().url)); + + result.set(true/*tainted*/, options().is_text, body.buf, body.length, filename, content_type ? 
new VString(*content_type) : 0, &r); long http_status = 0; if(f_curl_easy_getinfo(curl(), CURLINFO_RESPONSE_CODE, &http_status) == CURLE_OK){ - result.fields().put("status", new VInt(http_status)); + HASH_PUT_CSTR(result.fields(), "status", new VInt(http_status)); } - for(HASH_STRING::Iterator i(headers); i; i.next() ){ - String::Body key=i.key(); - String::Body value=i.value(); - if(asked_charset){ - key=Charset::transcode(key, *asked_charset, r.charsets.source()); - value=Charset::transcode(value, *asked_charset, r.charsets.source()); - } - result.fields().put(key, new VString(*new String(value.trim(String::TRIM_BOTH, " \t\n\r"), String::L_TAINTED))); + VHash* vtables=new VHash; + HASH_PUT_CSTR(result.fields(), "tables", vtables); + + for(Array_iterator<HTTP_Headers::Header> i(response.headers); i; ){ + HTTP_Headers::Header header=i.next(); + + if(asked_charset) + header.transcode(*asked_charset, r.charsets.source()); + + String &header_value=*new String(header.value, String::L_TAINTED); + + tables_update(vtables->hash(), header.name, header_value); + result.fields().put(header.name, new VString(header_value)); } - r.write_no_lang(result); + // filling $.cookies + if(Value *vcookies=vtables->hash().get("SET-COOKIE")) + HASH_PUT_CSTR(result.fields(), HTTP_COOKIES_NAME, new VTable(parse_cookies(r, vcookies->get_table()))); + + r.write(result); } static void _curl_load(Request& r, MethodParams& params){ @@ -622,13 +832,6 @@ MCurl::MCurl(): Methoded("curl") { add_native_method("session", Method::CT_STATIC, _curl_session, 1, 1); add_native_method("version", Method::CT_STATIC, _curl_version, 0, 0); add_native_method("options", Method::CT_STATIC, _curl_options, 1, 1); + add_native_method("info", Method::CT_STATIC, _curl_info, 0, 1); add_native_method("load", Method::CT_STATIC, _curl_load, 0, 1); } - -#else // HAVE_CURL - -#include "classes.h" -// global variable -DECLARE_CLASS_VAR(curl, 0, 0); // fictive - -#endif // HAVE_CURL