--- parser3/src/classes/curl.C 2017/11/29 19:10:05 1.59 +++ parser3/src/classes/curl.C 2024/09/07 16:30:26 1.71 @@ -1,7 +1,8 @@ /** @file Parser: @b curl parser class. - Copyright (c) 2001-2017 Art. Lebedev Studio (http://www.artlebedev.com) + Copyright (c) 2001-2023 Art. Lebedev Studio (http://www.artlebedev.com) + Authors: Konstantin Morshnev */ #include "pa_config_includes.h" @@ -17,7 +18,7 @@ #include "pa_http.h" #include "ltdl.h" -volatile const char * IDENT_CURL_C="$Id: curl.C,v 1.59 2017/11/29 19:10:05 moko Exp $"; +volatile const char * IDENT_CURL_C="$Id: curl.C,v 1.71 2024/09/07 16:30:26 moko Exp $"; class MCurl: public Methoded { public: @@ -81,11 +82,13 @@ struct ParserOptions : public PA_Allocat struct curl_httppost *f_post; FILE *f_stderr; - // stuff to walkaround curl content-length bugs + // if response content-length check required + bool no_body; + // stuff to walkaround curl request content-length bugs bool is_post; bool has_content_length; - ParserOptions() : filename(0), content_type(0), is_text(true), charset(0), response_charset(0), url(0), f_post(0), f_stderr(0), is_post(false), has_content_length(false){} + ParserOptions() : filename(0), content_type(0), is_text(true), charset(0), response_charset(0), url(0), f_post(0), f_stderr(0), no_body(false), is_post(false), has_content_length(false){} ~ParserOptions() { f_curl_formfree(f_post); if(f_stderr) @@ -114,10 +117,14 @@ static ParserOptions &options(){ class Temp_curl { CURL *saved_curl; ParserOptions *saved_options; + + // every TLS should be referenced elsewhere, or GC will collect it + CURL *thread_curl; + ParserOptions *thread_options; public: Temp_curl() : saved_curl(fcurl), saved_options(foptions){ - fcurl = f_curl_easy_init(); - foptions = new ParserOptions(); + thread_curl = fcurl = f_curl_easy_init(); + thread_options = foptions = new ParserOptions(); f_curl_easy_setopt(fcurl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4); // avoid ipv6 by default } @@ -142,7 +149,8 @@ static void temp_curl(void (*action)(Req Temp_curl temp_curl; action(r,params); } else { - throw Exception("curl", 0, "failed to load curl library %s: %s", curl_library, curl_status); + const char *hint=strcmp(curl_library, "libcurl" LT_MODULE_EXT) ? "" : " (at first call ^curl:options[ $.library[correct.libcurl" LT_MODULE_EXT ".name] ])"; + throw Exception("curl", 0, "failed to load curl library %s%s", curl_status, hint); } } @@ -170,6 +178,7 @@ struct CurlOption : public PA_Allocated{ CURL_URLENCODE, // url-encoded string CURL_URL, CURL_INT, + CURL_NO_BODY, CURL_POST, CURL_POSTFIELDS, CURL_FORM, @@ -223,7 +232,7 @@ public: CURL_OPT(CURL_POST, POST); CURL_OPT(CURL_INT, HTTPGET); - CURL_OPT(CURL_INT, NOBODY); + CURL_OPT(CURL_NO_BODY, NOBODY); CURL_OPT(CURL_STRING, CUSTOMREQUEST); CURL_OPT(CURL_POSTFIELDS, POSTFIELDS); // hopefully is safe too @@ -307,38 +316,41 @@ struct CurlInfo : public PA_Allocated{ CurlInfo(CURLINFO aid, OptionType atype): id(aid), type(atype) {} }; -class CurlInfoHash: public HashString { +class CurlInfoHash: public OrderedHashString { public: CurlInfoHash() { #define CURL_INF(type, name) put(str_lower(#name),new CurlInfo(CURLINFO_##name, CurlInfo::type)); + CURL_INF(CURL_STRING, SCHEME); + CURL_INF(CURL_HTTP_VERSION, HTTP_VERSION); + CURL_INF(CURL_STRING, EFFECTIVE_URL); + CURL_INF(CURL_STRING, CONTENT_TYPE); + CURL_INF(CURL_INT, RESPONSE_CODE); + CURL_INF(CURL_INT, OS_ERRNO); + + CURL_INF(CURL_DOUBLE, NAMELOOKUP_TIME); CURL_INF(CURL_DOUBLE, APPCONNECT_TIME); + CURL_INF(CURL_DOUBLE, PRETRANSFER_TIME); + CURL_INF(CURL_DOUBLE, STARTTRANSFER_TIME); CURL_INF(CURL_DOUBLE, CONNECT_TIME); + CURL_INF(CURL_DOUBLE, TOTAL_TIME); + CURL_INF(CURL_DOUBLE, CONTENT_LENGTH_DOWNLOAD); CURL_INF(CURL_DOUBLE, CONTENT_LENGTH_UPLOAD); - CURL_INF(CURL_STRING, CONTENT_TYPE); - CURL_INF(CURL_STRING, EFFECTIVE_URL); CURL_INF(CURL_INT, HEADER_SIZE); - CURL_INF(CURL_INT, HTTPAUTH_AVAIL); - CURL_INF(CURL_DOUBLE, NAMELOOKUP_TIME); + CURL_INF(CURL_INT, REQUEST_SIZE); + CURL_INF(CURL_DOUBLE, SIZE_DOWNLOAD); + CURL_INF(CURL_DOUBLE, SIZE_UPLOAD); + CURL_INF(CURL_DOUBLE, SPEED_DOWNLOAD); + CURL_INF(CURL_DOUBLE, SPEED_UPLOAD); + CURL_INF(CURL_INT, NUM_CONNECTS); - CURL_INF(CURL_INT, OS_ERRNO); - CURL_INF(CURL_DOUBLE, PRETRANSFER_TIME); CURL_INF(CURL_STRING, PRIMARY_IP); + CURL_INF(CURL_INT, HTTPAUTH_AVAIL); CURL_INF(CURL_INT, PROXYAUTH_AVAIL); CURL_INF(CURL_INT, REDIRECT_COUNT); CURL_INF(CURL_DOUBLE, REDIRECT_TIME); CURL_INF(CURL_STRING, REDIRECT_URL); - CURL_INF(CURL_INT, REQUEST_SIZE); - CURL_INF(CURL_INT, RESPONSE_CODE); - CURL_INF(CURL_DOUBLE, SIZE_DOWNLOAD); - CURL_INF(CURL_DOUBLE, SIZE_UPLOAD); - CURL_INF(CURL_DOUBLE, SPEED_DOWNLOAD); - CURL_INF(CURL_DOUBLE, SPEED_UPLOAD); CURL_INF(CURL_INT, SSL_VERIFYRESULT); - CURL_INF(CURL_DOUBLE, STARTTRANSFER_TIME); - CURL_INF(CURL_DOUBLE, TOTAL_TIME); - CURL_INF(CURL_HTTP_VERSION, HTTP_VERSION); - CURL_INF(CURL_STRING, SCHEME); } } *curl_infos=0; @@ -469,6 +481,13 @@ static void curl_setopt(HashStringValue: res=f_curl_easy_setopt(curl(), opt->id, value_int); break; } + case CurlOption::CURL_NO_BODY:{ + // integer curl option + long value_int=(long)v.as_double(); + res=f_curl_easy_setopt(curl(), opt->id, value_int); + options().no_body=value_int != 0; + break; + } case CurlOption::CURL_POST:{ // integer curl option long value_int=(long)v.as_double(); @@ -482,7 +501,7 @@ static void curl_setopt(HashStringValue: if( (res=f_curl_easy_setopt(curl(), CURLOPT_POSTFIELDSIZE, -1L)) == CURLE_OK ) res=f_curl_easy_setopt(curl(), opt->id, curl_urlencode(v.as_string(), r)); } else { - VFile *file=v.as_vfile(String::L_AS_IS); + VFile *file=v.as_vfile(); if( (res=f_curl_easy_setopt(curl(), CURLOPT_POSTFIELDSIZE, (long)file->value_size())) == CURLE_OK ) res=f_curl_easy_setopt(curl(), opt->id, file->value_ptr()); } @@ -510,13 +529,13 @@ static void curl_setopt(HashStringValue: } case CurlOption::CURL_FILE:{ // file-spec curl option - const char *file_spec_cstr=curl_check_file(r.absolute(v.as_string())); + const char *file_spec_cstr=curl_check_file(r.full_disk_path(v.as_string())); res=f_curl_easy_setopt(curl(), opt->id, file_spec_cstr); break; } case CurlOption::CURL_STDERR:{ // verbose output redirection from stderr to file curl option - const char *file_spec_cstr=curl_check_file(r.absolute(v.as_string())); + const char *file_spec_cstr=curl_check_file(r.full_disk_path(v.as_string())); FILE *f_stderr=options().f_stderr=pa_fopen(file_spec_cstr, "wt"); if (f_stderr){ res=f_curl_easy_setopt(curl(), opt->id, f_stderr); @@ -642,9 +661,9 @@ public: char *buf; size_t length; size_t buf_size; - ResponseHeaders& headers; + HTTP_Headers& headers; - Curl_buffer(ResponseHeaders& aheaders) : buf((char *)pa_malloc_atomic(MAX_STRING)), length(0), buf_size(MAX_STRING-1), headers(aheaders){} + Curl_buffer(HTTP_Headers& aheaders) : buf((char *)pa_malloc_atomic(MAX_STRING)), length(0), buf_size(MAX_STRING-1), headers(aheaders){} void resize(size_t size){ buf_size=size; @@ -667,7 +686,7 @@ static int curl_writer(char *data, size_ return size; } -static int curl_header(char *data, size_t size, size_t nmemb, ResponseHeaders *result){ +static int curl_header(char *data, size_t size, size_t nmemb, HTTP_Headers *result){ if(result == 0) return 0; @@ -679,7 +698,7 @@ static int curl_header(char *data, size_ result->clear(); } else { result->add_header(header); - if(result->content_length>pa_file_size_limit) + if(result->content_length>pa_file_size_limit && !options().no_body) return 0; } } @@ -698,7 +717,7 @@ static void _curl_load_action(Request& r CURLcode res; // we need a container for headers as VFile fields can be put only after VFile.set - ResponseHeaders response; + HTTP_Headers response; CURL_SETOPT(CURLOPT_HEADERFUNCTION, curl_header, "curl header function"); CURL_SETOPT(CURLOPT_WRITEHEADER, &response, "curl header buffer"); @@ -712,7 +731,8 @@ static void _curl_load_action(Request& r CURL_SETOPT(CURLOPT_POSTFIELDSIZE, 0, "post content-length"); } - if((res=f_curl_easy_perform(curl())) != CURLE_OK){ + ALTER_EXCEPTION_SOURCE(res=f_curl_easy_perform(curl()), new String(options().url)); + if(res != CURLE_OK){ const char *ex_type = 0; switch(res){ case CURLE_OPERATION_TIMEDOUT: @@ -730,10 +750,10 @@ static void _curl_load_action(Request& r case CURLE_SSL_ENGINE_INITFAILED: ex_type = "curl.ssl"; break; case CURLE_WRITE_ERROR: - check_file_size(response.content_length, *new String(options().url)); break; + check_file_size(response.content_length, new String(options().url)); break; default: break; } - throw Exception( PA_DEFAULT(ex_type, "curl.fail"), 0, "%s", f_curl_easy_strerror(res)); + throw Exception( PA_DEFAULT(ex_type, "curl.fail"), new String(options().url), "%s", f_curl_easy_strerror(res)); } // assure trailing zero @@ -770,8 +790,8 @@ static void _curl_load_action(Request& r VHash* vtables=new VHash; result.fields().put("tables", vtables); - for(Array_iterator i(response.headers); i.has_next(); ){ - ResponseHeaders::Header header=i.next(); + for(Array_iterator i(response.headers); i; ){ + HTTP_Headers::Header header=i.next(); if(asked_charset) header.transcode(*asked_charset, r.charsets.source());