--- parser3/src/classes/curl.C 2016/07/29 20:24:16 1.40 +++ parser3/src/classes/curl.C 2017/11/29 19:10:05 1.59 @@ -1,7 +1,7 @@ /** @file Parser: @b curl parser class. - Copyright (c) 2001-2015 Art. Lebedev Studio (http://www.artlebedev.com) + Copyright (c) 2001-2017 Art. Lebedev Studio (http://www.artlebedev.com) */ #include "pa_config_includes.h" @@ -17,7 +17,7 @@ #include "pa_http.h" #include "ltdl.h" -volatile const char * IDENT_CURL_C="$Id: curl.C,v 1.40 2016/07/29 20:24:16 moko Exp $"; +volatile const char * IDENT_CURL_C="$Id: curl.C,v 1.59 2017/11/29 19:10:05 moko Exp $"; class MCurl: public Methoded { public: @@ -69,8 +69,7 @@ static const char *dlink(const char *dlo } -class ParserOptions { -public: +struct ParserOptions : public PA_Allocated { // real options const String *filename; const String *content_type; @@ -82,13 +81,17 @@ public: struct curl_httppost *f_post; FILE *f_stderr; - ParserOptions() : filename(0), content_type(0), is_text(true), charset(0), response_charset(0), url(0), f_post(0), f_stderr(0){} + // stuff to walkaround curl content-length bugs + bool is_post; + bool has_content_length; + + ParserOptions() : filename(0), content_type(0), is_text(true), charset(0), response_charset(0), url(0), f_post(0), f_stderr(0), is_post(false), has_content_length(false){} ~ParserOptions() { f_curl_formfree(f_post); if(f_stderr) fclose(f_stderr); } - + }; // using TLS instead of keeping variables in request @@ -115,9 +118,9 @@ public: Temp_curl() : saved_curl(fcurl), saved_options(foptions){ fcurl = f_curl_easy_init(); foptions = new ParserOptions(); - f_curl_easy_setopt(fcurl, CURLOPT_POSTFIELDSIZE, 0); // fix libcurl bug f_curl_easy_setopt(fcurl, CURLOPT_IPRESOLVE, CURL_IPRESOLVE_V4); // avoid ipv6 by default } + ~Temp_curl() { f_curl_easy_cleanup(fcurl); fcurl = saved_curl; @@ -127,17 +130,15 @@ public: }; bool curl_linked = false; -const char *curl_library="libcurl" LT_MODULE_EXT; - const char *curl_status = 0; +const char *curl_library="libcurl" LT_MODULE_EXT; static void temp_curl(void (*action)(Request&, MethodParams&), Request& r, MethodParams& params){ - if(!curl_linked){ - curl_linked=true; + if(!curl_linked) curl_status=dlink(curl_library); - } if(curl_status == 0){ + curl_linked=true; Temp_curl temp_curl; action(r,params); } else { @@ -155,15 +156,14 @@ static void _curl_session(Request& r, Me } static void _curl_version_action(Request& r, MethodParams& ){ - r.write_no_lang(*new VString(*new String(f_curl_version(), String::L_TAINTED))); + r.write(*new VString(*new String(f_curl_version(), String::L_TAINTED))); } static void _curl_version(Request& r, MethodParams& params){ fcurl ? _curl_version_action(r, params) : temp_curl(_curl_version_action, r, params); } -class CurlOption { -public: +struct CurlOption : public PA_Allocated{ enum OptionType { CURL_STRING, @@ -171,10 +171,12 @@ public: CURL_URL, CURL_INT, CURL_POST, + CURL_POSTFIELDS, CURL_FORM, CURL_HEADERS, CURL_FILE, CURL_STDERR, + CURL_HTTP_VERSION, PARSER_LIBRARY, PARSER_NAME, PARSER_CONTENT_TYPE, @@ -219,13 +221,13 @@ public: CURL_OPT(CURL_INT, UNRESTRICTED_AUTH); CURL_OPT(CURL_INT, IPRESOLVE); - CURL_OPT(CURL_INT, POST); + CURL_OPT(CURL_POST, POST); CURL_OPT(CURL_INT, HTTPGET); CURL_OPT(CURL_INT, NOBODY); CURL_OPT(CURL_STRING, CUSTOMREQUEST); - CURL_OPT(CURL_POST, POSTFIELDS); // hopefully is safe too - CURL_OPT(CURL_POST, COPYPOSTFIELDS); + CURL_OPT(CURL_POSTFIELDS, POSTFIELDS); // hopefully is safe too + CURL_OPT(CURL_POSTFIELDS, COPYPOSTFIELDS); CURL_OPT(CURL_FORM, HTTPPOST); CURL_OPT(CURL_HEADERS, HTTPHEADER); @@ -278,6 +280,8 @@ public: CURL_OPT(CURL_INT, SSL_VERIFYHOST); CURL_OPT(CURL_STRING, SSL_CIPHER_LIST); CURL_OPT(CURL_INT, SSL_SESSIONID_CACHE); + CURL_OPT(CURL_INT, SSLVERSION); + CURL_OPT(CURL_HTTP_VERSION, HTTP_VERSION); PARSER_OPT(PARSER_LIBRARY, "library"); PARSER_OPT(PARSER_NAME, "name"); @@ -289,13 +293,13 @@ public: } *curl_options=0; -class CurlInfo { -public: +struct CurlInfo : public PA_Allocated{ enum OptionType { CURL_STRING, CURL_INT, - CURL_DOUBLE + CURL_DOUBLE, + CURL_HTTP_VERSION }; CURLINFO id; @@ -333,6 +337,8 @@ public: CURL_INF(CURL_INT, SSL_VERIFYRESULT); CURL_INF(CURL_DOUBLE, STARTTRANSFER_TIME); CURL_INF(CURL_DOUBLE, TOTAL_TIME); + CURL_INF(CURL_HTTP_VERSION, HTTP_VERSION); + CURL_INF(CURL_STRING, SCHEME); } } *curl_infos=0; @@ -391,7 +397,7 @@ static void curl_form(HashStringValue *v CURLFORM_CONTENTTYPE, fvalue->fields().get("content-type")->as_string().taint_cstr(String::L_URI), CURLFORM_END); } else { - throw Exception("curl", new String(i.key(), String::L_TAINTED), "is %s, form option value can be string, table or file only", i.value()->type()); + throw Exception("curl", new String(i.key(), String::L_TAINTED), "is %s, form option value can be string, table or file only", i.value()->type()); } } } @@ -399,11 +405,33 @@ static void curl_form(HashStringValue *v static const char *curl_check_file(const String &file_spec){ const char *file_spec_cstr=file_spec.taint_cstr(String::L_FILE_SPEC); struct stat finfo; - if(stat(file_spec_cstr, &finfo)==0) + if(pa_stat(file_spec_cstr, &finfo)==0) check_safe_mode(finfo, file_spec, file_spec_cstr); return file_spec_cstr; } +static long curl_http_version(const String &name){ + if(name.is_empty()) return CURL_HTTP_VERSION_NONE; + + if(name == "1.0") return CURL_HTTP_VERSION_1_0; + if(name == "1.1") return CURL_HTTP_VERSION_1_1; + if(name == "2") return CURL_HTTP_VERSION_2; + if(name == "2.0") return CURL_HTTP_VERSION_2_0; + + const char *sname = str_upper(name.cstr()); + if(!strcmp(sname,"2TLS")) return CURL_HTTP_VERSION_2TLS; + if(!strcmp(sname,"2ONLY")) return CURL_HTTP_VERSION_2_PRIOR_KNOWLEDGE; + throw Exception("curl", &name, "invalid http_version option value"); +} + +static const char *curl_http_version_name(long value){ + if(value == CURL_HTTP_VERSION_NONE) return "none"; + if(value == CURL_HTTP_VERSION_1_0) return "1.0"; + if(value == CURL_HTTP_VERSION_1_1) return "1.1"; + if(value == CURL_HTTP_VERSION_2) return "2"; + throw Exception("curl", 0, "invalid http version '%d' in info", value); +} + static void curl_setopt(HashStringValue::key_type key, HashStringValue::value_type value, Request& r) { CurlOption *opt=curl_options->get(key); @@ -411,7 +439,7 @@ static void curl_setopt(HashStringValue: throw Exception("curl", 0, "called with invalid option '%s'", key.cstr()); CURLcode res = CURLE_OK; - Value &v=r.process_to_value(*value); + Value &v=r.process(*value); switch (opt->type){ case CurlOption::CURL_STRING:{ @@ -442,6 +470,13 @@ static void curl_setopt(HashStringValue: break; } case CurlOption::CURL_POST:{ + // integer curl option + long value_int=(long)v.as_double(); + res=f_curl_easy_setopt(curl(), opt->id, value_int); + options().is_post=value_int != 0; + break; + } + case CurlOption::CURL_POSTFIELDS:{ // http post curl option if(v.get_string()){ if( (res=f_curl_easy_setopt(curl(), CURLOPT_POSTFIELDSIZE, -1L)) == CURLE_OK ) @@ -451,24 +486,25 @@ static void curl_setopt(HashStringValue: if( (res=f_curl_easy_setopt(curl(), CURLOPT_POSTFIELDSIZE, (long)file->value_size())) == CURLE_OK ) res=f_curl_easy_setopt(curl(), opt->id, file->value_ptr()); } + options().has_content_length=true; break; } case CurlOption::CURL_FORM:{ - HashStringValue *value_hash = v.get_hash(); + HashStringValue *value_hash = v.as_hash("failed to set option 'httppost': value"); if(value_hash){ curl_form(value_hash, r); - } else if(v.get_string()->is_empty()){ - f_curl_formfree(options().f_post); - options().f_post = 0; } else { - throw Exception("curl", 0, "failed to set option '%s': value must be a hash", key.cstr()); + if(options().f_post) + f_curl_formfree(options().f_post); + options().f_post = 0; } res=f_curl_easy_setopt(curl(), CURLOPT_HTTPPOST, foptions->f_post); + options().has_content_length=true; break; } case CurlOption::CURL_HEADERS:{ // http headers curl option - HashStringValue *value_hash=v.get_hash(); + HashStringValue *value_hash=v.as_hash("failed to set option 'httpheader': value"); res=f_curl_easy_setopt(curl(), opt->id, value_hash ? curl_headers(value_hash, r) : 0); break; } @@ -481,7 +517,7 @@ static void curl_setopt(HashStringValue: case CurlOption::CURL_STDERR:{ // verbose output redirection from stderr to file curl option const char *file_spec_cstr=curl_check_file(r.absolute(v.as_string())); - FILE *f_stderr=options().f_stderr=fopen(file_spec_cstr, "wt"); + FILE *f_stderr=options().f_stderr=pa_fopen(file_spec_cstr, "wt"); if (f_stderr){ res=f_curl_easy_setopt(curl(), opt->id, f_stderr); } else { @@ -489,9 +525,15 @@ static void curl_setopt(HashStringValue: } break; } + case CurlOption::CURL_HTTP_VERSION:{ + // http protocol version name curl option + long value_int=curl_http_version(v.as_string()); + res=f_curl_easy_setopt(curl(), opt->id, value_int); + break; + } case CurlOption::PARSER_LIBRARY:{ // 'library' parser option - if(fcurl==0){ + if(!curl_linked){ curl_library=v.as_string().taint_cstr(String::L_FILE_SPEC); } else throw Exception("curl", 0, "failed to set option '%s': already loaded", key.cstr()); @@ -518,7 +560,7 @@ static void curl_setopt(HashStringValue: } case CurlOption::PARSER_RESPONSE_CHARSET:{ // 'response-charset' parser option - options().response_charset=&::charsets.get(v.as_string()); + options().response_charset=&pa_charsets.get(v.as_string()); break; } } @@ -534,8 +576,8 @@ static void _curl_options(Request& r, Me if(HashStringValue* options_hash=params.as_hash(0)){ if(Value* value=options_hash->get("charset")){ // charset should be handled first as params may require transcode - Value &v=r.process_to_value(*value); - options().charset=&::charsets.get(v.as_string()); + Value &v=r.process(*value); + options().charset=&pa_charsets.get(v.as_string()); } options_hash->for_each(curl_setopt, r); } @@ -543,13 +585,12 @@ static void _curl_options(Request& r, Me #define CURL_GETINFO(arg) \ if((res=f_curl_easy_getinfo(curl(), info->id, &arg)) != CURLE_OK){ \ - throw Exception("curl", 0, "failed to get %s info: %s", key.cstr(), f_curl_easy_strerror(res)); \ + if (fail_on_error) \ + throw Exception("curl", 0, "failed to get %s info: %s", key.cstr(), f_curl_easy_strerror(res)); \ + return 0; \ } -static Value *curl_getinfo(const String::Body &key, CurlInfo *info=0) { - if(info==0 && !(info=curl_infos->get(key))) - throw Exception("curl", 0, "called with invalid parameter '%s'", key.cstr()); - +static Value *curl_getinfo(const String::Body &key, CurlInfo *info, bool fail_on_error=false) { CURLcode res; switch (info->type){ case CurlInfo::CURL_STRING:{ @@ -567,6 +608,11 @@ static Value *curl_getinfo(const String: CURL_GETINFO(d); return new VDouble(d); } + case CurlInfo::CURL_HTTP_VERSION:{ + long l=0; + CURL_GETINFO(l); + return new VString(*new String(curl_http_version_name(l), String::L_TAINTED)); + } } return VVoid::get(); } @@ -576,24 +622,34 @@ static void _curl_info(Request& r, Metho curl_infos=new CurlInfoHash(); if(params.count()==1){ const String &name=params.as_string(0, "name must be string"); - r.write_assign_lang(*curl_getinfo(name)); + CurlInfo *info=curl_infos->get(name); + if(info==0) + throw Exception("curl", 0, "called with invalid parameter '%s'", name.cstr()); + r.write(*curl_getinfo(name, info, true)); } else { VHash& result=*new VHash; for(CurlInfoHash::Iterator i(*curl_infos); i; i.next() ){ - result.get_hash()->put(i.key(), curl_getinfo(i.key(), i.value())); + Value *value=curl_getinfo(i.key(), i.value()); + if(value) + result.get_hash()->put(i.key(), value); } - r.write_no_lang(result); + r.write(result); } } - class Curl_buffer{ public: char *buf; size_t length; size_t buf_size; + ResponseHeaders& headers; + + Curl_buffer(ResponseHeaders& aheaders) : buf((char *)pa_malloc_atomic(MAX_STRING)), length(0), buf_size(MAX_STRING-1), headers(aheaders){} - Curl_buffer() : buf((char *)pa_malloc(MAX_STRING+1)), length(0), buf_size(MAX_STRING){} + void resize(size_t size){ + buf_size=size; + buf=(char *)pa_realloc(buf, size+1); + } }; static int curl_writer(char *data, size_t size, size_t nmemb, Curl_buffer *result){ @@ -602,10 +658,9 @@ static int curl_writer(char *data, size_ size=size*nmemb; if(size>0){ - if(result->length + size >= result->buf_size){ - result->buf_size = result->buf_size*2 + size; - result->buf = (char *)pa_realloc(result->buf, result->buf_size+1); - } + size_t buf_required = result->length + size; + if(buf_required > result->buf_size) + result->resize(buf_required <= result->headers.content_length ? (size_t)result->headers.content_length : result->buf_size*2 + size); memcpy(result->buf+result->length, data, size); result->length += size; } @@ -618,7 +673,15 @@ static int curl_header(char *data, size_ size=size*nmemb; if(size>0){ - result->add_header(pa_strdup(data, size)); + char *header=pa_strdup(data, size); + if(!pa_strncasecmp(header, "HTTP/") && !strchr(header, ':')){ + // response code, clearing possible headers from previous requests + result->clear(); + } else { + result->add_header(header); + if(result->content_length>pa_file_size_limit) + return 0; + } } return size; } @@ -634,15 +697,21 @@ static void _curl_load_action(Request& r CURLcode res; - Curl_buffer body; - CURL_SETOPT(CURLOPT_WRITEFUNCTION, curl_writer, "curl writer function"); - CURL_SETOPT(CURLOPT_WRITEDATA, &body, "curl write buffer"); - // we need a container for headers as VFile fields can be put only after VFile.set ResponseHeaders response; CURL_SETOPT(CURLOPT_HEADERFUNCTION, curl_header, "curl header function"); CURL_SETOPT(CURLOPT_WRITEHEADER, &response, "curl header buffer"); + Curl_buffer body(response); + CURL_SETOPT(CURLOPT_WRITEFUNCTION, curl_writer, "curl writer function"); + CURL_SETOPT(CURLOPT_WRITEDATA, &body, "curl write buffer"); + + if(options().is_post && !options().has_content_length){ + // libcurl bug walkaround. Prior to 7.38 (Debian Jessie) curl passed Content-length: -1 + // after that no Content-length header is passed, that hangs request to nginx. + CURL_SETOPT(CURLOPT_POSTFIELDSIZE, 0, "post content-length"); + } + if((res=f_curl_easy_perform(curl())) != CURLE_OK){ const char *ex_type = 0; switch(res){ @@ -660,6 +729,8 @@ static void _curl_load_action(Request& r case CURLE_SSL_CACERT: case CURLE_SSL_ENGINE_INITFAILED: ex_type = "curl.ssl"; break; + case CURLE_WRITE_ERROR: + check_file_size(response.content_length, *new String(options().url)); break; default: break; } throw Exception( PA_DEFAULT(ex_type, "curl.fail"), 0, "%s", f_curl_easy_strerror(res)); @@ -675,7 +746,7 @@ static void _curl_load_action(Request& r asked_charset=detect_charset(response.content_type.cstr()); if(options().is_text) - asked_charset=charsets.checkBOM(body.buf, body.length, asked_charset); + asked_charset=pa_charsets.checkBOM(body.buf, body.length, asked_charset); if (!asked_charset) asked_charset = options().charset; @@ -715,7 +786,7 @@ static void _curl_load_action(Request& r if(Value *vcookies=vtables->hash().get("SET-COOKIE")) result.fields().put(HTTP_COOKIES_NAME, new VTable(parse_cookies(r, vcookies->get_table()))); - r.write_no_lang(result); + r.write(result); } static void _curl_load(Request& r, MethodParams& params){