--- parser3/src/classes/curl.C 2009/12/04 22:22:13 1.2 +++ parser3/src/classes/curl.C 2012/03/16 09:24:06 1.13 @@ -1,33 +1,34 @@ /** @file Parser: @b curl parser class. - Copyright(c) 2001-2009 ArtLebedev Group(http://www.artlebedev.com) + Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com) */ -static const char * const IDENT_INET_C="$Date: 2009/12/04 22:22:13 $"; +#include "pa_config_includes.h" + +#ifdef HAVE_CURL #include "pa_vmethod_frame.h" #include "pa_request.h" #include "pa_vfile.h" #include "pa_charsets.h" +#include "pa_vstring.h" +#include "pa_vtable.h" +#include "pa_common.h" +#include "pa_http.h" #include "ltdl.h" +volatile const char * IDENT_CURL_C="$Id: curl.C,v 1.13 2012/03/16 09:24:06 moko Exp $"; + class MCurl: public Methoded { public: MCurl(); - -public: // Methoded - bool used_directly() { return true; } }; // global variables DECLARE_CLASS_VAR(curl, new MCurl, 0); -// from file.C -extern bool is_text_mode(const String& mode); - -#ifdef HAVE_CURL #include "curl.h" typedef CURL *(*t_curl_easy_init)(); t_curl_easy_init f_curl_easy_init; @@ -35,7 +36,11 @@ typedef CURLcode (*t_curl_easy_setopt)(C typedef CURLcode (*t_curl_easy_perform)(CURL *); t_curl_easy_perform f_curl_easy_perform; typedef void (*t_curl_easy_cleanup)(CURL *); t_curl_easy_cleanup f_curl_easy_cleanup; typedef const char *(*t_curl_easy_strerror)(CURLcode); t_curl_easy_strerror f_curl_easy_strerror; +typedef CURLcode (*t_curl_easy_getinfo)(CURL *curl, CURLINFO info, ...); t_curl_easy_getinfo f_curl_easy_getinfo; typedef struct curl_slist *(*t_curl_slist_append)(struct curl_slist *,const char *); t_curl_slist_append f_curl_slist_append; +typedef const char *(*t_curl_version)(); t_curl_version f_curl_version; +typedef CURLFORMcode (*t_curl_formadd)(struct curl_httppost **httppost, struct curl_httppost **last_post, ...); t_curl_formadd f_curl_formadd; +typedef void (*t_curl_formfree)(struct curl_httppost *form); t_curl_formfree f_curl_formfree; #define GLINK(name) f_##name=(t_##name)lt_dlsym(handle, #name); #define DLINK(name) GLINK(name) if(!f_##name) return "function " #name " was not found"; @@ -54,31 +59,36 @@ const char *dlink(const char *dlopen_fil DLINK(curl_easy_init); DLINK(curl_easy_cleanup); + DLINK(curl_version); DLINK(curl_easy_setopt); DLINK(curl_easy_perform); DLINK(curl_easy_strerror); + DLINK(curl_easy_getinfo); DLINK(curl_slist_append); + DLINK(curl_formadd); + DLINK(curl_formfree); return 0; } class ParserOptions { public: - const char *filename; + const String *filename; const String *content_type; bool is_text; - Charset *charset; + Charset *charset, *response_charset; + struct curl_httppost *f_post; - ParserOptions() : filename(0), content_type(0), is_text(true), charset(0) {} + ParserOptions() : filename(0), content_type(0), is_text(true), charset(0), response_charset(0), f_post(0){} + ~ParserOptions() { + f_curl_formfree(f_post); + } + }; -// using thread local variables instead of keeping them in request -// not necessary for cgi version -#ifdef WIN32 -#define __thread __declspec(thread) -#endif -__thread CURL *fcurl=0; -__thread ParserOptions *foptions; +// using TLS instead of keeping variables in request +THREAD_LOCAL CURL *fcurl = 0; +THREAD_LOCAL ParserOptions *foptions = 0; static CURL *curl(){ if(!fcurl) @@ -94,12 +104,13 @@ static ParserOptions &options(){ // using temporal object scheme to garanty cleanup call class Temp_curl { - CURL* saved_curl; - ParserOptions* saved_options; + CURL *saved_curl; + ParserOptions *saved_options; public: - Temp_curl() : saved_curl(fcurl) { + Temp_curl() : saved_curl(fcurl), saved_options(foptions){ fcurl = f_curl_easy_init(); foptions = new ParserOptions(); + f_curl_easy_setopt(fcurl, CURLOPT_POSTFIELDSIZE, 0); // fix libcurl bug } ~Temp_curl() { f_curl_easy_cleanup(fcurl); @@ -141,6 +152,13 @@ static void _curl_session(Request& r, Me temp_curl(_curl_session_action, r, params); } +static void _curl_version_action(Request& r, MethodParams& ){ + r.write_no_lang(*new VString(*new String(f_curl_version(), String::L_TAINTED))); +} + +static void _curl_version(Request& r, MethodParams& params){ + fcurl ? _curl_version_action(r, params) : temp_curl(_curl_version_action, r, params); +} static char *str_lower(const char *str){ char *result=pa_strdup(str); @@ -162,15 +180,18 @@ public: enum OptionType { CURL_STRING, CURL_URLENCODE, // url-encoded string + CURL_URL, CURL_INT, CURL_POST, + CURL_FORM, CURL_HEADERS, CURL_FILE, PARSER_LIBRARY, PARSER_NAME, PARSER_CONTENT_TYPE, PARSER_MODE, - PARSER_CHARSET + PARSER_CHARSET, + PARSER_RESPONSE_CHARSET }; CURLoption id; @@ -183,11 +204,11 @@ public: CurlOptionHash() { #define CURL_OPT(type, name) put(str_lower(#name),new CurlOption(CURLOPT_##name, CurlOption::type)); #define PARSER_OPT(type, name) put(name,new CurlOption((CURLoption)0, CurlOption::type)); - CURL_OPT(CURL_URLENCODE, URL); + CURL_OPT(CURL_URL, URL); CURL_OPT(CURL_STRING, INTERFACE); CURL_OPT(CURL_INT, LOCALPORT); CURL_OPT(CURL_INT, PORT); - + CURL_OPT(CURL_INT, HTTPAUTH); CURL_OPT(CURL_STRING, USERPWD); @@ -196,6 +217,8 @@ public: CURL_OPT(CURL_STRING, PASSWORD); #endif + CURL_OPT(CURL_URLENCODE, USERAGENT); + CURL_OPT(CURL_URLENCODE, REFERER); CURL_OPT(CURL_INT, AUTOREFERER); CURL_OPT(CURL_STRING, ENCODING); // gzip or deflate CURL_OPT(CURL_INT, FOLLOWLOCATION); @@ -203,9 +226,12 @@ public: CURL_OPT(CURL_INT, POST); CURL_OPT(CURL_INT, HTTPGET); + CURL_OPT(CURL_INT, NOBODY); + CURL_OPT(CURL_STRING, CUSTOMREQUEST); CURL_OPT(CURL_POST, POSTFIELDS); // hopefully is safe too CURL_OPT(CURL_POST, COPYPOSTFIELDS); + CURL_OPT(CURL_FORM, HTTPPOST); CURL_OPT(CURL_HEADERS, HTTPHEADER); CURL_OPT(CURL_URLENCODE, COOKIE); @@ -216,16 +242,31 @@ public: CURL_OPT(CURL_INT, HTTP_CONTENT_DECODING); CURL_OPT(CURL_INT, HTTP_TRANSFER_DECODING); + CURL_OPT(CURL_INT, MAXREDIRS); +#ifdef CURLOPT_POSTREDIR + CURL_OPT(CURL_INT, POSTREDIR); +#endif + + CURL_OPT(CURL_STRING, RANGE); + CURL_OPT(CURL_INT, TIMEOUT); CURL_OPT(CURL_INT, TIMEOUT_MS); CURL_OPT(CURL_INT, LOW_SPEED_LIMIT); CURL_OPT(CURL_INT, LOW_SPEED_TIME); CURL_OPT(CURL_INT, MAXCONNECTS); + CURL_OPT(CURL_STRING, PROXY); + CURL_OPT(CURL_INT, PROXYPORT); + CURL_OPT(CURL_INT, PROXYTYPE); + CURL_OPT(CURL_INT, HTTPPROXYTUNNEL); + CURL_OPT(CURL_STRING, PROXYUSERPWD); + CURL_OPT(CURL_INT, PROXYAUTH); + CURL_OPT(CURL_INT, FRESH_CONNECT); CURL_OPT(CURL_INT, FORBID_REUSE); CURL_OPT(CURL_INT, CONNECTTIMEOUT); CURL_OPT(CURL_INT, CONNECTTIMEOUT_MS); + CURL_OPT(CURL_INT, FAILONERROR); CURL_OPT(CURL_FILE, SSLCERT); CURL_OPT(CURL_STRING, SSLCERTTYPE); @@ -255,6 +296,7 @@ public: PARSER_OPT(PARSER_CONTENT_TYPE, "content-type"); PARSER_OPT(PARSER_MODE, "mode"); PARSER_OPT(PARSER_CHARSET, "charset"); + PARSER_OPT(PARSER_RESPONSE_CHARSET, "response-charset"); } } *curl_options=0; @@ -272,15 +314,52 @@ static struct curl_slist *curl_headers(H for(HashStringValue::Iterator i(*value_hash); i; i.next() ){ String header = - String(capitalize(i.key().cstr()), String::L_URI) + String(pa_http_safe_header_name(capitalize(i.key().cstr())), String::L_AS_IS) << ": " - << String(i.value()->as_string(), String::L_URI); + << String(i.value()->as_string(), String::L_HTTP_HEADER); slist=f_curl_slist_append(slist, curl_urlencode(header, r)); } return slist; } +static const char* curl_transcode(const String &s, Request& r){ + return options().charset ? Charset::transcode(s.cstr(), r.charsets.source(), *options().charset).cstr() : s.cstr(); +} + +static void curl_form(HashStringValue *value_hash, Request& r){ + struct curl_httppost *f_last=0; + for(HashStringValue::Iterator i(*value_hash); i; i.next() ){ + const char *key = curl_transcode(String(i.key().cstr()), r); + if(const String* svalue = i.value()->get_string()){ + // string + f_curl_formadd(&options().f_post, &f_last, + CURLFORM_PTRNAME, key, + CURLFORM_PTRCONTENTS, curl_transcode(String(svalue->cstr()), r), + CURLFORM_END); + } else if(Table* tvalue = i.value()->get_table()){ + // table + for(size_t t = 0; t < tvalue->count(); t++) { + f_curl_formadd(&options().f_post, &f_last, + CURLFORM_PTRNAME, key, + CURLFORM_PTRCONTENTS, curl_transcode(String(tvalue->get(t)->get(0)->cstr()), r), + CURLFORM_END); + } + } else if(VFile* fvalue=static_cast(i.value()->as("file"))){ + // file + f_curl_formadd(&options().f_post, &f_last, + CURLFORM_PTRNAME, key, + CURLFORM_BUFFER, curl_transcode(String(fvalue->fields().get("name")->as_string(), String::L_FILE_SPEC), r), + CURLFORM_BUFFERLENGTH, (long)fvalue->value_size(), + CURLFORM_BUFFERPTR, fvalue->value_ptr(), + CURLFORM_CONTENTTYPE, fvalue->fields().get("content-type")->as_string().taint_cstr(String::L_URI), + CURLFORM_END); + } else { + throw Exception("curl", new String(i.key(), String::L_TAINTED), "is %s, form option value can be string, table or file only", i.value()->type()); + } + } +} + static void curl_setopt(HashStringValue::key_type key, HashStringValue::value_type value, Request& r) { CurlOption *opt=curl_options->get(key); @@ -303,6 +382,15 @@ static void curl_setopt(HashStringValue: res=f_curl_easy_setopt(curl(), opt->id, value_str); break; } + case CurlOption::CURL_URL:{ + // url-encoded string curl_url option + const String url = v.as_string(); + if(!url.starts_with("http://") && !url.starts_with("https://")) + throw Exception("curl", 0, "failed to set option '%s': invalid url scheme '%s'", key.cstr(), url.cstr()); + const char *value_str=curl_urlencode(url, r); + res=f_curl_easy_setopt(curl(), opt->id, value_str); + break; + } case CurlOption::CURL_INT:{ // integer curl option long value_int=(long)v.as_double(); @@ -321,6 +409,19 @@ static void curl_setopt(HashStringValue: } break; } + case CurlOption::CURL_FORM:{ + HashStringValue *value_hash = v.get_hash(); + if(value_hash){ + curl_form(value_hash, r); + } else if(v.get_string()->is_empty()){ + f_curl_formfree(options().f_post); + options().f_post = 0; + } else { + throw Exception("curl", 0, "%s must be a hash", key.cstr()); + } + res=f_curl_easy_setopt(curl(), CURLOPT_HTTPPOST, foptions->f_post); + break; + } case CurlOption::CURL_HEADERS:{ // http headers curl option HashStringValue *value_hash=v.get_hash(); @@ -343,7 +444,7 @@ static void curl_setopt(HashStringValue: } case CurlOption::PARSER_NAME:{ // 'name' parser option - options().filename=v.as_string().taint_cstr(String::L_FILE_SPEC); + options().filename=&v.as_string(); break; } case CurlOption::PARSER_CONTENT_TYPE:{ @@ -353,7 +454,7 @@ static void curl_setopt(HashStringValue: } case CurlOption::PARSER_MODE:{ // 'mode' parser option - options().is_text=is_text_mode(v.as_string()); + options().is_text=VFile::is_text_mode(v.as_string()); break; } case CurlOption::PARSER_CHARSET:{ @@ -361,20 +462,25 @@ static void curl_setopt(HashStringValue: options().charset=&::charsets.get(v.as_string().change_case(r.charsets.source(), String::CC_UPPER)); break; } + case CurlOption::PARSER_RESPONSE_CHARSET:{ + // 'response-charset' parser option + options().response_charset=&::charsets.get(v.as_string().change_case(r.charsets.source(), String::CC_UPPER)); + break; + } } if(res != CURLE_OK) throw Exception("curl", 0, "failed to set option '%s': %s", key.cstr(), f_curl_easy_strerror(res)); } -static void _curl_option(Request& r, MethodParams& params){ +static void _curl_options(Request& r, MethodParams& params){ if(curl_options==0) curl_options=new CurlOptionHash(); if(HashStringValue* options=params.as_no_junction(0, OPTIONS_MUST_NOT_BE_CODE).get_hash()){ options->for_each(curl_setopt, r); } else - throw Exception("curl", 0, "options must be hash"); + throw Exception("curl", 0, OPTIONS_MUST_BE_HASH); } @@ -426,7 +532,7 @@ static int curl_header(char *data, size_ static void _curl_load_action(Request& r, MethodParams& params){ if(params.count()==1) - _curl_option(r, params); + _curl_options(r, params); CURLcode res; @@ -440,13 +546,37 @@ static void _curl_load_action(Request& r CURL_SETOPT(CURLOPT_WRITEHEADER, &headers, "curl header buffer"); if((res=f_curl_easy_perform(curl())) != CURLE_OK){ - throw Exception("curl", 0, "failed to exec curl session: %s", f_curl_easy_strerror(res)); + char *ex_type = 0; + switch(res){ + case CURLE_OPERATION_TIMEDOUT: + ex_type = "curl.timeout"; break; + case CURLE_COULDNT_RESOLVE_HOST: + ex_type = "curl.host"; break; + case CURLE_COULDNT_CONNECT: + ex_type = "curl.connect"; break; + case CURLE_HTTP_RETURNED_ERROR: + ex_type = "curl.status"; break; + case CURLE_SSL_CONNECT_ERROR: + case CURLE_SSL_CERTPROBLEM: + case CURLE_SSL_CIPHER: + case CURLE_SSL_CACERT: + case CURLE_SSL_ENGINE_INITFAILED: + ex_type = "curl.ssl"; break; + } + throw Exception( ex_type ? ex_type : "curl.fail", 0, "%s", f_curl_easy_strerror(res)); } // assure trailing zero body.buf[body.length]=0; - Charset *asked_charset=options().charset; + VFile& result=*new VFile; + + String::Body ct_header = headers.get(HTTP_CONTENT_TYPE_UPPER); + Charset *asked_charset = options().response_charset; + if (asked_charset == 0){ + Charset *remote_charset = ct_header.is_empty() ? 0 : detect_charset(ct_header.trim(String::TRIM_BOTH, " \t\n\r").cstr()); + asked_charset = remote_charset ? remote_charset : options().charset; + } if(options().is_text && asked_charset != 0){ String::C c=Charset::transcode(String::C(body.buf, body.length), *asked_charset, r.charsets.source()); @@ -454,14 +584,16 @@ static void _curl_load_action(Request& r body.length=c.length; } - Value* vcontent_type= - options().content_type ? new VString(*options().content_type) : - options().filename ? new VString(r.mime_type_of(options().filename)) : 0; - - VFile& result=*new VFile; - result.set(true /*tainted*/, body.buf, body.length, options().filename, vcontent_type); + result.set(true /*tainted*/, body.buf, body.length, options().filename + , options().content_type ? new VString(*options().content_type) : 0 + , &r); result.set_mode(options().is_text); + long http_status = 0; + if(f_curl_easy_getinfo(curl(), CURLINFO_RESPONSE_CODE, &http_status) == CURLE_OK){ + result.fields().put("status", new VInt(http_status)); + } + for(HASH_STRING::Iterator i(headers); i; i.next() ){ String::Body key=i.key(); String::Body value=i.value(); @@ -479,13 +611,18 @@ static void _curl_load(Request& r, Metho fcurl ? _curl_load_action(r, params) : temp_curl(_curl_load_action, r, params); } -#endif // HAVE_CURL - // constructor MCurl::MCurl(): Methoded("curl") { -#ifdef HAVE_CURL add_native_method("session", Method::CT_STATIC, _curl_session, 1, 1); - add_native_method("option", Method::CT_STATIC, _curl_option, 1, 1); + add_native_method("version", Method::CT_STATIC, _curl_version, 0, 0); + add_native_method("options", Method::CT_STATIC, _curl_options, 1, 1); add_native_method("load", Method::CT_STATIC, _curl_load, 0, 1); -#endif // HAVE_CURL } + +#else // HAVE_CURL + +#include "classes.h" +// global variable +DECLARE_CLASS_VAR(curl, 0, 0); // fictive + +#endif // HAVE_CURL