Annotation of parser3/src/classes/curl.C, revision 1.1
1.1 ! misha 1: /** @file
! 2: Parser: @b curl parser class.
! 3:
! 4: Copyright(c) 2001-2009 ArtLebedev Group(http://www.artlebedev.com)
! 5: */
! 6:
// Version-control ident string of this file (curl.C). The variable used to be
// named IDENT_INET_C, a leftover from the file it was copied from.
static const char * const IDENT_CURL_C="$Date: 2009-04-10 11:31:06 $";
! 8:
! 9: #include "pa_vmethod_frame.h"
! 10: #include "pa_request.h"
! 11: #include "pa_vfile.h"
! 12: #include "pa_charsets.h"
! 13: #include "ltdl.h"
! 14:
! 15: class MCurl: public Methoded {
! 16: public:
! 17: MCurl();
! 18:
! 19: public: // Methoded
! 20: bool used_directly() { return true; }
! 21: };
! 22:
! 23: // global variables
! 24:
! 25: DECLARE_CLASS_VAR(curl, new MCurl, 0);
! 26:
! 27: // from file.C
! 28: extern bool is_text_mode(const String& mode);
! 29:
! 30: #ifdef HAVE_CURL
! 31: #include "curl.h"
! 32:
! 33: typedef CURL *(*t_curl_easy_init)(); t_curl_easy_init f_curl_easy_init;
! 34: typedef CURLcode (*t_curl_easy_setopt)(CURL *, CURLoption option, ...); t_curl_easy_setopt f_curl_easy_setopt;
! 35: typedef CURLcode (*t_curl_easy_perform)(CURL *); t_curl_easy_perform f_curl_easy_perform;
! 36: typedef void (*t_curl_easy_cleanup)(CURL *); t_curl_easy_cleanup f_curl_easy_cleanup;
! 37: typedef const char *(*t_curl_easy_strerror)(CURLcode); t_curl_easy_strerror f_curl_easy_strerror;
! 38: typedef struct curl_slist *(*t_curl_slist_append)(struct curl_slist *,const char *); t_curl_slist_append f_curl_slist_append;
! 39:
! 40: #define GLINK(name) f_##name=(t_##name)lt_dlsym(handle, #name);
! 41: #define DLINK(name) GLINK(name) if(!f_##name) return "function " #name " was not found";
! 42:
! 43: const char *dlink(const char *dlopen_file_spec) {
! 44: if(lt_dlinit())
! 45: return lt_dlerror();
! 46:
! 47: lt_dlhandle handle=lt_dlopen(dlopen_file_spec);
! 48:
! 49: if(!handle){
! 50: if(const char* result=lt_dlerror())
! 51: return result;
! 52: return "can not open the dynamic link module";
! 53: }
! 54:
! 55: DLINK(curl_easy_init);
! 56: DLINK(curl_easy_cleanup);
! 57: DLINK(curl_easy_setopt);
! 58: DLINK(curl_easy_perform);
! 59: DLINK(curl_easy_strerror);
! 60: DLINK(curl_slist_append);
! 61: return 0;
! 62: }
! 63:
! 64:
! 65: class ParserOptions {
! 66: public:
! 67: const char *filename;
! 68: const String *content_type;
! 69: bool is_text;
! 70: Charset *charset;
! 71:
! 72: ParserOptions() : filename(0), content_type(0), is_text(true), charset(0) {}
! 73: };
! 74:
! 75: // using thread local variables instead of keeping them in request
! 76: // not necessary for cgi version
! 77: #ifdef WIN32
! 78: #define __thread __declspec(thread)
! 79: #endif
! 80: __thread CURL *fcurl=0;
! 81: __thread ParserOptions *foptions;
! 82:
! 83: static CURL *curl(){
! 84: if(!fcurl)
! 85: throw Exception("curl", 0, "outside of 'session' operator");
! 86: return fcurl;
! 87: }
! 88:
! 89: static ParserOptions &options(){
! 90: if(!foptions)
! 91: throw Exception("curl", 0, "outside of 'session' operator");
! 92: return *foptions;
! 93: }
! 94:
! 95: // using temporal object scheme to garanty cleanup call
! 96: class Temp_curl {
! 97: CURL* saved_curl;
! 98: ParserOptions* saved_options;
! 99: public:
! 100: Temp_curl() : saved_curl(fcurl) {
! 101: fcurl = f_curl_easy_init();
! 102: foptions = new ParserOptions();
! 103: }
! 104: ~Temp_curl() {
! 105: f_curl_easy_cleanup(fcurl);
! 106: fcurl = saved_curl;
! 107: delete foptions;
! 108: foptions = saved_options;
! 109: }
! 110: };
! 111:
// file spec of the libcurl dynamic library; the 'library' option may replace it
#ifdef WIN32
const char *curl_library="libcurl.dll";
#else
const char *curl_library="libcurl.so";
#endif

// true once linking has been attempted (successfully or not)
bool curl_linked = false;

// result of that attempt: 0 on success, otherwise the error text
const char *curl_status = 0;
! 120:
! 121: static void temp_curl(void (*action)(Request&, MethodParams&), Request& r, MethodParams& params){
! 122: if(!curl_linked){
! 123: curl_linked=true;
! 124: curl_status=dlink(curl_library);
! 125: }
! 126:
! 127: if(curl_status == 0){
! 128: Temp_curl temp_curl;
! 129: action(r,params);
! 130: } else {
! 131: throw Exception("curl", 0, "failed to load curl library %s: %s", curl_library, curl_status);
! 132: }
! 133: }
! 134:
! 135: static void _curl_session_action(Request& r, MethodParams& params){
! 136: Value& body_code=params.as_junction(0, "body must be code");
! 137: r.process_write(body_code);
! 138: }
! 139:
! 140: static void _curl_session(Request& r, MethodParams& params){
! 141: temp_curl(_curl_session_action, r, params);
! 142: }
! 143:
! 144:
! 145: static char *str_lower(const char *str){
! 146: char *result=pa_strdup(str);
! 147: for(char* c=result; *c; c++)
! 148: *c=(char)tolower((unsigned char)*c);
! 149: return result;
! 150: }
! 151:
! 152: static char *str_upper(const char *str){
! 153: char *result=pa_strdup(str);
! 154: for(char* c=result; *c; c++)
! 155: *c=(char)toupper((unsigned char)*c);
! 156: return result;
! 157: }
! 158:
! 159: class CurlOption {
! 160: public:
! 161:
! 162: enum OptionType {
! 163: CURL_STRING,
! 164: CURL_URLENCODE, // url-encoded string
! 165: CURL_INT,
! 166: CURL_POST,
! 167: CURL_HEADERS,
! 168: CURL_FILE,
! 169: PARSER_LIBRARY,
! 170: PARSER_NAME,
! 171: PARSER_CONTENT_TYPE,
! 172: PARSER_MODE,
! 173: PARSER_CHARSET
! 174: };
! 175:
! 176: CURLoption id;
! 177: OptionType type;
! 178: CurlOption(CURLoption aid, OptionType atype): id(aid), type(atype) {}
! 179: };
! 180:
! 181: class CurlOptionHash: public HashString<CurlOption*> {
! 182: public:
! 183: CurlOptionHash() {
! 184: #define CURL_OPT(type, name) put(str_lower(#name),new CurlOption(CURLOPT_##name, CurlOption::type));
! 185: #define PARSER_OPT(type, name) put(name,new CurlOption((CURLoption)0, CurlOption::type));
! 186: CURL_OPT(CURL_URLENCODE, URL);
! 187: CURL_OPT(CURL_STRING, INTERFACE);
! 188: CURL_OPT(CURL_INT, LOCALPORT);
! 189: CURL_OPT(CURL_INT, PORT);
! 190:
! 191: CURL_OPT(CURL_INT, HTTPAUTH);
! 192: CURL_OPT(CURL_STRING, USERPWD);
! 193:
! 194: CURL_OPT(CURL_STRING, USERNAME);
! 195: CURL_OPT(CURL_STRING, PASSWORD);
! 196:
! 197: CURL_OPT(CURL_INT, AUTOREFERER);
! 198: CURL_OPT(CURL_STRING, ENCODING); // gzip or deflate
! 199: CURL_OPT(CURL_INT, FOLLOWLOCATION);
! 200: CURL_OPT(CURL_INT, UNRESTRICTED_AUTH);
! 201:
! 202: CURL_OPT(CURL_INT, POST);
! 203: CURL_OPT(CURL_INT, HTTPGET);
! 204:
! 205: CURL_OPT(CURL_POST, POSTFIELDS); // hopefully is safe too
! 206: CURL_OPT(CURL_POST, COPYPOSTFIELDS);
! 207:
! 208: CURL_OPT(CURL_HEADERS, HTTPHEADER);
! 209: CURL_OPT(CURL_URLENCODE, COOKIE);
! 210: CURL_OPT(CURL_URLENCODE, COOKIELIST);
! 211: CURL_OPT(CURL_INT, COOKIESESSION);
! 212:
! 213: CURL_OPT(CURL_INT, IGNORE_CONTENT_LENGTH);
! 214: CURL_OPT(CURL_INT, HTTP_CONTENT_DECODING);
! 215: CURL_OPT(CURL_INT, HTTP_TRANSFER_DECODING);
! 216:
! 217: CURL_OPT(CURL_INT, TIMEOUT);
! 218: CURL_OPT(CURL_INT, TIMEOUT_MS);
! 219: CURL_OPT(CURL_INT, LOW_SPEED_LIMIT);
! 220: CURL_OPT(CURL_INT, LOW_SPEED_TIME);
! 221: CURL_OPT(CURL_INT, MAXCONNECTS);
! 222:
! 223: CURL_OPT(CURL_INT, FRESH_CONNECT);
! 224: CURL_OPT(CURL_INT, FORBID_REUSE);
! 225: CURL_OPT(CURL_INT, CONNECTTIMEOUT);
! 226: CURL_OPT(CURL_INT, CONNECTTIMEOUT_MS);
! 227:
! 228: CURL_OPT(CURL_FILE, SSLCERT);
! 229: CURL_OPT(CURL_STRING, SSLCERTTYPE);
! 230: CURL_OPT(CURL_FILE, SSLKEY);
! 231: CURL_OPT(CURL_STRING, SSLKEYTYPE);
! 232: CURL_OPT(CURL_STRING, KEYPASSWD);
! 233: CURL_OPT(CURL_STRING, SSLENGINE);
! 234: CURL_OPT(CURL_STRING, SSLENGINE_DEFAULT);
! 235:
! 236: CURL_OPT(CURL_FILE, ISSUERCERT);
! 237: CURL_OPT(CURL_FILE, CRLFILE);
! 238:
! 239: CURL_OPT(CURL_STRING, CAINFO);
! 240: CURL_OPT(CURL_STRING, CAPATH);
! 241: CURL_OPT(CURL_INT, SSL_VERIFYPEER);
! 242: CURL_OPT(CURL_INT, SSL_VERIFYHOST);
! 243: CURL_OPT(CURL_STRING, SSL_CIPHER_LIST);
! 244: CURL_OPT(CURL_INT, SSL_SESSIONID_CACHE);
! 245:
! 246: PARSER_OPT(PARSER_LIBRARY, "library");
! 247: PARSER_OPT(PARSER_NAME, "name");
! 248: PARSER_OPT(PARSER_CONTENT_TYPE, "content-type");
! 249: PARSER_OPT(PARSER_MODE, "mode");
! 250: PARSER_OPT(PARSER_CHARSET, "charset");
! 251: }
! 252:
! 253: } *curl_options=0;
! 254:
! 255: static const char *curl_urlencode(const String &s, Request& r){
! 256: if(options().charset){
! 257: Temp_client_charset temp(r.charsets, *options().charset);
! 258: return s.untaint_and_transcode_cstr(String::L_URI, &r.charsets);
! 259: } else
! 260: return s.untaint_cstr(String::L_URI);
! 261: }
! 262:
! 263: static struct curl_slist *curl_headers(HashStringValue *value_hash, Request& r) {
! 264: struct curl_slist *slist=NULL;
! 265:
! 266: for(HashStringValue::Iterator i(*value_hash); i; i.next() ){
! 267: String header =
! 268: String(capitalize(i.key().cstr()), String::L_URI)
! 269: << ": "
! 270: << String(i.value()->as_string(), String::L_URI);
! 271:
! 272: slist=f_curl_slist_append(slist, curl_urlencode(header, r));
! 273: }
! 274: return slist;
! 275: }
! 276:
! 277: static void curl_setopt(HashStringValue::key_type key, HashStringValue::value_type value, Request& r) {
! 278: CurlOption *opt=curl_options->get(key);
! 279:
! 280: if(opt==0)
! 281: throw Exception("curl", 0, "called with invalid option '%s'", key.cstr());
! 282:
! 283: CURLcode res = CURLE_OK;
! 284: Value &v=r.process_to_value(*value);
! 285:
! 286: switch (opt->type){
! 287: case CurlOption::CURL_STRING:{
! 288: // string curl option
! 289: const char *value_str=v.as_string().cstr();
! 290: res=f_curl_easy_setopt(curl(), opt->id, value_str);
! 291: break;
! 292: }
! 293: case CurlOption::CURL_URLENCODE:{
! 294: // url-encoded string curl option
! 295: const char *value_str=curl_urlencode(v.as_string(), r);
! 296: res=f_curl_easy_setopt(curl(), opt->id, value_str);
! 297: break;
! 298: }
! 299: case CurlOption::CURL_INT:{
! 300: // int curl option
! 301: int value_int=(int)v.as_double();
! 302: res=f_curl_easy_setopt(curl(), opt->id, value_int);
! 303: break;
! 304: }
! 305: case CurlOption::CURL_POST:{
! 306: // http post curl option
! 307: if(v.get_string()){
! 308: if( (res=f_curl_easy_setopt(curl(), CURLOPT_POSTFIELDSIZE, -1)) == CURLE_OK )
! 309: res=f_curl_easy_setopt(curl(), opt->id, curl_urlencode(v.as_string(), r));
! 310: } else {
! 311: VFile *file=v.as_vfile(String::L_AS_IS);
! 312: if( (res=f_curl_easy_setopt(curl(), CURLOPT_POSTFIELDSIZE, file->value_size())) == CURLE_OK )
! 313: res=f_curl_easy_setopt(curl(), opt->id, file->value_ptr());
! 314: }
! 315: break;
! 316: }
! 317: case CurlOption::CURL_HEADERS:{
! 318: // http headers curl option
! 319: HashStringValue *value_hash=v.get_hash();
! 320: res=f_curl_easy_setopt(curl(), opt->id, value_hash ? curl_headers(value_hash, r) : 0);
! 321: break;
! 322: }
! 323: case CurlOption::CURL_FILE:{
! 324: // file-spec curl option
! 325: const char *value_str=r.absolute(v.as_string()).taint_cstr(String::L_FILE_SPEC);
! 326: res=f_curl_easy_setopt(curl(), opt->id, value_str);
! 327: break;
! 328: }
! 329: case CurlOption::PARSER_LIBRARY:{
! 330: // 'library' parser option
! 331: if(fcurl==0){
! 332: curl_library=v.as_string().taint_cstr(String::L_FILE_SPEC);
! 333: } else
! 334: throw Exception("curl", 0, "failed to set option '%s': %s", key.cstr(), "already loaded");
! 335: break;
! 336: }
! 337: case CurlOption::PARSER_NAME:{
! 338: // 'name' parser option
! 339: options().filename=v.as_string().taint_cstr(String::L_FILE_SPEC);
! 340: break;
! 341: }
! 342: case CurlOption::PARSER_CONTENT_TYPE:{
! 343: // 'content-type' parser option
! 344: options().content_type=&v.as_string();
! 345: break;
! 346: }
! 347: case CurlOption::PARSER_MODE:{
! 348: // 'mode' parser option
! 349: options().is_text=is_text_mode(v.as_string());
! 350: break;
! 351: }
! 352: case CurlOption::PARSER_CHARSET:{
! 353: // 'charset' parser option
! 354: options().charset=&::charsets.get(v.as_string().change_case(r.charsets.source(), String::CC_UPPER));
! 355: break;
! 356: }
! 357: }
! 358:
! 359: if(res != CURLE_OK)
! 360: throw Exception("curl", 0, "failed to set option '%s': %s", key.cstr(), f_curl_easy_strerror(res));
! 361: }
! 362:
! 363: static void _curl_option(Request& r, MethodParams& params){
! 364: if(curl_options==0)
! 365: curl_options=new CurlOptionHash();
! 366:
! 367: if(HashStringValue* options=params.as_no_junction(0, OPTIONS_MUST_NOT_BE_CODE).get_hash()){
! 368: options->for_each<Request&>(curl_setopt, r);
! 369: } else
! 370: throw Exception("curl", 0, "options must be hash");
! 371: }
! 372:
! 373:
! 374: class Curl_buffer{
! 375: public:
! 376: char *buf;
! 377: size_t length;
! 378: size_t buf_size;
! 379:
! 380: Curl_buffer() : buf((char *)pa_malloc(MAX_STRING+1)), length(0), buf_size(MAX_STRING){}
! 381: };
! 382:
! 383: static int curl_writer(char *data, size_t size, size_t nmemb, Curl_buffer *result){
! 384: if(result == 0)
! 385: return 0;
! 386:
! 387: size=size*nmemb;
! 388: if(size>0){
! 389: if(result->length + size >= result->buf_size){
! 390: result->buf_size = result->buf_size*2 + size;
! 391: result->buf = (char *)pa_realloc(result->buf, result->buf_size+1);
! 392: }
! 393: memcpy(result->buf+result->length, data, size);
! 394: result->length += size;
! 395: }
! 396: return size;
! 397: }
! 398:
! 399: static int curl_header(char *data, size_t size, size_t nmemb, HASH_STRING<char *> *result){
! 400: if(result == 0)
! 401: return 0;
! 402:
! 403: size=size*nmemb;
! 404: if(size>0){
! 405: char *line=pa_strdup(data, size);
! 406: char *value=lsplit(line,':');
! 407: if(value && *line){
! 408: // we need only headers, not the response code
! 409: result->put(str_upper(line), value);
! 410: }
! 411: }
! 412: return size;
! 413: }
! 414:
! 415: #define CURL_SETOPT(option, arg, message) \
! 416: if( (res=f_curl_easy_setopt(curl(), option, arg)) != CURLE_OK){ \
! 417: throw Exception("curl", 0, "failed to set " message ": %s", f_curl_easy_strerror(res)); \
! 418: }
! 419:
! 420: static void _curl_load_action(Request& r, MethodParams& params){
! 421: if(params.count()==1)
! 422: _curl_option(r, params);
! 423:
! 424: CURLcode res;
! 425:
! 426: Curl_buffer body;
! 427: CURL_SETOPT(CURLOPT_WRITEFUNCTION, curl_writer, "curl writer function");
! 428: CURL_SETOPT(CURLOPT_WRITEDATA, &body, "curl write buffer");
! 429:
! 430: // we need a container for headers as VFile fields can be put only after VFile.set
! 431: HASH_STRING<char *> headers;
! 432: CURL_SETOPT(CURLOPT_HEADERFUNCTION, curl_header, "curl header function");
! 433: CURL_SETOPT(CURLOPT_WRITEHEADER, &headers, "curl header buffer");
! 434:
! 435: if((res=f_curl_easy_perform(curl())) != CURLE_OK){
! 436: throw Exception("curl", 0, "failed to exec curl session: %s", f_curl_easy_strerror(res));
! 437: }
! 438:
! 439: // assure trailing zero
! 440: body.buf[body.length]=0;
! 441:
! 442: Charset *asked_charset=options().charset;
! 443:
! 444: if(options().is_text && asked_charset != 0){
! 445: String::C c=Charset::transcode(String::C(body.buf, body.length), *asked_charset, r.charsets.source());
! 446: body.buf=(char *)c.str;
! 447: body.length=c.length;
! 448: }
! 449:
! 450: Value* vcontent_type=
! 451: options().content_type ? new VString(*options().content_type) :
! 452: options().filename ? new VString(r.mime_type_of(options().filename)) : 0;
! 453:
! 454: VFile& result=*new VFile;
! 455: result.set(true /*tainted*/, body.buf, body.length, options().filename, vcontent_type);
! 456: result.set_mode(options().is_text);
! 457:
! 458: for(HASH_STRING<char *>::Iterator i(headers); i; i.next() ){
! 459: String::Body key=i.key();
! 460: String::Body value=i.value();
! 461: if(asked_charset){
! 462: key=Charset::transcode(key, *asked_charset, r.charsets.source());
! 463: value=Charset::transcode(value, *asked_charset, r.charsets.source());
! 464: }
! 465: result.fields().put(key, new VString(*new String(value.trim(String::TRIM_BOTH, " \t\n\r"), String::L_TAINTED)));
! 466: }
! 467:
! 468: r.write_no_lang(result);
! 469: }
! 470:
! 471: static void _curl_load(Request& r, MethodParams& params){
! 472: fcurl ? _curl_load_action(r, params) : temp_curl(_curl_load_action, r, params);
! 473: }
! 474:
! 475: #endif // HAVE_CURL
! 476:
! 477: // constructor
! 478: MCurl::MCurl(): Methoded("curl") {
! 479: #ifdef HAVE_CURL
! 480: add_native_method("session", Method::CT_STATIC, _curl_session, 1, 1);
! 481: add_native_method("option", Method::CT_STATIC, _curl_option, 1, 1);
! 482: add_native_method("load", Method::CT_STATIC, _curl_load, 0, 1);
! 483: #endif // HAVE_CURL
! 484: }
E-mail: