Annotation of parser3/src/main/pa_common.C, revision 1.213
1.15 paf 1: /** @file
1.16 paf 2: Parser: commonly used functions.
3:
1.205 paf 4: Copyright(c) 2001-2005 ArtLebedev Group (http://www.artlebedev.com)
1.101 paf 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.16 paf 6:
1.210 paf 7: * BASE64 part
8: * Authors: Michael Zucchi <notzed@ximian.com>
9: * Jeffrey Stedfast <fejj@ximian.com>
10: *
11: * Copyright 2000-2004 Ximian, Inc. (www.ximian.com)
12: *
13: * This program is free software; you can redistribute it and/or modify
14: * it under the terms of the GNU General Public License as published by
15: * the Free Software Foundation; either version 2 of the License, or
16: * (at your option) any later version.
17: *
18: * This program is distributed in the hope that it will be useful,
19: * but WITHOUT ANY WARRANTY; without even the implied warranty of
20: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21: * GNU General Public License for more details.
22: *
23: * You should have received a copy of the GNU General Public License
24: * along with this program; if not, write to the Free Software
25: * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
26: *
27: */
28:
1.213 ! paf 29: static const char * const IDENT_COMMON_C="$Date: 2005/11/22 11:51:00 $";
1.1 paf 30:
31: #include "pa_common.h"
1.4 paf 32: #include "pa_exception.h"
1.154 paf 33: #include "pa_hash.h"
1.14 paf 34: #include "pa_globals.h"
1.154 paf 35: #include "pa_request_charsets.h"
36: #include "pa_charsets.h"
37:
38: #define PA_HTTP
39:
40: #ifdef PA_HTTP
1.126 paf 41: #include "pa_vstring.h"
1.154 paf 42: #include "pa_vint.h"
1.155 paf 43: #include "pa_vhash.h"
44: #include "pa_vtable.h"
1.195 paf 45: #include "pa_socks.h"
1.154 paf 46:
47: #ifdef CYGWIN
48: #define _GNU_H_WINDOWS32_SOCKETS
49: // for PASCAL
50: #include <windows.h>
51: // SOCKET
52: typedef u_int SOCKET;
53: int PASCAL closesocket(SOCKET);
54: #else
55: # if defined(WIN32)
56: # include <windows.h>
57: # else
58: # define closesocket close
59: # endif
60: #endif
1.1 paf 61:
1.126 paf 62: #else
1.154 paf 63:
64: # if defined(WIN32)
65: # include <windows.h>
66: # endif
67:
1.98 paf 68: #endif
69:
1.93 paf 70: // some maybe-undefined constants
71:
1.82 paf 72: #ifndef _O_TEXT
73: # define _O_TEXT 0
74: #endif
75: #ifndef _O_BINARY
76: # define _O_BINARY 0
1.47 paf 77: #endif
1.80 paf 78:
1.138 paf 79: #ifdef HAVE_FTRUNCATE
80: # define PA_O_TRUNC 0
81: #else
82: # ifdef _O_TRUNC
83: # define PA_O_TRUNC _O_TRUNC
84: # else
85: # error you must have either ftruncate function or _O_TRUNC bit declared
86: # endif
1.154 paf 87: #endif
1.176 paf 88:
89: # ifndef INADDR_NONE
90: # define INADDR_NONE ((ulong) -1)
91: # endif
1.154 paf 92:
93: // defines for globals
94:
95: #define FILE_STATUS_NAME "status"
96:
97: // globals
98:
99: const String file_status_name(FILE_STATUS_NAME);
100:
1.178 paf 101: // defines
1.154 paf 102:
103: #define HTTP_METHOD_NAME "method"
1.180 paf 104: #define HTTP_FORM_NAME "form"
105: #define HTTP_BODY_NAME "body"
1.154 paf 106: #define HTTP_TIMEOUT_NAME "timeout"
107: #define HTTP_HEADERS_NAME "headers"
108: #define HTTP_ANY_STATUS_NAME "any-status"
109: #define HTTP_CHARSET_NAME "charset"
1.155 paf 110: #define HTTP_TABLES_NAME "tables"
1.178 paf 111: #define HTTP_USER "user"
112: #define HTTP_PASSWORD "password"
1.154 paf 113:
114: // defines
115:
1.127 paf 116: #define DEFAULT_USER_AGENT "parser3"
117:
1.154 paf 118: // functions
1.127 paf 119:
/**
	Normalizes line breaks in place:
	DOS "\r\n" and Macintosh "\r" are both replaced with a single "\n".

	@param str buffer to rewrite; a terminating zero is stored at str[length],
		so the buffer must be at least length+1 bytes long
	@param length in: number of bytes in @a str;
		out: number of bytes after conversion (one less for every "\r\n" pair)
*/
void fix_line_breaks(char *str, size_t& length) {
	const char* const eob=str+length;
	char* dest=str; // where the next converted byte goes
	char* bol=str; // start of the not yet processed part
	while(char* eol=(char*)memchr(bol, '\r', eob-bol)) {
		size_t len=eol-bol;
		// source and destination belong to the same buffer and may overlap,
		// so memmove is required here, memcpy would be undefined behaviour
		if(dest!=bol)
			memmove(dest, bol, len);
		dest+=len;
		*dest++='\n';

		if(&eol[1]<eob && eol[1]=='\n') { // \r, \n = DOS
			bol=eol+2;
			length--;
		} else // \r, not \n = Macintosh
			bol=eol+1;
	}
	// last piece without \r
	if(dest!=bol)
		memmove(dest, bol, eob-bol);
	str[length]=0; // terminating
}
1.18 paf 145:
1.154 paf 146: char* file_read_text(Request_charsets& charsets,
147: const String& file_spec,
148: bool fail_on_read_problem,
149: HashStringValue* params/*, HashStringValue* * out_fields*/) {
150: File_read_result file=
151: file_read(charsets, file_spec, true, params, fail_on_read_problem);
152: return file.success?file.str:0;
1.126 paf 153: }
154:
1.206 paf 155: /// these options were handled but not checked elsewhere, now check them
156: static int get_valid_file_options_count(HashStringValue& options)
157: {
158: int result=0;
159: if(options.get(PA_SQL_LIMIT_NAME))
160: result++;
161: if(options.get(PA_SQL_OFFSET_NAME))
162: result++;
163: if(options.get(PA_COLUMN_SEPARATOR_NAME))
164: result++;
165: if(options.get(PA_COLUMN_ENCLOSER_NAME))
166: result++;
167: return result;
168: }
169:
1.178 paf 170: //http request stuff
1.154 paf 171: #ifdef PA_HTTP
172:
173: #undef CRLF
174: #define CRLF "\r\n"
1.126 paf 175:
/**
	Fills @a addr with an AF_INET address for @a host : @a port.

	@param addr structure to fill; it is zeroed first
	@param host dotted-quad address or host name; 0 means INADDR_ANY
	@param port port number in host byte order
	@returns false when @a host is neither a valid dotted-quad address
		nor a name gethostbyname() could resolve; true otherwise
*/
static bool set_addr(struct sockaddr_in *addr, const char* host, const short port){
	memset(addr, 0, sizeof(*addr));
	addr->sin_family=AF_INET;
	addr->sin_port=htons(port);
	if(!host) {
		addr->sin_addr.s_addr=INADDR_ANY;
		return true;
	}

	// store the result straight into the 32-bit field: copying sizeof(unsigned long)
	// bytes would run past sin_addr on platforms where long is 64 bits wide
	addr->sin_addr.s_addr=inet_addr(host);
	// inet_addr reports failure as all 32 bits set; compare in a fixed width
	// so the check does not depend on how INADDR_NONE happens to be defined
	if((unsigned long)addr->sin_addr.s_addr!=0xFFFFFFFFul)
		return true;
	addr->sin_addr.s_addr=0;

	// not a dotted-quad address: resolving it as a host name
	struct hostent *hostIP=gethostbyname(host);
	if(!hostIP || !hostIP->h_addr_list[0])
		return false;
	size_t address_size=(size_t)hostIP->h_length;
	if(address_size>sizeof(addr->sin_addr)) // never write past the address field
		address_size=sizeof(addr->sin_addr);
	memcpy(&addr->sin_addr, hostIP->h_addr_list[0], address_size);
	return true;
}
195:
/**
	Looks for a Content-Length header in the zero-terminated @a buf
	and turns it into a hint for the initial response buffer size.

	Only the four spellings listed below are recognized.

	@returns 10M when the declared length is above 10M (sanity cap);
		0 in all other cases: header missing, value not a positive decimal number,
		or value within the cap (the exact value is deliberately not returned,
		callers then use their own default initial size)
*/
size_t guess_content_length(char* buf) {
	static const char* const spellings[]={
		"Content-Length:", // Apache
		"content-length:", // Parser 3
		"Content-length:", // maybe 1
		"CONTENT-LENGTH:"  // maybe 2
	};
	const size_t name_length=15/*strlen("CONTENT-LENGTH:")*/;

	const char* value=0;
	for(size_t i=0; i<sizeof(spellings)/sizeof(spellings[0]); i++) {
		const char* found=strstr(buf, spellings[i]);
		if(found) {
			value=found+name_length;
			break;
		}
	}
	if(!value)
		return 0;

	char *error_pos;
	// Content-Length is a decimal number, do not let strtol guess octal/hex
	const long declared=strtol(value, &error_pos, 10);
	// no digits at all, or a negative value: must not be cast to a huge size_t
	if(error_pos==value || declared<=0)
		return 0;

	const size_t reasonable_initial_max=0x400*0x400*10 /*10M*/;
	if((unsigned long)declared>reasonable_initial_max) // sanity check
		return reasonable_initial_max;
	return 0; // exact declared length is intentionally not used as a hint
}
216:
217: static int http_read_response(char*& response, size_t& response_size, int sock, bool fail_on_status_ne_200) {
1.154 paf 218: int result=0;
1.207 paf 219: // fetching some to local buffer, guessing on possible content-length
220: response_size=0x400*20; // initial size if content-length could not be determined
221: const size_t preview_size=0x400*20;
222: char preview_buf[preview_size+1/*terminator*/]; // 20K buffer to preview headers
223: ssize_t received_size=recv(sock, preview_buf, preview_size, 0);
224: if(received_size==0)
225: goto done;
226: if(received_size<0) {
227: if(int no=pa_socks_errno())
228: throw Exception("http.timeout",
229: 0,
230: "error receiving response header: %s (%d)", pa_socks_strerr(no), no);
231: goto done;
232: }
233: // detecting response_size
234: {
235: preview_buf[received_size]=0; // terminator
236: if(size_t content_length=guess_content_length(preview_buf))
237: response_size=preview_size+content_length; // a little more than needed, will adjust response_size by actual received size later
238: }
239:
1.208 paf 240: // [gcc is happier this way, see goto above]
241: {
242: // allocating initial buf
243: response=(char*)pa_malloc_atomic(response_size+1/*terminator*/); // just setting memory block type
244: char* ptr=response;
245: size_t todo_size=response_size;
246: // coping part of already received body
247: memcpy(ptr, preview_buf, received_size);
1.207 paf 248: ptr+=received_size;
1.208 paf 249: todo_size-=received_size;
1.171 paf 250:
1.208 paf 251: // we use terminator byte for two purposes here:
252: // 1. we return there zero always, not knowing: maybe they would want to create String form $file.body?
253: // invariant: all Strings should have zero-terminated buffers
254: // 2. we use that out-of-size byte to detect if our content-length guess was wrong
255: // when recv gets more than we expected
256: // a) we know that the content-length guess was wrong
257: // b) we have space to put the first byte of extra data
258: // c) we use less code to detect normal situation: on last while-cycle recv expected to just return 0
259: while(true) {
260: received_size=recv(sock, ptr, todo_size+1/*there is always a place for terminator*/, 0);
261: if(received_size==0) {
262: response_size-=todo_size; // in case we received less than expected, cut down the reported size
263: break;
264: }
265: if(received_size<0) {
266: if(int no=pa_socks_errno())
267: throw Exception("http.timeout",
268: 0,
269: "error receiving response body: %s (%d)", pa_socks_strerr(no), no);
270: break;
271: }
272: // they've touched the terminator?
273: if((size_t)received_size>todo_size)
274: {
275: // that means that our guessed response_size was not big enough
276: const size_t grow_chunk_size=0x400*0x400; // 1M
277: response_size+=grow_chunk_size;
278: size_t ptr_offset=ptr-response;
279: response=(char*)pa_realloc(response, response_size+1/*terminator*/);
280: ptr=response+ptr_offset;
281: todo_size+=grow_chunk_size;
282: }
283: // can't do this before realloc: we need <todo_size check
284: ptr+=received_size;
285: todo_size-=received_size;
286:
287: char* EOLat=0;
288: if(!result && (EOLat=strstr(response, "\n"))) { // checking status in first response
289: const String status_line(pa_strdup(response, EOLat-response));
290: ArrayString astatus;
291: size_t pos_after=0;
292: status_line.split(astatus, pos_after, " ");
293: const String& status_code=*astatus.get(astatus.count()>1?1:0);
294: result=status_code.as_int();
295:
296: if(fail_on_status_ne_200 && result!=200)
297: throw Exception("http.status",
298: &status_code,
299: "invalid HTTP response status");
300: }
1.142 paf 301: }
302: }
1.207 paf 303: done:
1.154 paf 304: if(result)
1.207 paf 305: {
306: response[response_size]=0;
1.154 paf 307: return result;
1.207 paf 308: }
1.142 paf 309: else
310: throw Exception("http.response",
311: 0,
1.173 paf 312: "bad response from host - no status found (size=%u)", response_size);
1.126 paf 313: }
314:
315: /* ********************** request *************************** */
316:
317: #if defined(SIGALRM) && defined(HAVE_SIGSETJMP) && defined(HAVE_SIGLONGJMP)
1.145 paf 318: # define PA_USE_ALARM
1.126 paf 319: #endif
320:
#ifdef PA_USE_ALARM
/// jump target for timeout_handler
static sigjmp_buf timeout_env;

/// Signal handler: jumps to timeout_env with value 1.
static void timeout_handler(int signal_number){
	(void)signal_number; // not needed
	siglongjmp(timeout_env, 1);
}
#endif
327:
/**
	Connects to host:port over TCP, sends the ready-made request text
	and reads the whole response with http_read_response().

	response, response_size: filled by http_read_response()
	host, port: where to connect; host is resolved with set_addr()
	request: zero-terminated request text, sent with a single send() call
	timeout_secs: with PA_USE_ALARM it arms alarm(); under WIN32 it is also
		set as the socket send/receive timeout
	fail_on_status_ne_200: passed through to http_read_response()

	Returns what http_read_response() returned.

	Throws "http.host" (no host / host not resolved), "http.connect"
	(socket() or connect() failed), "http.timeout" (send() did not send the
	whole request, or the alarm fired); anything thrown by
	http_read_response() passes through after the socket is closed.
*/
1.171 paf 328: static int http_request(char*& response, size_t& response_size,
1.193 paf 329: const char* host, short port,
1.152 paf 330: const char* request,
1.196 paf 331: int timeout_secs,
1.152 paf 332: bool fail_on_status_ne_200) {
1.126 paf 333: if(!host)
334: throw Exception("http.host",
1.154 paf 335: 0,
1.126 paf 336: "zero hostname"); //never
337:
1.200 paf 338: volatile // to prevent making it a register variable, because it would be clobbered by longjmp [thanks gcc warning]
339: int sock=-1;
// the SIGALRM handler jumps back to the sigsetjmp below
1.145 paf 340: #ifdef PA_USE_ALARM
1.146 paf 341: signal(SIGALRM, timeout_handler);
1.126 paf 342: #endif
1.145 paf 343: #ifdef PA_USE_ALARM
1.145 paf 344: if(sigsetjmp(timeout_env, 1)) {
// we get here only via siglongjmp from timeout_handler
345: // stupid gcc [2.95.4] generated bad code
346: // which failed to handle sigsetjmp+throw: crashed inside of pre-throw code.
1.199 paf 347: // rewritten simpler [although duplicating closesocket code]
1.145 paf 348: if(sock>=0)
349: closesocket(sock);
350: throw Exception("http.timeout",
1.199 paf 351: 0,
1.145 paf 352: "timeout occured while retrieving document");
1.146 paf 353: return 0; // never
1.145 paf 354: } else {
// normal path: start the timer, it is cancelled with alarm(0) on both exits below
1.196 paf 355: alarm(timeout_secs);
1.145 paf 356: #endif
1.146 paf 357: try {
358: int result;
1.126 paf 359: struct sockaddr_in dest;
1.154 paf 360:
361: if(!set_addr(&dest, host, port))
1.126 paf 362: throw Exception("http.host",
1.154 paf 363: 0,
1.127 paf 364: "can not resolve hostname \"%s\"", host);
1.126 paf 365:
1.195 paf 366: if((sock=socket(AF_INET, SOCK_STREAM, IPPROTO_TCP/*0*/))<0) {
367: int no=pa_socks_errno();
1.126 paf 368: throw Exception("http.connect",
1.154 paf 369: 0,
1.195 paf 370: "can not make socket: %s (%d)", pa_socks_strerr(no), no);
371: }
1.196 paf 372:
373: // To enable SO_DONTLINGER (that is, disable SO_LINGER)
374: // l_onoff should be set to zero and setsockopt should be called
375: linger dont_linger={0,0};
// result of setsockopt is not checked here
376: setsockopt(sock, SOL_SOCKET, SO_LINGER, (const char *)&dont_linger, sizeof(dont_linger));
377:
1.201 paf 378: #ifdef WIN32
379: // SO_*TIMEO can be defined in .h but not implemented in protocol,
380: // failing subsequently with Option not supported by protocol (99) message
381: // could not suppress that, so leaving this only for win32
1.196 paf 382: int timeout_ms=timeout_secs*1000;
383: setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (const char*)&timeout_ms, sizeof(timeout_ms));
384: setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (const char*)&timeout_ms, sizeof(timeout_ms));
385: #endif
386:
1.195 paf 387: if(connect(sock, (struct sockaddr *)&dest, sizeof(dest))) {
388: int no=pa_socks_errno();
1.126 paf 389: throw Exception("http.connect",
1.154 paf 390: 0,
1.195 paf 391: "can not connect to host \"%s\": %s (%d)", host, pa_socks_strerr(no), no);
392: }
1.126 paf 393: size_t request_size=strlen(request);
// a short send (fewer bytes than requested) is reported as an error too, not retried
1.195 paf 394: if(send(sock, request, request_size, 0)!=(ssize_t)request_size) {
395: int no=pa_socks_errno();
1.197 paf 396: throw Exception("http.timeout",
1.154 paf 397: 0,
1.195 paf 398: "error sending request: %s (%d)", pa_socks_strerr(no), no);
399: }
1.126 paf 400:
1.171 paf 401: result=http_read_response(response, response_size, sock, fail_on_status_ne_200);
// success: the socket is closed first, the timer is cancelled after that
1.142 paf 402: closesocket(sock);
1.145 paf 403: #ifdef PA_USE_ALARM
1.142 paf 404: alarm(0);
1.126 paf 405: #endif
1.147 paf 406: return result;
1.146 paf 407: } catch(...) {
// failure: cancel the timer, close the socket if it was opened, pass the exception on
1.145 paf 408: #ifdef PA_USE_ALARM
1.146 paf 409: alarm(0);
1.126 paf 410: #endif
1.146 paf 411: if(sock>=0)
412: closesocket(sock);
// NOTE(review): rethrow is not defined in this part of the file, presumably a project macro for "throw;" - confirm
1.154 paf 413: rethrow;
1.146 paf 414: }
1.148 paf 415: #ifdef PA_USE_ALARM
1.126 paf 416: }
1.148 paf 417: #endif
1.126 paf 418: }
419:
419:
1.127 paf 420: #ifndef DOXYGEN
421: struct Http_pass_header_info {
1.154 paf 422: Request_charsets* charsets;
1.127 paf 423: String* request;
424: bool user_agent_specified;
425: };
426: #endif
1.154 paf 427: static void http_pass_header(HashStringValue::key_type key,
428: HashStringValue::value_type value,
429: Http_pass_header_info *info) {
430: *info->request <<key<<": "
431: << attributed_meaning_to_string(*value, String::L_HTTP_HEADER, false)
432: << CRLF;
1.135 paf 433:
1.154 paf 434: if(String(key, String::L_TAINTED).change_case(info->charsets->source(), String::CC_UPPER)=="USER-AGENT")
435: info->user_agent_specified=true;
1.126 paf 436: }
1.154 paf 437:
438:
439: static Charset* detect_charset(Charset& source_charset, const String& content_type_value) {
1.156 paf 440: const String::Body CONTENT_TYPE_VALUE=
1.154 paf 441: content_type_value.change_case(source_charset, String::CC_UPPER);
442: // content-type: xxx/xxx; source_charset=WE-NEED-THIS
443: // content-type: xxx/xxx; source_charset="WE-NEED-THIS"
444: // content-type: xxx/xxx; source_charset="WE-NEED-THIS";
445: size_t before_charseteq_pos=CONTENT_TYPE_VALUE.pos("CHARSET=");
446: if(before_charseteq_pos!=STRING_NOT_FOUND) {
447: size_t charset_begin=before_charseteq_pos+8/*CHARSET="*/;
448: size_t open_quote_pos=CONTENT_TYPE_VALUE.pos('"', charset_begin);
449: bool quoted=open_quote_pos==charset_begin;
450: if(quoted)
451: charset_begin++; // skip opening '"'
452: size_t charset_end=CONTENT_TYPE_VALUE.length();
453: if(quoted) {
454: size_t close_quote_pos=CONTENT_TYPE_VALUE.pos('"', charset_begin);
455: if(close_quote_pos!=STRING_NOT_FOUND)
456: charset_end=close_quote_pos;
457: } else {
458: size_t delim_pos=CONTENT_TYPE_VALUE.pos(';', charset_begin);
459: if(delim_pos!=STRING_NOT_FOUND)
460: charset_end=delim_pos;
461: }
1.156 paf 462: const String::Body CHARSET_NAME_BODY=
1.154 paf 463: CONTENT_TYPE_VALUE.mid(charset_begin, charset_end);
464:
465: return &charsets.get(CHARSET_NAME_BODY);
466: }
467:
468: return 0;
469: }
470:
1.178 paf 471: static const String* basic_authorization_field(const char* user, const char* pass) {
472: if(!user&& !pass)
473: return 0;
474:
475: String combined;
476: if(user)
477: combined<<user;
478: combined<<":";
479: if(pass)
480: combined<<pass;
481:
1.210 paf 482: String* result=new String("Basic "); *result<<pa_base64_encode(combined.cstr(), combined.length());
1.178 paf 483: return result;
484: }
1.154 paf 485:
1.181 paf 486: static void form_string_value2string(
487: HashStringValue::key_type key,
488: const String& value,
489: String& result)
490: {
491: result << String(key, String::L_TAINTED) << "=";
492: result.append(value, String::L_URI, true);
493: result<< "&";
494: }
495: #ifndef DOXYGEN
496: struct Form_table_value2string_info {
497: HashStringValue::key_type key;
498: String& result;
499:
500: Form_table_value2string_info(HashStringValue::key_type akey, String& aresult):
501: key(akey), result(aresult) {}
502: };
503: #endif
504: static void form_table_value2string(Table::element_type row, Form_table_value2string_info* info) {
505: form_string_value2string(info->key, *row->get(0), info->result);
506: }
1.180 paf 507: static void form_value2string(
508: HashStringValue::key_type key,
509: HashStringValue::value_type value,
510: String* result)
511: {
1.181 paf 512: if(const String* svalue=value->get_string())
513: form_string_value2string(key, *svalue, *result);
514: else if(Table* tvalue=value->get_table()) {
515: Form_table_value2string_info info(key, *result);
516: tvalue->for_each(form_table_value2string, &info);
517: } else
518: throw Exception(0,
519: new String(key, String::L_TAINTED),
520: "is %s, "HTTP_FORM_NAME" option value must either string or table", value->type());
1.180 paf 521: }
522: static const char* form2string(HashStringValue& form) {
523: String string;
524: form.for_each(form_value2string, &string);
1.181 paf 525: return string.cstr(String::L_UNSPECIFIED);
1.180 paf 526: }
1.154 paf 527: #ifndef DOXYGEN
528: struct File_read_http_result {
529: char *str; size_t length;
530: HashStringValue* headers;
531: };
532: #endif
/**
	Locates the empty line ("\n\n" or "\r\n\r\n") that separates headers from body.

	@param p zero-terminated response text
	@param headers_end_at out: position right after the line break of the last header line, 
		or 0 when there is no empty line
	@param raw_body out: position right after the empty line, 
		or @a p itself when there is no empty line
*/
static void find_headers_end(char* p, 
			     char*& headers_end_at, 
			     char*& raw_body) 
{
	raw_body=p;
	char* cursor=p;
	for(;;) {
		cursor=strchr(cursor, '\n');
		if(!cursor)
			break;
		cursor++; // first char of the next line
		headers_end_at=cursor;
		if(*cursor=='\r') // optional \r of an empty line
			cursor++;
		if(*cursor=='\n') { // the line is empty: body starts right after it
			raw_body=cursor+1;
			return;
		}
	}
	headers_end_at=0;
}

551:
1.155 paf 552: /// @todo build .cookies field. use ^file.tables.SET-COOKIES.menu{ for now
1.154 paf 553: static File_read_http_result file_read_http(Request_charsets& charsets,
554: const String& file_spec,
1.169 paf 555: bool as_text,
556: HashStringValue *options=0) {
1.154 paf 557: File_read_http_result result;
1.126 paf 558: char host[MAX_STRING];
1.129 paf 559: const char* uri;
1.193 paf 560: short port;
1.180 paf 561: const char* method="GET"; bool method_is_get;
562: HashStringValue* form=0;
563: const char* body_cstr=0;
1.196 paf 564: int timeout_secs=2;
1.142 paf 565: bool fail_on_status_ne_200=true;
1.154 paf 566: Value* vheaders=0;
567: Charset *asked_remote_charset=0;
1.178 paf 568: const char* user_cstr=0;
569: const char* password_cstr=0;
1.126 paf 570:
1.127 paf 571: if(options) {
1.206 paf 572: int valid_options=get_valid_file_options_count(*options);
573:
1.177 paf 574: if(Value* vmethod=options->get(HTTP_METHOD_NAME)) {
1.127 paf 575: valid_options++;
1.154 paf 576: method=vmethod->as_string().cstr();
1.181 paf 577: }
578: if(Value* vform=options->get(HTTP_FORM_NAME)) {
1.180 paf 579: valid_options++;
580: form=vform->get_hash();
1.181 paf 581: }
582: if(Value* vbody=options->get(HTTP_BODY_NAME)) {
1.180 paf 583: valid_options++;
584: body_cstr=vbody->as_string().cstr(String::L_UNSPECIFIED);
1.181 paf 585: }
586: if(Value* vtimeout=options->get(HTTP_TIMEOUT_NAME)) {
1.127 paf 587: valid_options++;
1.196 paf 588: timeout_secs=vtimeout->as_int();
1.181 paf 589: }
590: if((vheaders=options->get(HTTP_HEADERS_NAME))) {
1.127 paf 591: valid_options++;
1.181 paf 592: }
593: if(Value* vany_status=options->get(HTTP_ANY_STATUS_NAME)) {
1.142 paf 594: valid_options++;
595: fail_on_status_ne_200=!vany_status->as_bool();
1.181 paf 596: }
597: if(Value* vcharset_name=options->get(HTTP_CHARSET_NAME)) {
1.154 paf 598: valid_options++;
599: asked_remote_charset=&::charsets.get(vcharset_name->as_string().
600: change_case(charsets.source(), String::CC_UPPER));
1.181 paf 601: }
602: if(Value* vuser=options->get(HTTP_USER)) {
1.178 paf 603: valid_options++;
604: user_cstr=vuser->as_string().cstr();
1.181 paf 605: }
606: if(Value* vpassword=options->get(HTTP_PASSWORD)) {
1.178 paf 607: valid_options++;
608: password_cstr=vpassword->as_string().cstr();
609: }
1.142 paf 610:
1.154 paf 611: if(valid_options!=options->count())
1.127 paf 612: throw Exception("parser.runtime",
613: 0,
614: "invalid option passed");
1.154 paf 615: }
616: if(!asked_remote_charset) // defaulting to $request:charset
617: asked_remote_charset=&charsets.source();
618:
1.180 paf 619: method_is_get=strcmp(method, "GET")==0;
620: if(method_is_get && body_cstr)
621: throw Exception("parser.runtime",
622: 0,
623: "you can not use $."HTTP_BODY_NAME" option with method GET");
624:
1.154 paf 625: //preparing request
626: String& connect_string=*new String;
627: // not in ^sql{... L_SQL ...} spirit, but closer to ^file::load one
628: connect_string.append(file_spec, String::L_URI); // tainted pieces -> URI pieces
629:
1.180 paf 630: String request_head_and_body;
1.154 paf 631: {
632: // influence URLencoding of tainted pieces to String::L_URI lang
633: Temp_client_charset temp(charsets, *asked_remote_charset);
634:
635: const char* connect_string_cstr=connect_string.cstr(String::L_UNSPECIFIED);
1.126 paf 636:
1.154 paf 637: const char* current=connect_string_cstr;
638: if(strncmp(current, "http://", 7)!=0)
639: throw Exception(0,
640: &connect_string,
641: "does not start with http://"); //never
642: current+=7;
643:
644: strncpy(host, current, sizeof(host)-1); host[sizeof(host)-1]=0;
645: char* host_uri=lsplit(host, '/');
646: uri=host_uri?current+(host_uri-1-host):"/";
647: char* port_cstr=lsplit(host, ':');
648: char* error_pos=0;
1.193 paf 649: port=port_cstr?(short)strtol(port_cstr, &error_pos, 0):80;
1.154 paf 650:
1.212 paf 651: bool uri_has_query_string=strchr(uri, '?')!=0;
1.180 paf 652:
653: //making request head
654: String head;
655: head << method;
656: head << " " << uri;
657: if(form)
658: if(method_is_get)
1.212 paf 659: head << (uri_has_query_string?"&":"?") << form2string(*form);
1.180 paf 660: head <<" HTTP/1.0" CRLF
1.154 paf 661: "host: "<< host << CRLF;
1.181 paf 662: if(form && !method_is_get) {
663: head << "content-type: application/x-www-form-urlencoded" CRLF;
664: body_cstr = form2string(*form);
665: }
1.178 paf 666:
1.179 paf 667: // http://www.ietf.org/rfc/rfc2617.txt
1.178 paf 668: if(const String* authorization_field_value=basic_authorization_field(user_cstr, password_cstr))
1.180 paf 669: head<<"authorization: "<<*authorization_field_value<<CRLF;
1.178 paf 670:
1.154 paf 671: bool user_agent_specified=false;
672: if(vheaders && !vheaders->is_string()) { // allow empty
673: if(HashStringValue *headers=vheaders->get_hash()) {
1.180 paf 674: Http_pass_header_info info={&charsets, &head, false};
1.154 paf 675: headers->for_each(http_pass_header, &info);
676: user_agent_specified=info.user_agent_specified;
677: } else
678: throw Exception("parser.runtime",
679: &connect_string,
680: "headers param must be hash");
681: };
682: if(!user_agent_specified) // defaulting
1.180 paf 683: head << "user-agent: " DEFAULT_USER_AGENT CRLF;
684:
685: if(body_cstr) {
686: // recode those pieces which are not in String::L_URI lang
687: // [those violating HTTP standard, but widly used]
688: body_cstr=Charset::transcode(
689: String::C(body_cstr, strlen(body_cstr)),
690: charsets.source(),
691: *asked_remote_charset);
1.181 paf 692:
693: head << "content-length: " << format(strlen(body_cstr), "%u") << CRLF;
1.180 paf 694: }
1.154 paf 695:
1.181 paf 696: const char* head_cstr=head.cstr(String::L_UNSPECIFIED);
697:
698: // recode those pieces which are not in String::L_URI lang
699: // [those violating HTTP standard, but widly used]
700: head_cstr=Charset::transcode(
701: String::C(head_cstr, strlen(head_cstr)),
702: charsets.source(),
703: *asked_remote_charset);
1.180 paf 704:
1.181 paf 705: // head + end of header
706: request_head_and_body << head_cstr << CRLF;
1.180 paf 707: // body
708: if(body_cstr)
709: request_head_and_body << body_cstr;
1.154 paf 710: }
1.126 paf 711:
712: //sending request
1.171 paf 713: char* response;
714: size_t response_size;
715: int status_code=http_request(response, response_size,
1.180 paf 716: host, port, request_head_and_body.cstr(),
1.196 paf 717: timeout_secs, fail_on_status_ne_200);
1.126 paf 718:
719: //processing results
1.171 paf 720: char* raw_body; size_t raw_body_size;
1.194 paf 721: char* headers_end_at;
722: find_headers_end(response,
723: headers_end_at,
724: raw_body);
1.191 paf 725: raw_body_size=response_size-(raw_body-response);
1.171 paf 726:
1.154 paf 727: result.headers=new HashStringValue;
1.155 paf 728: VHash* vtables=new VHash;
1.177 paf 729: result.headers->put(HTTP_TABLES_NAME, vtables);
1.194 paf 730: Charset* real_remote_charset=0; // undetected, yet
731:
732: if(headers_end_at) {
733: *headers_end_at=0;
734: const String header_block(String::C(response, headers_end_at-response), true);
735:
736: ArrayString aheaders;
737: HashStringValue& tables=vtables->hash();
1.155 paf 738:
1.194 paf 739: size_t pos_after=0;
740: header_block.split(aheaders, pos_after, "\n");
741:
742: //processing headers
743: size_t aheaders_count=aheaders.count();
744: for(size_t i=1; i<aheaders_count; i++) {
745: const String& line=*aheaders.get(i);
746: size_t pos=line.pos(':');
747: if(pos==STRING_NOT_FOUND || pos<1)
748: throw Exception("http.response",
749: &connect_string,
750: "bad response from host - bad header \"%s\"", line.cstr());
751: const String::Body HEADER_NAME=
752: line.mid(0, pos).change_case(charsets.source(), String::CC_UPPER);
753: const String& header_value=line.mid(pos+1, line.length()).trim(String::TRIM_BOTH, " \t\r");
754: if(as_text && HEADER_NAME=="CONTENT-TYPE")
755: real_remote_charset=detect_charset(charsets.source(), header_value);
756:
757: // tables
758: {
759: Value *valready=(Value *)tables.get(HEADER_NAME);
760: bool existed=valready!=0;
761: Table *table;
762: if(existed) {
763: // second+ appearence
764: table=valready->get_table();
765: } else {
766: // first appearence
767: Table::columns_type columns =new ArrayString(1);
768: *columns+=new String("value");
769: table=new Table(columns);
770: }
771: // this string becomes next row
772: ArrayString& row=*new ArrayString(1);
773: row+=&header_value;
774: *table+=&row;
775: // not existed before? add it
776: if(!existed)
777: tables.put(HEADER_NAME, new VTable(table));
1.155 paf 778: }
1.194 paf 779:
780: result.headers->put(HEADER_NAME, new VString(header_value));
1.155 paf 781: }
1.126 paf 782: }
783:
784: // output response
1.171 paf 785: String::C real_body=String::C(raw_body, raw_body_size);
1.192 paf 786: if(as_text && raw_body_size) { // must be checked because transcode returns CONST string in case length==0, which contradicts hacking few lines below
787: // defaulting to used-asked charset [it's never empty!]
788: if(!real_remote_charset)
789: real_remote_charset=asked_remote_charset;
1.169 paf 790: real_body=Charset::transcode(real_body, *real_remote_charset, charsets.source());
1.192 paf 791: }
1.154 paf 792:
793: result.str=const_cast<char *>(real_body.str); // hacking a little
794: result.length=real_body.length;
795: result.headers->put(file_status_name, new VInt(status_code));
796: return result;
1.34 paf 797: }
1.123 paf 798:
1.154 paf 799: #endif
800:
1.123 paf 801: #ifndef DOXYGEN
802: struct File_read_action_info {
1.154 paf 803: char **data; size_t *data_size;
1.188 paf 804: char* buf; size_t offset; size_t count;
1.126 paf 805: };
1.123 paf 806: #endif
1.154 paf 807: static void file_read_action(
808: struct stat& finfo,
809: int f,
1.166 paf 810: const String& file_spec, const char* /*fname*/, bool as_text,
1.154 paf 811: void *context) {
1.126 paf 812: File_read_action_info& info=*static_cast<File_read_action_info *>(context);
1.188 paf 813: size_t to_read_size=info.count;
814: if(!to_read_size)
815: to_read_size=(size_t)finfo.st_size;
816: assert( !(info.buf && as_text) );
817: if(to_read_size) {
818: if(info.offset)
819: lseek(f, info.offset, SEEK_SET);
820: *info.data=info.buf
821: ? info.buf
822: : new(PointerFreeGC) char[to_read_size+(as_text?1:0)];
1.126 paf 823: *info.data_size=(size_t)read(f, *info.data, to_read_size);
1.123 paf 824:
825: if(ssize_t(*info.data_size)<0 || *info.data_size>to_read_size)
1.126 paf 826: throw Exception(0,
1.123 paf 827: &file_spec,
1.173 paf 828: "read failed: actually read %u bytes count not in [0..%u] valid range",
1.126 paf 829: *info.data_size, to_read_size);
1.123 paf 830: } else { // empty file
1.209 paf 831: // for both, text and binary: for text we need that terminator, for binary we need nonzero pointer to be able to save such files
832: *info.data=new(PointerFreeGC) char[1];
833: *(char*)(*info.data)=0;
1.123 paf 834: *info.data_size=0;
835: return;
836: }
1.126 paf 837: }
1.154 paf 838: File_read_result file_read(Request_charsets& charsets, const String& file_spec,
839: bool as_text, HashStringValue *params,
1.188 paf 840: bool fail_on_read_problem,
841: char* buf, size_t offset, size_t count) {
1.167 paf 842: File_read_result result={false, 0, 0, 0};
1.154 paf 843: #ifdef PA_HTTP
844: if(file_spec.starts_with("http://")) {
1.203 paf 845: if(offset || count)
846: throw Exception("parser.runtime",
847: 0,
848: "offset and load options are not supported for HTTP:// file load");
849:
1.126 paf 850: // fail on read problem
1.169 paf 851: File_read_http_result http=file_read_http(charsets, file_spec, as_text, params);
1.154 paf 852: result.success=true;
853: result.str=http.str;
854: result.length=http.length;
855: result.headers=http.headers;
1.126 paf 856: } else {
1.154 paf 857: #endif
1.206 paf 858: if(params) {
859: int valid_options=get_valid_file_options_count(*params);
860: if(valid_options!=params->count())
861: throw Exception("parser.runtime",
862: 0,
863: "invalid option passed");
864: }
1.161 paf 865:
1.188 paf 866: File_read_action_info info={&result.str, &result.length,
867: buf, offset, count};
1.154 paf 868: result.success=file_read_action_under_lock(file_spec,
1.126 paf 869: "read", file_read_action, &info,
870: as_text, fail_on_read_problem);
1.154 paf 871: #ifdef PA_HTTP
1.126 paf 872: }
1.154 paf 873: #endif
1.123 paf 874:
1.154 paf 875: if(result.success && as_text) {
1.131 paf 876: // UTF-8 signature: EF BB BF
1.154 paf 877: if(result.length>=3) {
878: char *in=(char *)result.str;
1.159 paf 879: if(strncmp(in, "\xEF\xBB\xBF", 3)==0) {
1.154 paf 880: result.str=in+3; result.length-=3;// skip prefix
1.131 paf 881: }
882: }
883:
1.154 paf 884: fix_line_breaks((char *)(result.str), result.length);
1.123 paf 885: }
1.126 paf 886:
887: return result;
1.123 paf 888: }
889:
#ifdef PA_SAFE_MODE
/**
	Safe-mode guard: throws "parser.runtime" when the file described by
	@a finfo belongs to both a foreign user and a foreign group.
	A file whose owner OR group matches the effective ids is accepted.

	@param finfo     stat data of the file being accessed
	@param file_spec reported as the problem source of the exception
	@param fname     actual file name, used in the message only
*/
void check_safe_mode(struct stat finfo, const String& file_spec, const char* fname) {
	// own user or own group: nothing to complain about
	if(finfo.st_uid==geteuid() || finfo.st_gid==getegid())
		return;

	throw Exception("parser.runtime",
		&file_spec,
		"parser is in safe mode: "
		"reading files of foreign group and user disabled "
		"[recompile parser with --disable-safe-mode configure option], "
		"actual filename '%s', "
		"fuid(%d)!=euid(%d) or fgid(%d)!=egid(%d)",
			fname,
			finfo.st_uid, geteuid(),
			finfo.st_gid, getegid());
}
#endif
1.149 paf 906:
1.154 paf 907: bool file_read_action_under_lock(const String& file_spec,
1.126 paf 908: const char* action_name, File_read_action action, void *context,
909: bool as_text,
1.123 paf 910: bool fail_on_read_problem) {
1.154 paf 911: const char* fname=file_spec.cstr(String::L_FILE_SPEC);
1.33 paf 912: int f;
913:
914: // first open, next stat:
1.45 paf 915: // directory update of NTFS hard links performed on open.
1.33 paf 916: // ex:
917: // a.html:^test[] and b.html hardlink to a.html
918: // user inserts ! before ^test in a.html
1.126 paf 919: // directory entry of b.html in NTFS not updated at once,
1.35 paf 920: // they delay update till open, so we would receive "!^test[" string
921: // if would do stat, next open.
1.123 paf 922: // later: it seems, even this does not help sometimes
1.98 paf 923: if((f=open(fname, O_RDONLY|(as_text?_O_TEXT:_O_BINARY)))>=0) {
1.123 paf 924: try {
1.162 paf 925: if(pa_lock_shared_blocking(f)!=0)
1.126 paf 926: throw Exception("file.lock",
1.123 paf 927: &file_spec,
928: "shared lock failed: %s (%d), actual filename '%s'",
1.154 paf 929: strerror(errno), errno, fname);
1.123 paf 930:
1.124 paf 931: struct stat finfo;
932: if(stat(fname, &finfo)!=0)
933: throw Exception("file.missing", // hardly possible: we just opened it OK
934: &file_spec,
935: "stat failed: %s (%d), actual filename '%s'",
1.154 paf 936: strerror(errno), errno, fname);
1.124 paf 937:
1.140 paf 938: #ifdef PA_SAFE_MODE
1.149 paf 939: check_safe_mode(finfo, file_spec, fname);
1.105 paf 940: #endif
1.32 paf 941:
1.154 paf 942: action(finfo, f, file_spec, fname, as_text, context);
1.123 paf 943: } catch(...) {
1.162 paf 944: pa_unlock(f);close(f);
1.123 paf 945: if(fail_on_read_problem)
1.154 paf 946: rethrow;
1.123 paf 947: return false;
948: }
1.87 paf 949:
1.162 paf 950: pa_unlock(f);close(f);
1.72 parser 951: return true;
1.118 paf 952: } else {
953: if(fail_on_read_problem)
1.126 paf 954: throw Exception(errno==EACCES?"file.access":errno==ENOENT?"file.missing":0,
1.118 paf 955: &file_spec,
1.123 paf 956: "%s failed: %s (%d), actual filename '%s'",
1.154 paf 957: action_name, strerror(errno), errno, fname);
1.118 paf 958: return false;
959: }
1.8 paf 960: }
961:
1.202 paf 962: void create_dir_for_file(const String& file_spec) {
1.63 parser 963: size_t pos_after=1;
1.154 paf 964: size_t pos_before;
965: while((pos_before=file_spec.pos('/', pos_after))!=STRING_NOT_FOUND) {
966: mkdir(file_spec.mid(0, pos_before).cstr(String::L_FILE_SPEC), 0775);
1.63 parser 967: pos_after=pos_before+1;
968: }
969: }
970:
1.98 paf 971: bool file_write_action_under_lock(
1.28 paf 972: const String& file_spec,
1.126 paf 973: const char* action_name, File_write_action action, void *context,
974: bool as_text,
975: bool do_append,
976: bool do_block,
1.110 paf 977: bool fail_on_lock_problem) {
1.154 paf 978: const char* fname=file_spec.cstr(String::L_FILE_SPEC);
1.28 paf 979: int f;
1.80 paf 980: if(access(fname, W_OK)!=0) // no
1.126 paf 981: create_dir_for_file(file_spec);
1.50 paf 982:
1.80 paf 983: if((f=open(fname,
984: O_CREAT|O_RDWR
985: |(as_text?_O_TEXT:_O_BINARY)
1.138 paf 986: |(do_append?O_APPEND:PA_O_TRUNC), 0664))>=0) {
1.162 paf 987: if((do_block?pa_lock_exclusive_blocking(f):pa_lock_exclusive_nonblocking(f))!=0) {
1.126 paf 988: Exception e("file.lock",
1.110 paf 989: &file_spec,
990: "shared lock failed: %s (%d), actual filename '%s'",
1.154 paf 991: strerror(errno), errno, fname);
1.126 paf 992: close(f);
1.110 paf 993: if(fail_on_lock_problem)
994: throw e;
1.98 paf 995: return false;
996: }
1.96 paf 997:
1.158 paf 998: try {
1.126 paf 999: action(f, context);
1.158 paf 1000: } catch(...) {
1.138 paf 1001: #ifdef HAVE_FTRUNCATE
1.104 paf 1002: if(!do_append)
1.125 paf 1003: ftruncate(f, lseek(f, 0, SEEK_CUR)); // one can not use O_TRUNC, read lower
1.138 paf 1004: #endif
1.162 paf 1005: pa_unlock(f);close(f);
1.154 paf 1006: rethrow;
1.158 paf 1007: }
1.80 paf 1008:
1.138 paf 1009: #ifdef HAVE_FTRUNCATE
1.104 paf 1010: if(!do_append)
1.125 paf 1011: ftruncate(f, lseek(f, 0, SEEK_CUR)); // O_TRUNC truncates even exclusevely write-locked file [thanks to Igor Milyakov <virtan@rotabanner.com> for discovering]
1.138 paf 1012: #endif
1.162 paf 1013: pa_unlock(f);close(f);
1.98 paf 1014: return true;
1.80 paf 1015: } else
1.126 paf 1016: throw Exception(errno==EACCES?"file.access":0,
1.80 paf 1017: &file_spec,
1.96 paf 1018: "%s failed: %s (%d), actual filename '%s'",
1.154 paf 1019: action_name, strerror(errno), errno, fname);
1.96 paf 1020: // here should be nothing, see rethrow above
1021: }
1022:
#ifndef DOXYGEN
/// Context handed to file_write_action: the bytes that have to be written.
struct File_write_action_info {
	const char* str; ///< first byte to write
	size_t length; ///< number of bytes at str
};
#endif
1028: static void file_write_action(int f, void *context) {
1.126 paf 1029: File_write_action_info& info=*static_cast<File_write_action_info *>(context);
1.154 paf 1030: if(info.length) {
1031: int written=write(f, info.str, info.length);
1.116 paf 1032: if(written<0)
1.126 paf 1033: throw Exception(0,
1034: 0,
1035: "write failed: %s (%d)", strerror(errno), errno);
1.113 paf 1036: }
1.96 paf 1037: }
1038: void file_write(
1039: const String& file_spec,
1.154 paf 1040: const char* data, size_t size,
1.126 paf 1041: bool as_text,
1.96 paf 1042: bool do_append) {
1.126 paf 1043: File_write_action_info info={data, size};
1.98 paf 1044: file_write_action_under_lock(
1.154 paf 1045: file_spec,
1046: "write", file_write_action, &info,
1047: as_text,
1048: do_append);
1.30 paf 1049: }
1050:
1.63 parser 1051: // throws nothing! [this is required in file_move & file_delete]
1.50 paf 1052: static void rmdir(const String& file_spec, size_t pos_after) {
1.154 paf 1053: size_t pos_before;
1054: if((pos_before=file_spec.pos('/', pos_after))!=STRING_NOT_FOUND)
1.126 paf 1055: rmdir(file_spec, pos_before+1);
1.50 paf 1056:
1.154 paf 1057: rmdir(file_spec.mid(0, pos_after-1/* / */).cstr(String::L_FILE_SPEC));
1.50 paf 1058: }
1.164 paf 1059: bool file_delete(const String& file_spec, bool fail_on_problem) {
1.154 paf 1060: const char* fname=file_spec.cstr(String::L_FILE_SPEC);
1.54 parser 1061: if(unlink(fname)!=0)
1.164 paf 1062: if(fail_on_problem)
1.126 paf 1063: throw Exception(errno==EACCES?"file.access":errno==ENOENT?"file.missing":0,
1.93 paf 1064: &file_spec,
1065: "unlink failed: %s (%d), actual filename '%s'",
1.154 paf 1066: strerror(errno), errno, fname);
1.93 paf 1067: else
1068: return false;
1.50 paf 1069:
1.126 paf 1070: rmdir(file_spec, 1);
1.93 paf 1071: return true;
1.60 parser 1072: }
1.95 paf 1073: void file_move(const String& old_spec, const String& new_spec) {
1.154 paf 1074: const char* old_spec_cstr=old_spec.cstr(String::L_FILE_SPEC);
1075: const char* new_spec_cstr=new_spec.cstr(String::L_FILE_SPEC);
1.63 parser 1076:
1.126 paf 1077: create_dir_for_file(new_spec);
1.63 parser 1078:
1.60 parser 1079: if(rename(old_spec_cstr, new_spec_cstr)!=0)
1.126 paf 1080: throw Exception(errno==EACCES?"file.access":errno==ENOENT?"file.missing":0,
1.60 parser 1081: &old_spec,
1082: "rename failed: %s (%d), actual filename '%s' to '%s'",
1.154 paf 1083: strerror(errno), errno, old_spec_cstr, new_spec_cstr);
1.63 parser 1084:
1.126 paf 1085: rmdir(old_spec, 1);
1.31 paf 1086: }
1087:
1.51 paf 1088:
/**
	Checks whether @a fname can be stat()-ed.

	@param fname  file system name to check
	@param afinfo optional; receives the stat data, but only when the entry
	              exists (on failure the caller's buffer is left untouched
	              instead of being filled with uninitialized data)
	@return true when stat() succeeded
*/
bool entry_exists(const char* fname, struct stat *afinfo) {
	struct stat lfinfo;
	if(stat(fname, &lfinfo)!=0)
		return false;

	if(afinfo)
		*afinfo=lfinfo;
	return true;
}
1096:
1097: bool entry_exists(const String& file_spec) {
1.154 paf 1098: const char* fname=file_spec.cstr(String::L_FILE_SPEC);
1.126 paf 1099: return entry_exists(fname, 0);
1.118 paf 1100: }
1101:
1.51 paf 1102: static bool entry_readable(const String& file_spec, bool need_dir) {
1.154 paf 1103: char* fname=file_spec.cstrm(String::L_FILE_SPEC);
1.120 paf 1104: if(need_dir) {
1.126 paf 1105: size_t size=strlen(fname);
1.120 paf 1106: while(size) {
1.126 paf 1107: char c=fname[size-1];
1.120 paf 1108: if(c=='/' || c=='\\')
1109: fname[--size]=0;
1110: else
1111: break;
1112: }
1113: }
1.51 paf 1114: struct stat finfo;
1.118 paf 1115: if(access(fname, R_OK)==0 && entry_exists(fname, &finfo)) {
1.109 paf 1116: bool is_dir=(finfo.st_mode&S_IFDIR) != 0;
1.51 paf 1117: return is_dir==need_dir;
1118: }
1119: return false;
1120: }
1.31 paf 1121: bool file_readable(const String& file_spec) {
1.126 paf 1122: return entry_readable(file_spec, false);
1.51 paf 1123: }
1124: bool dir_readable(const String& file_spec) {
1.126 paf 1125: return entry_readable(file_spec, true);
1.65 parser 1126: }
1.154 paf 1127: const String* file_readable(const String& path, const String& name) {
1128: String& result=*new String(path);
1129: result << "/";
1130: result << name;
1131: return file_readable(result)?&result:0;
1.43 paf 1132: }
1133: bool file_executable(const String& file_spec) {
1.154 paf 1134: return access(file_spec.cstr(String::L_FILE_SPEC), X_OK)==0;
1.44 paf 1135: }
1136:
1.64 parser 1137: bool file_stat(const String& file_spec,
1.58 parser 1138: size_t& rsize,
1.126 paf 1139: time_t& ratime,
1140: time_t& rmtime,
1141: time_t& rctime,
1.64 parser 1142: bool fail_on_read_problem) {
1.154 paf 1143: const char* fname=file_spec.cstr(String::L_FILE_SPEC);
1144: struct stat finfo;
1.44 paf 1145: if(stat(fname, &finfo)!=0)
1.64 parser 1146: if(fail_on_read_problem)
1.126 paf 1147: throw Exception("file.missing",
1.67 parser 1148: &file_spec,
1149: "getting file size failed: %s (%d), real filename '%s'",
1.154 paf 1150: strerror(errno), errno, fname);
1.64 parser 1151: else
1152: return false;
1.58 parser 1153: rsize=finfo.st_size;
1154: ratime=finfo.st_atime;
1155: rmtime=finfo.st_mtime;
1156: rctime=finfo.st_ctime;
1.64 parser 1157: return true;
1.18 paf 1158: }
1159:
/**
	Cuts the next row off the text at *row_ref (the text is modified in place).

	The first @a delim is replaced with a terminator and *row_ref is moved
	past it. When no delimiter is left, *row_ref becomes 0 and the rest is
	returned, unless the rest is empty, in which case 0 is returned.

	@return the row, or 0 when *row_ref is 0 or only an empty tail remains
*/
char* getrow(char* *row_ref, char delim) {
	char* row=*row_ref;
	if(!row)
		return row;

	char* delim_at=strchr(row, delim);
	if(delim_at) {
		*delim_at=0;
		*row_ref=delim_at+1;
		return row;
	}

	// last row: nothing follows it
	*row_ref=0;
	return *row?row:0;
}
1171:
/**
	Splits @a string in place at the first @a delim: the delimiter is
	replaced with a terminator.

	@return pointer to the part after the delimiter, or 0 when @a string
	        is 0 or holds no delimiter (then nothing is modified)
*/
char* lsplit(char* string, char delim) {
	if(!string)
		return 0;

	char* delim_at=strchr(string, delim);
	if(!delim_at)
		return 0;

	*delim_at=0;
	return delim_at+1;
}
1182:
1.126 paf 1183: char* lsplit(char* *string_ref, char delim) {
1184: char* result=*string_ref;
1185: char* next=lsplit(*string_ref, delim);
1.8 paf 1186: *string_ref=next;
1187: return result;
1.9 paf 1188: }
1189:
/**
	Splits @a string in place at the last @a delim: the delimiter is
	replaced with a terminator.

	@return pointer to the part after the delimiter, or NULL when
	        @a string is 0 or holds no delimiter (then nothing is modified)
*/
char* rsplit(char* string, char delim) {
	char* delim_at=string?strrchr(string, delim):NULL;
	if(!delim_at)
		return NULL;

	*delim_at=0;
	return delim_at+1;
}
1200:
1.37 paf 1201: /// @todo less stupid type detection
1.154 paf 1202: const char* format(double value, char* fmt) {
1.126 paf 1203: char local_buf[MAX_NUMBER];
1.108 paf 1204: size_t size;
1205:
1.10 paf 1206: if(fmt)
1207: if(strpbrk(fmt, "diouxX"))
1208: if(strpbrk(fmt, "ouxX"))
1.126 paf 1209: size=snprintf(local_buf, sizeof(local_buf), fmt, (uint)value);
1.10 paf 1210: else
1.126 paf 1211: size=snprintf(local_buf, sizeof(local_buf), fmt, (int)value);
1.10 paf 1212: else
1.126 paf 1213: size=snprintf(local_buf, sizeof(local_buf), fmt, value);
1.10 paf 1214: else
1.126 paf 1215: size=snprintf(local_buf, sizeof(local_buf), "%d", (int)value);
1.10 paf 1216:
1.154 paf 1217: return pa_strdup(local_buf, size);
1.12 paf 1218: }
1219:
/**
	Writes @a size bytes from @a buf to stdout.

	On WIN32 the data is written in chunks of at most 8*0x400 bytes and
	writing stops at the first chunk that writes nothing.

	@return number of bytes written
*/
size_t stdout_write(const void *buf, size_t size) {
#ifdef WIN32
	const char* cursor=(const char*)buf;
	size_t remaining=size;

	// at least one fwrite is always attempted, even for size==0
	for(;;) {
		int chunk_written=fwrite(cursor, 1, min((size_t)8*0x400, remaining), stdout);
		if(chunk_written<=0)
			break;

		remaining-=chunk_written;
		cursor+=chunk_written;
		if(remaining==0)
			break;
	}

	return size-remaining;
#else
	return fwrite(buf, 1, size, stdout);
#endif
}
1.14 paf 1236:
1.154 paf 1237: char* unescape_chars(const char* cp, int len) {
1238: char* s=new(PointerFreeGC) char[len + 1];
1.14 paf 1239: enum EscapeState {
1.33 paf 1240: EscapeRest,
1241: EscapeFirst,
1.14 paf 1242: EscapeSecond
1243: } escapeState=EscapeRest;
1.193 paf 1244: uchar escapedValue=0;
1.14 paf 1245: int srcPos=0;
1246: int dstPos=0;
1247: while(srcPos < len) {
1.193 paf 1248: uchar ch=(uchar)cp[srcPos];
1.14 paf 1249: switch(escapeState) {
1250: case EscapeRest:
1251: if(ch=='%') {
1252: escapeState=EscapeFirst;
1253: } else if(ch=='+') {
1.126 paf 1254: s[dstPos++]=' ';
1.14 paf 1255: } else {
1256: s[dstPos++]=ch;
1257: }
1258: break;
1259: case EscapeFirst:
1.193 paf 1260: escapedValue=(uchar)(hex_value[ch] << 4);
1.14 paf 1261: escapeState=EscapeSecond;
1262: break;
1263: case EscapeSecond:
1.126 paf 1264: escapedValue +=hex_value[ch];
1.14 paf 1265: s[dstPos++]=escapedValue;
1266: escapeState=EscapeRest;
1267: break;
1268: }
1.126 paf 1269: srcPos++;
1.14 paf 1270: }
1271: s[dstPos]=0;
1272: return s;
1.24 paf 1273: }
1274:
1275: #ifdef WIN32
/// Replaces every '\\' in the zero-terminated string @a s with '/', in place.
/// A null @a s is accepted and ignored.
void back_slashes_to_slashes(char* s) {
	if(!s)
		return;

	while(*s) {
		if(*s=='\\')
			*s='/';
		s++;
	}
}
1.42 paf 1282: /*
1.126 paf 1283: void slashes_to_back_slashes(char* s) {
1.42 paf 1284: if(s)
1285: for(; *s; s++)
1286: if(*s=='/')
1.126 paf 1287: *s='\\';
1.42 paf 1288: }
1289: */
1.24 paf 1290: #endif
1.41 paf 1291:
/**
	Compares two strings ignoring letter case.

	Characters are compared pairwise: when the character of @a s1 is a
	letter both sides are lowercased first, otherwise they must match exactly.

	@param strict true: strings must have the same length;
	              false: it is enough for one to be a prefix of the other
	@return true when the strings are considered equal
*/
bool StrEqNc(const char* s1, const char* s2, bool strict) {
	for(; *s1 && *s2; s1++, s2++) {
		if(isalpha((unsigned char)*s1)) {
			if(tolower((unsigned char)*s1) !=tolower((unsigned char)*s2))
				return false;
		} else if((*s1) !=(*s2))
			return false;
	}

	// at least one string has ended here
	if(!(*s1) && !(*s2))
		return true;
	return !strict;
}
1310:
/// True for years divisible by 4, except those divisible by 100 but not by 400.
static bool isLeap(int year) {
	if(year % 4)
		return false;
	if(year % 100)
		return true;
	return (year % 400)==0;
}
1317: int getMonthDays(int year, int month) {
1318: int monthDays[]={
1.126 paf 1319: 31,
1320: isLeap(year) ? 29 : 28,
1321: 31,
1322: 30,
1323: 31,
1324: 30,
1325: 31,
1326: 31,
1327: 30,
1328: 31,
1329: 30,
1.57 parser 1330: 31
1.126 paf 1331: };
1332: return monthDays[month];
1.41 paf 1333: }
1.69 parser 1334:
/// Flattens line breaks in the range [start, end) in place:
/// '\n' becomes '|' and '\r' becomes a space.
void remove_crlf(char* start, char* end) {
	char* cursor=start;
	while(cursor<end) {
		if(*cursor=='\n')
			*cursor='|';
		else if(*cursor=='\r')
			*cursor=' ';
		cursor++;
	}
}
1342:
1343:
1344: /// must be last in this file
1345: #undef vsnprintf
/**
	vsnprintf replacement with uniform behaviour: the result is always
	zero-terminated and the return value is the number of characters
	actually stored, excluding the terminator.

	@param b destination buffer
	@param s size of @a b in bytes, terminator included; 0 or a value that
	         is negative when seen as signed makes the call a no-op returning 0
	@param f printf-style format
	@param l arguments for @a f
	@return number of characters stored in @a b, at most s-1
*/
int __vsnprintf(char* b, size_t s, const char* f, va_list l) {
	if(!s)
		return 0;

	// from here on 's' is the maximum number of characters we may store;
	// the last byte of the buffer is kept for the terminator written below
	--s;

	// clients do not check for negative 's', feature: ignore such prints
	if((ssize_t)s<0)
		return 0;

	int r;
#if _MSC_VER
	/*
	win32: 
	if the number of bytes to write exceeds buffer, then count bytes are
	written [without terminating 0] and -1 is returned,
	so it is given only the 's' character slots
	*/
	r=_vsnprintf(b, s, f, l);
	if(r<0)
		r=s;
#else
	/*
	standard vsnprintf counts the terminator in its size argument and
	returns the number of characters that would have been written if
	the buffer were large enough; it gets the whole buffer [s+1 bytes],
	otherwise the last character slot would stay unused on truncation
	while 's' characters were reported as written
	*/
	r=vsnprintf(b, s+1, f, l);
	if(r<0)
		r=0;
	else if((size_t)r>s)
		r=s;
#endif
	b[r]=0;
	return r;
}
1390:
1.126 paf 1391: int __snprintf(char* b, size_t s, const char* f, ...) {
1.91 paf 1392: va_list l;
1.126 paf 1393: va_start(l, f);
1394: int r=__vsnprintf(b, s, f, l);
1395: va_end(l);
1.91 paf 1396: return r;
1.178 paf 1397: }
1398:
1399: /* mime64 functions are from libgmime[http://spruce.sourceforge.net/gmime/] lib */
1400: /*
1401: * Authors: Michael Zucchi <notzed@helixcode.com>
1402: * Jeffrey Stedfast <fejj@helixcode.com>
1403: *
1404: * Copyright 2000 Helix Code, Inc. (www.helixcode.com)
1405: *
1406: * This program is free software; you can redistribute it and/or modify
1407: * it under the terms of the GNU General Public License as published by
1408: * the Free Software Foundation; either version 2 of the License, or
1409: * (at your option) any later version.
1410: *
1411: * This program is distributed in the hope that it will be useful,
1412: * but WITHOUT ANY WARRANTY; without even the implied warranty of
1413: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1414: * GNU General Public License for more details.
1415: *
1416: * You should have received a copy of the GNU General Public License
1417: * along with this program; if not, write to the Free Software
1418: * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
1419: *
1420: */
/* the 64 output symbols of base64, indexed by 6-bit value; read-only */
static const char *base64_alphabet =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1423:
1424: /**
1425: * g_mime_utils_base64_encode_step:
1426: * @in: input stream
1427: * @inlen: length of the input
1428: * @out: output string
1429: * @state: holds the number of bits that are stored in @save
1430: * @save: leftover bits that have not yet been encoded
1431: *
1432: * Base64 encodes a chunk of data. Performs an 'encode step', only
1433: * encodes blocks of 3 characters to the output at a time, saves
1434: * left-over state in state and save (initialise to 0 on first
1435: * invocation).
1436: *
1437: * Returns the number of bytes encoded.
1438: **/
1439: static size_t
1440: g_mime_utils_base64_encode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save)
1441: {
1.186 paf 1442: register const unsigned char *inptr;
1.178 paf 1443: register unsigned char *outptr;
1444:
1445: if (inlen <= 0)
1446: return 0;
1447:
1448: inptr = in;
1449: outptr = out;
1450:
1451: if (inlen + ((unsigned char *)save)[0] > 2) {
1452: const unsigned char *inend = in + inlen - 2;
1453: register int c1 = 0, c2 = 0, c3 = 0;
1454: register int already;
1455:
1456: already = *state;
1457:
1458: switch (((char *)save)[0]) {
1459: case 1: c1 = ((unsigned char *)save)[1]; goto skip1;
1460: case 2: c1 = ((unsigned char *)save)[1];
1461: c2 = ((unsigned char *)save)[2]; goto skip2;
1462: }
1463:
1464: /* yes, we jump into the loop, no i'm not going to change it, its beautiful! */
1465: while (inptr < inend) {
1466: c1 = *inptr++;
1467: skip1:
1468: c2 = *inptr++;
1469: skip2:
1470: c3 = *inptr++;
1471: *outptr++ = base64_alphabet [c1 >> 2];
1472: *outptr++ = base64_alphabet [(c2 >> 4) | ((c1 & 0x3) << 4)];
1473: *outptr++ = base64_alphabet [((c2 & 0x0f) << 2) | (c3 >> 6)];
1474: *outptr++ = base64_alphabet [c3 & 0x3f];
1475: /* this is a bit ugly ... */
1476: if ((++already) >= 19) {
1477: *outptr++ = '\n';
1478: already = 0;
1479: }
1480: }
1481:
1482: ((unsigned char *)save)[0] = 0;
1483: inlen = 2 - (inptr - inend);
1484: *state = already;
1485: }
1486:
1487: //d(printf ("state = %d, inlen = %d\n", (int)((char *)save)[0], inlen));
1488:
1489: if (inlen > 0) {
1490: register char *saveout;
1491:
1492: /* points to the slot for the next char to save */
1493: saveout = & (((char *)save)[1]) + ((char *)save)[0];
1494:
1495: /* inlen can only be 0 1 or 2 */
1496: switch (inlen) {
1497: case 2: *saveout++ = *inptr++;
1498: case 1: *saveout++ = *inptr++;
1499: }
1500: ((char *)save)[0] += inlen;
1501: }
1502:
1503: /*d(printf ("mode = %d\nc1 = %c\nc2 = %c\n",
1504: (int)((char *)save)[0],
1505: (int)((char *)save)[1],
1506: (int)((char *)save)[2]));*/
1507:
1508: return (outptr - out);
1509: }
1510:
1511: /**
1512: * g_mime_utils_base64_encode_close:
1513: * @in: input stream
1514: * @inlen: length of the input
1515: * @out: output string
1516: * @state: holds the number of bits that are stored in @save
1517: * @save: leftover bits that have not yet been encoded
1518: *
1519: * Base64 encodes the input stream to the output stream. Call this
1520: * when finished encoding data with g_mime_utils_base64_encode_step to
1521: * flush off the last little bit.
1522: *
1523: * Returns the number of bytes encoded.
1524: **/
1525: static size_t
1526: g_mime_utils_base64_encode_close (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save)
1527: {
1528: unsigned char *outptr = out;
1529: int c1, c2;
1530:
1531: if (inlen > 0)
1532: outptr += g_mime_utils_base64_encode_step (in, inlen, outptr, state, save);
1533:
1534: c1 = ((unsigned char *)save)[1];
1535: c2 = ((unsigned char *)save)[2];
1536:
1537: switch (((unsigned char *)save)[0]) {
1538: case 2:
1539: outptr[2] = base64_alphabet [(c2 & 0x0f) << 2];
1540: goto skip;
1541: case 1:
1542: outptr[2] = '=';
1543: skip:
1544: outptr[0] = base64_alphabet [c1 >> 2];
1545: outptr[1] = base64_alphabet [c2 >> 4 | ((c1 & 0x3) << 4)];
1546: outptr[3] = '=';
1547: outptr += 4;
1548: break;
1549: }
1550:
1551: *outptr++ = 0;
1552:
1553: *save = 0;
1554: *state = 0;
1555:
1556: return (outptr - out);
1557: }
1558:
/* maps a character code to its 6-bit base64 value; 255 marks characters
   that are not part of the alphabet. Note that '=' maps to 0, not 255.
   Read-only. */
static const unsigned char gmime_base64_rank[256] = {
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63,
	 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,  0,255,255,
	255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
	 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255,
	255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
	 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
};
1577:
1578: /**
1579: * g_mime_utils_base64_decode_step:
1580: * @in: input stream
1581: * @inlen: max length of data to decode
1582: * @out: output stream
1583: * @state: holds the number of bits that are stored in @save
1584: * @save: leftover bits that have not yet been decoded
1585: *
1586: * Decodes a chunk of base64 encoded data.
1587: *
1588: * Returns the number of bytes decoded (which have been dumped in @out).
1589: **/
1590: size_t
1591: g_mime_utils_base64_decode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save)
1592: {
1.213 ! paf 1593: const unsigned char *inptr;
! 1594: unsigned char *outptr;
1.210 paf 1595: const unsigned char *inend;
1.213 ! paf 1596: int saved;
1.210 paf 1597: unsigned char c;
1598: int i;
1599:
1600: inend = in + inlen;
1601: outptr = out;
1602:
1603: /* convert 4 base64 bytes to 3 normal bytes */
1604: saved = *save;
1605: i = *state;
1606: inptr = in;
1607: while (inptr < inend) {
1608: c = gmime_base64_rank[*inptr++];
1609: if (c != 0xff) {
1610: saved = (saved << 6) | c;
1611: i++;
1612: if (i == 4) {
1613: *outptr++ = saved >> 16;
1614: *outptr++ = saved >> 8;
1615: *outptr++ = saved;
1616: i = 0;
1617: }
1618: }
1619: }
1620:
1621: *save = saved;
1622: *state = i;
1623:
1624: /* quick scan back for '=' on the end somewhere */
1625: /* fortunately we can drop 1 output char for each trailing = (upto 2) */
1626: i = 2;
1627: while (inptr > in && i) {
1628: inptr--;
1629: if (gmime_base64_rank[*inptr] != 0xff) {
1630: if (*inptr == '=' && outptr > out)
1631: outptr--;
1632: i--;
1633: }
1634: }
1635:
1636: /* if i != 0 then there is a truncation error! */
1637: return (outptr - out);
1638: }
1639:
1640:
1641: char* pa_base64_encode(const char *in, size_t in_size)
1.178 paf 1642: {
1643: /* wont go to more than 2x size (overly conservative) */
1.210 paf 1644: char* result=new(PointerFreeGC) char[in_size * 2 + 6];
1.178 paf 1645: int state=0;
1646: int save=0;
1.183 paf 1647: #ifndef NDEBUG
1648: size_t filled=
1649: #endif
1.210 paf 1650: g_mime_utils_base64_encode_close ((const unsigned char*)in, in_size,
1.178 paf 1651: (unsigned char*)result, &state, &save);
1.210 paf 1652: assert(filled <= in_size * 2 + 6);
1.178 paf 1653:
1654: return result;
1.98 paf 1655: }
1.210 paf 1656:
1.211 paf 1657: void pa_base64_decode(const char *in, size_t in_size, char*& result, size_t& result_size)
1.210 paf 1658: {
1659: /* wont go to more than had (overly conservative) */
1.211 paf 1660: result=new(PointerFreeGC) char[in_size+1/*terminator*/];
1.210 paf 1661: int state=0;
1662: int save=0;
1663: result_size=
1664: g_mime_utils_base64_decode_step ((const unsigned char*)in, in_size,
1665: (unsigned char*)result, &state, &save);
1666: assert(result_size <= in_size);
1.211 paf 1667: result[result_size]=0; // for text files
1.210 paf 1668: }
E-mail: