--- parser3/src/main/untaint.C 2001/04/05 13:19:43 1.34 +++ parser3/src/main/untaint.C 2009/08/21 08:38:55 1.151 @@ -1,249 +1,663 @@ /** @file Parser: String class part: untaint mechanizm. - Copyright(c) 2001 ArtLebedev Group(http://www.artlebedev.com) - - Author: Alexander Petrosyan (http://design.ru/paf) - - $Id: untaint.C,v 1.34 2001/04/05 13:19:43 paf Exp $ + Copyright(c) 2001-2005 ArtLebedev Group (http://www.artlebedev.com) + Author: Alexandr Petrosian (http://paf.design.ru) */ -#include "pa_config_includes.h" +static const char * const IDENT_UNTAINT_C="$Date: 2009/08/21 08:38:55 $"; + -#include "pa_pool.h" #include "pa_string.h" #include "pa_hash.h" #include "pa_exception.h" #include "pa_table.h" #include "pa_globals.h" +#include "pa_dictionary.h" +#include "pa_common.h" +#include "pa_charset.h" +#include "pa_request_charsets.h" +#include "pa_sapi.h" + +extern "C" { // author forgot to do that +#include "ec.h" +} + #include "pa_sql_connection.h" -#define escape(action) \ - { \ - const char *src=row->item.ptr; \ - for(int size=row->item.size; size--; src++) \ - action \ - } -#define _default default: *dest++=*src; break -#define encode(need_encode_func, prefix) \ - default: \ - if(need_encode_func(*src)) { \ - static const char *hex="0123456789ABCDEF"; \ - char chunk[3]={prefix}; \ - chunk[1]=hex[((unsigned char)*src)/0x10]; \ - chunk[2]=hex[((unsigned char)*src)%0x10]; \ - strncpy(dest, chunk, 3); dest+=3; \ - } else \ - *dest++=*src; \ - break -#define to_char(c) *dest++=c -#define to_string(b, bsize) \ - strncpy(dest, b, bsize); \ - dest+=bsize; \ +// defines -inline bool need_file_encode(unsigned char c){ - if((c>='0') &&(c<='9') ||(c>='A') &&(c<='Z') ||(c>='a') &&(c<='z')) - return false; +#undef CORD_ec_append +// redefining to intercept flushes and implement whitespace optimization +// of all consequent white space chars leaving only first one +#define CORD_ec_append(x, c) \ + { \ + bool skip=false; \ + if(optimize) switch(c) { \ + case ' ': case '\n': case '\t': \ + if(whitespace) \ + skip=true; /*skipping subsequent*/ \ + else \ + whitespace=true; \ + break; \ + default: \ + whitespace=false; \ + break; \ + } \ + if(!skip) { \ + if ((x)[0].ec_bufptr == (x)[0].ec_buf + CORD_BUFSZ) { \ + CORD_ec_flush_buf(x); \ + } \ + *((x)[0].ec_bufptr)++ = (c); \ + } \ + } + + +#define escape_fragment(action) \ + for(; fragment_length--; CORD_next(info->pos)) { \ + char c=CORD_pos_fetch(info->pos); \ + action \ + } +#define _default CORD_ec_append(info->result, c) +#define encode(need_encode_func, prefix, otherwise) \ + if(need_encode_func(c)) { \ + static const char* hex="0123456789ABCDEF"; \ + CORD_ec_append(info->result, prefix); \ + CORD_ec_append(info->result, hex[((unsigned char)c)/0x10]); \ + CORD_ec_append(info->result, hex[((unsigned char)c)%0x10]); \ + } else \ + CORD_ec_append(info->result, otherwise); +#define to_char(c) { CORD_ec_append(info->result, c); whitespace=false; } +#define to_string(s) { CORD_ec_append_cord(info->result, s); whitespace=false; } - return !strchr( -#ifdef WIN32 - ":\\" +inline bool need_file_encode(unsigned char c){ + // russian letters and space ENABLED + // encoding only these... + return strchr( + "*?'\"<>|" +#ifndef WIN32 + ":\\" #endif - "./", c); + , c)!=0; } + inline bool need_uri_encode(unsigned char c){ - if((c>='0') &&(c<='9') ||(c>='A') &&(c<='Z') ||(c>='a') &&(c<='z')) + if((c>='0') &&(c<='9') ||(c>='A') &&(c<='Z') ||(c>='a') &&(c<='z')) return false; - return !strchr("_-./", c); + return !strchr("_-./", c); } -inline bool need_header_encode(unsigned char c){ - if(strchr(" , :", c)) + +inline bool need_http_header_encode(unsigned char c){ + if(strchr(" , :", c)) return false; return need_uri_encode(c); } +inline bool need_regex_escape(unsigned char c){ + return strchr("\\^$.[]|()?*+{}-", c)!=0; +} + +inline bool need_parser_code_escape(unsigned char c){ + return strchr("^$;@()[]{}:#\"", c)!=0; +} + // String -static bool typo_present(Array::Item *value, const void *info) { - Array *row=static_cast(value); - const char *src=static_cast(info); - - int partial; - row->get_string(0)->cmp(partial, src); - return - partial==0 || // full match - partial==1; // typo left column starts 'src' +/* +HTTP-header = field-name ":" [ field-value ] CRLF + + field-name = token + field-value = *( field-content | LWS ) + + field-content = + + + +word = token | quoted-string + +token = 1* + + + +tspecials = "(" | ")" | "<" | ">" | "@" + | "," | ";" | ":" | "\" | <"> + | "/" | "[" | "]" | "?" | "=" + | "{" | "}" | SP | HT + +SP = +HT = + +LWS = [CRLF] 1*( SP | HT ) +TEXT = + +quoted-pair = "\" CHAR + + if(strchr("()<>@,;:\\\"/[]?={} \t", *ptr)) +*/ +inline bool need_quote_http_header(const char* ptr, size_t size) { + for(; size--; ptr++) + if(strchr(";\\\"= \t" /* excluded ()<>@, :/ ? []{} */, *ptr)) + return true; + return false; +} + +#ifndef DOXYGEN +struct Append_fragment_info { + String::Language lang; + String::Languages* dest_languages; + size_t dest_body_plan_length; +}; +#endif +int append_fragment_optimizing(char alang, size_t asize, Append_fragment_info* info) { + const String::Language lang=(String::Language)(unsigned char)alang; + // main idea here: + // tainted piece would get OPTIMIZED bit from 'lang' + // clean piece would be marked OPTIMIZED manually + // pieces with determined languages [not tainted|clean] would retain theirs langs + info->dest_languages->append(info->dest_body_plan_length, + lang==String::L_TAINTED? + info->lang + :lang==String::L_CLEAN? + (String::Language)(String::L_CLEAN|String::L_OPTIMIZE_BIT) // ORing with OPTIMIZED flag + :lang, + asize); + info->dest_body_plan_length+=asize; + + return 0; // 0=continue +} +int append_fragment_nonoptimizing(char alang, size_t asize, Append_fragment_info* info) { + const String::Language lang=(String::Language)(unsigned char)alang; + // The core idea: tainted pieces got marked with context's lang + info->dest_languages->append(info->dest_body_plan_length, + lang==String::L_TAINTED? + info->lang + :lang, + asize); + info->dest_body_plan_length+=asize; + + return 0; // 0=continue } +/** + appends to other String, + marking all tainted pieces of it with @a lang. + or marking ALL pieces of it with a @a lang when @a forced to, + and propagating OPTIMIZE language bit. +*/ +String& String::append_to(String& dest, Language ilang, bool forced) const { + if(is_empty()) + return dest; + + // first: fragment infos + + if(ilang==L_PASS_APPENDED) // without language-change? + dest.langs.appendHelper(dest.body, langs, body); + else if(forced) //forcing passed lang? + dest.langs.appendHelper(dest.body, ilang, body); + else { + if(langs.opt.is_not_just_lang){ + Append_fragment_info info={ilang, &dest.langs, dest.body.length()}; + langs.for_each(body, ilang&L_OPTIMIZE_BIT? + append_fragment_optimizing + :append_fragment_nonoptimizing, &info); + } else { + Language lang=langs.opt.lang; + // see append_fragment_* for explanation + if(ilang&L_OPTIMIZE_BIT){ + dest.langs.appendHelper(dest.body, + lang==String::L_TAINTED? + ilang + :lang==String::L_CLEAN? + (String::Language)(String::L_CLEAN|String::L_OPTIMIZE_BIT) + :lang, + body); + } else { + dest.langs.appendHelper(dest.body, lang==String::L_TAINTED ? ilang:lang, body); + } + } + } + + // next: letters + dest.body<. An 'encoded-word' that appears within a + 'phrase' MUST be separated from any adjacent 'word', 'text' or + 'special' by 'linear-white-space'. +... + (2) The 8-bit hexadecimal value 20 (e.g., ISO-8859-1 SPACE) may be + represented as "_" (underscore, ASCII 95.). (This character may + not pass through some internetwork mail gateways, but its use + will greatly enhance readability of "Q" encoded data with mail + readers that do not support this encoding.) Note that the "_" + always represents hexadecimal 20, even if the SPACE character + occupies a different code position in the character set in use. + + paf: obviously, + without "=", or one could not differ "=E0" and "russian letter a" + and without "_", or in would mean 0x20 +*/ +inline bool mail_header_char_valid_within_Qencoded(char c) { + return c>='A' && c<='Z' + || c>='a' && c<='Z' + || c>='0' && c<='9' + || strchr("!*+-/", c); +} +inline bool addr_spec_soon(const char *src) { + for(char c; (c=*src); src++) + if(c=='<') + return true; + else if(!(c==' ' || c=='\t')) + return false; + return false; +} /** - @test optimize whitespaces for all but 'html' - @todo fix theoretical \n mem overrun in TYPO replacements + RFC + Upper case should be used for hexadecimal digits "A" through "F" + The 8-bit hexadecimal value 20 (e.g., ISO-8859-1 SPACE) + may be represented as "_" */ -char *String::store_to(char *dest, Untaint_lang lang, SQL_Connection *connection) const { - // $MAIN:html-typo table - Table *user_typo_table=static_cast(pool().tag()); - Table *typo_table=user_typo_table?user_typo_table:default_typo_table; - - const Chunk *chunk=&head; - do { - const Chunk::Row *row=chunk->rows; - for(size_t i=0; icount; i++, row++) { - if(row==append_here) - goto break2; - - // WARNING: - // string can grow only UNTAINT_TIMES_BIGGER - switch(lang==UL_UNKNOWN?row->item.lang:lang) { - case UL_CLEAN: - // clean piece - case UL_TAINTED: - // tainted piece, but undefined untaint language - // for VString.as_double of tainted values - // for ^process{body} evaluation - case UL_AS_IS: - // tainted, untaint language: as-is - memcpy(dest, row->item.ptr, row->item.size); - dest+=row->item.size; - break; - case UL_FILE_NAME: - // tainted, untaint language: file [name] - escape(switch(*src) { - case ' ': to_char('_'); break; - encode(need_file_encode, '-'); - }); - break; - case UL_URI: - // tainted, untaint language: uri - escape(switch(*src) { - case ' ': to_char('+'); break; - encode(need_uri_encode, '%'); - }); - break; - case UL_HEADER: - // tainted, untaint language: header - escape(switch(*src) { - encode(need_header_encode, '%'); - }); - break; - case UL_TABLE: - // tainted, untaint language: table - escape(switch(*src) { - case '\t': to_char(' '); break; - case '\n': to_char(' '); break; - _default; - }); - break; - case UL_SQL: - // tainted, untaint language: sql - if(connection) - dest+=connection->quote(dest, row->item.ptr, row->item.size); - else - THROW(0, 0, - this, - "untaint in SQL language failed - no connection specified"); - break; - case UL_JS: - escape(switch(*src) { - case '"': to_string("\\\"", 2); break; - case '\'': to_string("\\'", 2); break; - case '\n': to_string("\\n", 2); break; - case '\\': to_string("\\\\", 2); break; - case '\xFF': to_string("\\\xFF", 2); break; - _default; - }); - break; - case UL_HTML: - escape(switch(*src) { - case '&': to_string("&", 5); break; - case '>': to_string(">", 4); break; - case '<': to_string("<", 4); break; - case '"': to_string(""", 6); break; - //TODO: XSLT case '\'': to_string("'", 6); break; - _default; - }); +inline bool mail_header_nonspace_char(char c) { + return c != 0x20; +} + +inline void ec_append(CORD_ec& result, bool& optimize, bool& whitespace, CORD_pos pos, size_t size) { + while(size--) { + CORD_ec_append(result, CORD_pos_fetch(pos)); + CORD_next(pos); + } +} +inline void pa_CORD_pos_advance(CORD_pos pos, size_t n) { + while(true) { + long avail=CORD_pos_chars_left(pos); + if(avail<=0) { + CORD_next(pos); + if(!--n) break; - case UL_HTML_TYPO: { - // tainted, untaint language: html-typo - char *html_for_typo=(char *)malloc(size()*2/* '\n' -> '\' 'n' */+1); - // note: - // there still is a possibility that user - // would not replace \n as she supposed to - // and rather replace \ and n into huge strings - // thus causing memory overrun - // this can be dealed by allocating *2 memory, but that's too expensive - size_t html_for_typo_size; - { // local dest - char *dest=html_for_typo; - escape(switch(*src) { - // convinient name for typo match "\n" - case '\r': - if(typo_table) { - *dest++='\\'; *dest++='n'; // \r -> \n - if(src[1]=='\n') { // \r\n -> remove \n - size--; src++; - } - } - break; - case '\n': - if(typo_table) - to_string("\\n", 2); - break; - //TODO: XSLT case '\'': to_string("'", 6); break; - _default; - }); - *dest=0; - html_for_typo_size=dest-html_for_typo; + } else if((size_t)avail=n + CORD_pos_advance(pos, n); + break; + } + } +} + +#ifndef DOXYGEN +struct Cstr_to_string_body_block_info { + // input + String::Language lang; + SQL_Connection* connection; + const Request_charsets* charsets; + const String::Body* body; + + // output + CORD_ec result; + + // private + CORD_pos pos; + size_t fragment_begin; + bool whitespace; + const char* exception; +}; +#endif + +// @todo: replace info->body->mid with something that uses info->pos +int cstr_to_string_body_block(String::Language to_lang, size_t fragment_length, Cstr_to_string_body_block_info* info) { + bool& whitespace=info->whitespace; + size_t fragment_end=info->fragment_begin+fragment_length; + //fprintf(stderr, "%d, %d =%s=\n", to_lang, fragment_length, info->body->cstr()); + + bool optimize=(to_lang & String::L_OPTIMIZE_BIT)!=0; + if(!optimize) + whitespace=false; + + switch(to_lang & ~String::L_OPTIMIZE_BIT) { + case String::L_CLEAN: + case String::L_TAINTED: + case String::L_AS_IS: + // clean piece + + // tainted piece, but undefined untaint language + // for VString.as_double of tainted values + // for ^process{body} evaluation + + // tainted, untaint language: as-is + ec_append(info->result, optimize, whitespace, info->pos, fragment_length); + break; + case String::L_FILE_SPEC: + // tainted, untaint language: file [name] + { + bool is1251=(info->charsets && info->charsets->source().NAME()=="WINDOWS-1251"); + escape_fragment( + // Macintosh has problems with small Russian letter 'r' + if( is1251 && c=='\xF0' ) { + // fixing that letter for most common charset + to_char('p'); + } else // fallback to default + encode(need_file_encode, '_', c); + ); + } + break; + case String::L_FILE_POST: + { + escape_fragment(switch(c) { + case '\0': to_string("\\0"); break; + case '\\': to_string("\\\\"); break; + default: _default; break; + }); + } + break; + case String::L_URI: + case String::L_HTTP_HEADER: + // tainted, untaint language: http-field-content-text + escape_fragment( + encode(need_uri_encode, '%', c); + ); + break; + case String::L_MAIL_HEADER: + // tainted, untaint language: mail-header + // http://www.ietf.org/rfc/rfc2047.txt + if(info->charsets) { + size_t mail_size; + const char *mail_ptr= + info->body->mid(info->fragment_begin, mail_size=fragment_length).cstr(); + // skip source [we use recoded version] + pa_CORD_pos_advance(info->pos, mail_size); + + const char* charset_name=info->charsets->mail().NAME().cstr(); + + // Subject: Re: parser3: =?koi8-r?Q?=D3=C5=CD=C9=CE=C1=D2?= + bool to_quoted_printable=false; + + bool email=false; + uchar c; + for(const char* src=mail_ptr; (c=(uchar)*src++); ) { + if(c=='\r' || c=='\n') + c=' '; + if(to_quoted_printable && (c==',' || c == '"' || addr_spec_soon(src-1/*position to 'c'*/))) { + email=c=='<'; + to_string("?="); + to_quoted_printable=false; } - // typo table replacements - const char *src=html_for_typo; - do { - // there is a row where first column starts 'src' - if(Table::Item *item=typo_table->first_that(typo_present, src)) { - // get a=>b values - const String& a=*static_cast(item)->get_string(0); - const String& b=*static_cast(item)->get_string(1); - // empty 'a' | 'b' checks - if(a.size()==0 || b.size()==0) { - pool().set_tag(default_typo_table); // avoid recursion - THROW(0, 0, - typo_table->origin_string(), - "typo table column elements must not be empty"); - } - // overflow check: - // b allowed to be max UNTAINT_TIMES_BIGGER then a - if(b.size()>UNTAINT_TIMES_BIGGER*a.size()) { - pool().set_tag(default_typo_table); // avoid recursion - THROW(0, 0, - &b, - "is %g times longer then '%s', " - "while maximum, handled by Parser, is %d", - ((double)b.size())/a.size(), - a.cstr(), - UNTAINT_TIMES_BIGGER); - } - - // skip 'a' in 'src' - src+=a.size(); - // write 'b' to 'dest' - b.store_to(dest); - dest+=b.size(); - } else - *dest++=*src++; - } while(*src); + //RFC + An 'encoded-word' MUST NOT appear in any portion of an 'addr-spec'. + if(!email && ( + !to_quoted_printable && (c & 0x80) // starting quote-printable-encoding on first 8bit char + || to_quoted_printable && !mail_header_char_valid_within_Qencoded(c) + )) { + if(!to_quoted_printable) { + to_string("=?"); + to_string(charset_name); + to_string("?Q?"); + to_quoted_printable=true; + } + encode(mail_header_nonspace_char, '=', '_'); + } else + to_char(c); + if(c=='>') + email=false; + } + if(to_quoted_printable) // close + to_string("?="); + + } else + ec_append(info->result, optimize, whitespace, info->pos, fragment_length); + break; + case String::L_SQL: + // tainted, untaint language: sql + if(info->connection) { + const char *fragment_str=info->body->mid(info->fragment_begin, fragment_length).cstr(); + // skip source [we use recoded version] + pa_CORD_pos_advance(info->pos, fragment_length); + + to_string(info->connection->quote(fragment_str, fragment_length)); + } else { + info->exception="untaint in SQL language failed - no connection specified"; + info->fragment_begin=fragment_end; + return 1; // stop processing. can't throw exception here + } + break; + case String::L_JS: + escape_fragment(switch(c) { + case '\n': to_string("\\n"); break; + case '"': to_string("\\\""); break; + case '\'': to_string("\\'"); break; + case '\\': to_string("\\\\"); break; + case '\xFF': to_string("\\\xFF"); break; + case '\r': to_string("\\r"); break; + default: _default; break; + }); + break; + case String::L_XML: + // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] + escape_fragment(switch(c) { + case '\x20': + case '\x9': + case '\xA': + case '\xD': // this is usually removed on input + _default; break; + case '&': to_string("&"); break; + case '>': to_string(">"); break; + case '<': to_string("<"); break; + case '"': to_string("""); break; + case '\'': to_string("'"); break; + default: + if(((unsigned char)c)<0x20) { + // fixing it, so that libxml would not result + // in fatal error parsing text + // though it really violates standard. + // to indicate there were an error + // replace bad char not to it's code, + // which we can do, + // but rather to '!' to show that input were actually + // invalid. + // life: shows that MSIE can somehow garble form values + // so that they contain these chars. + to_char('!'); + } else { + _default; } - default: - THROW(0, 0, - this, - "unknown untaint language #%d of %d piece", - static_cast(row->item.lang), - i); - } + break; + }); + break; + case String::L_HTML: + escape_fragment(switch(c) { + case '&': to_string("&"); break; + case '>': to_string(">"); break; + case '<': to_string("<"); break; + case '"': to_string("""); break; + default: _default; break; + }); + break; + case String::L_REGEX: + // tainted, untaint language: regex + escape_fragment( + if(need_regex_escape(c)) + to_char('\\') + _default; + ); + break; + case String::L_HTTP_COOKIE: + // tainted, untaint language: cookie (3.3.0 and higher: %uXXXX in UTF-8) + { + const char *fragment_str=info->body->mid(info->fragment_begin, fragment_length).cstr(); + // skip source [we use recoded version] + pa_CORD_pos_advance(info->pos, fragment_length); + String::C output(fragment_str, fragment_length); + + output=Charset::escape(output, info->charsets->source()); + //throw Exception(0, 0, output); + to_string(output); + } - chunk=row->link; - } while(chunk); -break2: - return dest; + break; + case String::L_PARSER_CODE: + // for auto-untaint in process + escape_fragment( + if(need_parser_code_escape(c)) + to_char('^'); + _default; + ); + break; + default: + SAPI::abort("unknown untaint language #%d", + static_cast(to_lang)); // should never + break; // never + } + + info->fragment_begin=fragment_end; + + return 0; // 0=continue +} + + +String::Body String::cstr_to_string_body_taint(Language lang, SQL_Connection* connection, const Request_charsets *charsets) const { + if(is_empty()) + return String::Body(); + + Cstr_to_string_body_block_info info; + // input + info.lang=lang; + info.connection=connection; + info.charsets=charsets; + info.body=&body; + // output + CORD_ec_init(info.result); + // private + body.set_pos(info.pos, 0); + info.fragment_begin=0; + info.exception=0; + info.whitespace=true; + + cstr_to_string_body_block(lang, length(), &info); + + if(info.exception) + throw Exception(0, + 0, + info.exception); + + return String::Body(CORD_ec_to_cord(info.result)); +} + +int cstr_to_string_body_block_untaint(char alang, size_t fragment_length, Cstr_to_string_body_block_info* info){ + const String::Language lang=(String::Language)(unsigned char)alang; + // see append_fragment_* for explanation + if(info->lang&String::L_OPTIMIZE_BIT) + return cstr_to_string_body_block( + lang==String::L_TAINTED? + info->lang + :lang==String::L_CLEAN? + (String::Language)(String::L_CLEAN|String::L_OPTIMIZE_BIT) + :lang, + fragment_length, info); + else + return cstr_to_string_body_block(lang==String::L_TAINTED ? info->lang:lang, fragment_length, info); +} + +String::Body String::cstr_to_string_body_untaint(Language lang, SQL_Connection* connection, const Request_charsets *charsets) const { + if(is_empty()) + return String::Body(); + + Cstr_to_string_body_block_info info; + // input + info.lang=lang; + info.connection=connection; + info.charsets=charsets; + info.body=&body; + // output + CORD_ec_init(info.result); + // private + body.set_pos(info.pos, 0); + info.fragment_begin=0; + info.exception=0; + info.whitespace=true; + + langs.for_each(body, cstr_to_string_body_block_untaint, &info); + + if(info.exception) + throw Exception(0, + 0, + info.exception); + + return String::Body(CORD_ec_to_cord(info.result)); +} + +const char* String::transcode_and_untaint_cstr(Language lang, const Request_charsets *charsets) const { + if(charsets && &charsets->source() != &charsets->client()){ + return cstr_to_string_body_transcode_and_untaint(lang, charsets).cstr(); + } else { + return cstr_to_string_body_untaint(lang, 0, charsets).cstr(); + } +} + +int cstr_to_string_body_block_transcode_and_untaint(char alang, size_t fragment_length, Cstr_to_string_body_block_info* info){ + String::C output_c=Charset::transcode( + String::C(info->body->mid(info->fragment_begin, fragment_length).cstr(), fragment_length), + info->charsets->source(), + info->charsets->client() + ); + String::Body output_body=String::Body(output_c); + + size_t fragment_end=info->fragment_begin+fragment_length; + const String::Body* info_body=info->body; + + info->fragment_begin=0; + info->body=&output_body; + info->body->set_pos(info->pos, 0); + + int result=cstr_to_string_body_block_untaint(alang, output_c.length, info); + + info->fragment_begin=fragment_end; + info->body=info_body; + + return result; +} + +String::Body String::cstr_to_string_body_transcode_and_untaint(Language lang, const Request_charsets *charsets) const { + if(is_empty()) + return String::Body(); + + Cstr_to_string_body_block_info info; + // input + info.lang=lang; + info.connection=0; + info.charsets=charsets; + info.body=&body; + // output + CORD_ec_init(info.result); + // private + body.set_pos(info.pos, 0); + info.fragment_begin=0; + info.exception=0; + info.whitespace=true; + + langs.for_each(body, cstr_to_string_body_block_transcode_and_untaint, &info); + + if(info.exception) + throw Exception(0, + 0, + info.exception); + + return String::Body(CORD_ec_to_cord(info.result)); }