--- parser3/src/main/untaint.C 2001/06/28 07:44:17 1.53 +++ parser3/src/main/untaint.C 2001/10/08 08:52:45 1.65 @@ -2,10 +2,10 @@ Parser: String class part: untaint mechanizm. Copyright(c) 2001 ArtLebedev Group(http://www.artlebedev.com) - Author: Alexander Petrosyan (http://design.ru/paf) + + $Id: untaint.C,v 1.65 2001/10/08 08:52:45 parser Exp $ */ -static const char *RCSId="$Id: untaint.C,v 1.53 2001/06/28 07:44:17 parser Exp $"; #include "pa_pool.h" #include "pa_string.h" @@ -14,6 +14,7 @@ static const char *RCSId="$Id: untaint.C #include "pa_table.h" #include "pa_globals.h" #include "pa_sql_connection.h" +#include "pa_dictionary.h" #define escape(action) \ { \ @@ -29,13 +30,13 @@ static const char *RCSId="$Id: untaint.C char chunk[3]={prefix}; \ chunk[1]=hex[((unsigned char)*src)/0x10]; \ chunk[2]=hex[((unsigned char)*src)%0x10]; \ - strncpy(dest, chunk, 3); dest+=3; \ + memcpy(dest, chunk, 3); dest+=3; \ } else \ *dest++=*src; \ break #define to_char(c) *dest++=c #define to_string(b, bsize) \ - strncpy(dest, b, bsize); \ + memcpy(dest, b, bsize); \ dest+=bsize; \ inline bool need_file_encode(unsigned char c){ @@ -61,6 +62,31 @@ inline bool need_http_header_encode(unsi return need_uri_encode(c); } +// + +static const char * String_Untaint_lang_name[]={ + "U", ///< zero value handy for hash lookup @see untaint_lang_name2enum + "C", ///< clean + "T", ///< tainted, untaint language as assigned later + // untaint languages. assigned by ^untaint[lang]{...} + "P", + /**< + leave language built into string being appended. + just a flag, that value not stored + */ + "A", ///< leave all characters intact + "F", ///< filename + "H", ///< text in HTTP response header + "M", ///< text in mail header + "URI", ///< text in uri + "T", ///< ^table:set body + "SQL", ///< ^table:sql body + "JS", ///< JavaScript code + "HTML", ///< HTML code (for editing) + "UHTML", ///< HTML code with USER chars +}; + + // String static bool typo_present(Array::Item *value, const void *info) { @@ -116,20 +142,24 @@ inline bool need_quote_http_header(const return false; } -/// @todo maybe additional check "are all pieces are clean?" would be profitable? +/** @todo maybe additional check "are all pieces are clean?" would be profitable? + @todo fix potential forigins_mode buf overrun +*/ size_t String::cstr_bufsize(Untaint_lang lang) const { - return (lang==UL_AS_IS?size():size()*UNTAINT_TIMES_BIGGER) +1; + return (lang==UL_AS_IS?size():size()*UNTAINT_TIMES_BIGGER*(forigins_mode?10:1)) +1; } -/// @todo fix theoretical \n mem overrun in TYPO replacements +/** @todo fix theoretical \n mem overrun in TYPO replacements +*/ char *String::store_to(char *dest, Untaint_lang lang, SQL_Connection *connection, const char *charset) const { // $MAIN:html-typo table - Table *user_typo_table=static_cast(pool().tag()); - Table *typo_table=user_typo_table?user_typo_table:default_typo_table; + Dictionary *user_typo_dict=static_cast(pool().tag()); + Dictionary *typo_dict=user_typo_dict?user_typo_dict:default_typo_dict; bool whitespace=true; + bool need_to_close_http_header_quote=false; const Chunk *chunk=&head; do { const Chunk::Row *row=chunk->rows; @@ -137,9 +167,27 @@ char *String::store_to(char *dest, Untai if(row==append_here) goto break2; + Untaint_lang to_lang=lang==UL_UNSPECIFIED?row->item.lang:lang; + + char *dest_before_origins=dest; + + if(forigins_mode) { +#ifndef NO_STRING_ORIGIN + if(row->item.origin.file) + dest+=sprintf(dest, "%s(%d)", + row->item.origin.file, + 1+row->item.origin.line); + else + dest+=sprintf(dest, "unknown"); +#endif + dest+=sprintf(dest, "#%s: ", + String_Untaint_lang_name[to_lang]); + } + char *dest_after_origins=dest; + // WARNING: // string can grow only UNTAINT_TIMES_BIGGER - switch(lang==UL_UNSPECIFIED?row->item.lang:lang) { + switch(to_lang) { case UL_CLEAN: // clean piece { // optimizing whitespace @@ -168,7 +216,7 @@ char *String::store_to(char *dest, Untai memcpy(dest, row->item.ptr, row->item.size); dest+=row->item.size; break; - case UL_FILE_NAME: + case UL_FILE_SPEC: // tainted, untaint language: file [name] escape(switch(*src) { case ' ': to_char('_'); break; @@ -190,7 +238,7 @@ char *String::store_to(char *dest, Untai case '\"': to_string("\\\"", 2); break; _default; }); - *dest++='\"'; + need_to_close_http_header_quote=true; } else { memcpy(dest, row->item.ptr, row->item.size); dest+=row->item.size; @@ -201,19 +249,19 @@ char *String::store_to(char *dest, Untai if(charset) { // Subject: Re: parser3: =?koi8-r?Q?=D3=C5=CD=C9=CE=C1=D2?= const char *src=row->item.ptr; - bool to_base_64=false; + bool to_quoted_printable=false; for(int size=row->item.size; size--; src++) { if(*src & 0x80) { - if(!to_base_64) { + if(!to_quoted_printable) { dest+=sprintf(dest, "=?%.15s?Q?", charset); - to_base_64=true; + to_quoted_printable=true; } dest+=sprintf(dest, "=%02X", *src & 0xFF); } else { *dest++=*src; } } - if(to_base_64) // close + if(to_quoted_printable) // close dest+=sprintf(dest, "?="); } else { memcpy(dest, row->item.ptr, row->item.size); @@ -247,18 +295,32 @@ char *String::store_to(char *dest, Untai _default; }); break; + case UL_XML: + escape(switch(*src) { + case '&': to_string("&", 5); break; + case '>': to_string(">", 4); break; + case '<': to_string("<", 4); break; + case '"': to_string(""", 6); break; + case '\'': to_string("'", 6); break; + _default; + }); + break; case UL_HTML: escape(switch(*src) { case '&': to_string("&", 5); break; case '>': to_string(">", 4); break; case '<': to_string("<", 4); break; case '"': to_string(""", 6); break; - //TODO: XSLT case '\'': to_string("'", 6); break; _default; }); break; case UL_USER_HTML: { // tainted, untaint language: html-typo + if(!typo_dict) // never, always has default + THROW(0, 0, + this, + "untaint to user-html lang failed, no typo table"); + char *html_for_typo= (char *)malloc(row->item.size*2/* '\n' -> '\' 'n' */+1); // note: @@ -273,18 +335,14 @@ char *String::store_to(char *dest, Untai escape(switch(*src) { // convinient name for typo match "\n" case '\r': - if(typo_table) { - *dest++='\\'; *dest++='n'; // \r -> \n - if(src[1]=='\n') { // \r\n -> remove \n - size--; src++; - } + to_string("\\n", 2); // \r -> "\n" + if(size && src[1]=='\n') { // \r\n -> remove \n + size--; src++; } break; case '\n': - if(typo_table) - to_string("\\n", 2); + to_string("\\n", 2); break; - //TODO: XSLT case '\'': to_string("'", 6); break; _default; }); *dest=0; @@ -294,21 +352,14 @@ char *String::store_to(char *dest, Untai const char *src=html_for_typo; do { // there is a row where first column starts 'src' - if(Table::Item *item=typo_table->first_that(typo_present, src)) { + if(Table::Item *item=typo_dict->first_that_starts(src)) { // get a=>b values const String& a=*static_cast(item)->get_string(0); const String& b=*static_cast(item)->get_string(1); - // empty 'a' | 'b' checks - if(a.size()==0 || b.size()==0) { - pool().set_tag(default_typo_table); // avoid recursion - THROW(0, 0, - typo_table->origin_string(), - "typo table column elements must not be empty"); - } // overflow check: // b allowed to be max UNTAINT_TIMES_BIGGER then a if(b.size()>UNTAINT_TIMES_BIGGER*a.size()) { - pool().set_tag(default_typo_table); // avoid recursion + pool().set_tag(0); // avoid recursion THROW(0, 0, &b, "is %g times longer then '%s', " @@ -322,6 +373,7 @@ char *String::store_to(char *dest, Untai src+=a.size(); // write 'b' to 'dest' b.store_to(dest); + // skip 'b' in 'dest' dest+=b.size(); } else *dest++=*src++; @@ -339,9 +391,25 @@ char *String::store_to(char *dest, Untai if((lang==UL_UNSPECIFIED?row->item.lang:lang)!=UL_CLEAN) whitespace=false; + + if(forigins_mode) + if(dest==dest_after_origins) // never moved==optimized space + dest=dest_before_origins; + else { + for(char *p=dest_after_origins; plink; } while(chunk); + + if(need_to_close_http_header_quote) + *dest++='\"'; break2: return dest; }