|
|
1.7 paf 1: /** @file
1.8 paf 2: Parser: String class part: untaint mechanism.
3:
1.13 ! paf 4: Copyright(c) 2001 ArtLebedev Group(http://www.artlebedev.com)
1.8 paf 5:
1.13 ! paf 6: Author: Alexander Petrosyan <paf@design.ru>(http://design.ru/paf)
1.1 paf 7:
1.13 ! paf 8: $Id: untaint.C,v 1.12 2001/03/23 13:08:11 paf Exp $
1.1 paf 9: */
10:
1.12 paf 11: #include "pa_config_includes.h"
1.1 paf 12:
13: #include "pa_pool.h"
14: #include "pa_string.h"
15: #include "pa_hash.h"
16: #include "pa_exception.h"
1.13 ! paf 17: #include "pa_table.h"
1.1 paf 18:
/*
	Escaping helpers used by String::store_to.

	They expand in a scope that must provide
		row  - the piece being written (row->item.ptr, row->item.size)
		dest - char* output cursor, moved past every byte written
	escape() itself declares `src`, the read cursor over the piece.
*/

/* Runs `cases` (switch labels built with the macros below) on each byte of the piece. */
#define escape(cases) \
	{ \
		const char *src=row->item.ptr; \
		int escape_left=row->item.size; \
		while(escape_left--) { \
			switch(*src) { \
				cases \
			} \
			src++; \
		} \
	}

/* Byte `a` is written as the single char `c`. */
#define to_char(a, c) \
	case a: \
		*dest++=c; \
		break

/* Every byte without a label of its own is copied unchanged. */
#define _default \
	default: \
		*dest++=*src; \
		break

/* Byte `a` is written as `bsize` bytes taken from `b` (strncpy semantics). */
#define to_string(a, b, bsize) \
	case a: \
		strncpy(dest, b, bsize); \
		dest+=bsize; \
		break

/*
	Default label: a byte for which need_encode_func() is true is written as
	`prefix` followed by its two uppercase hex digits; any other byte is
	copied unchanged.
*/
#define encode(need_encode_func, prefix) \
	default: \
		if(!need_encode_func(*src)) \
			*dest++=*src; \
		else { \
			static const char *hex="0123456789ABCDEF"; \
			const unsigned char code=(unsigned char)*src; \
			char encoded[3]={prefix}; \
			encoded[1]=hex[code>>4]; \
			encoded[2]=hex[code&0x0F]; \
			strncpy(dest, encoded, 3); \
			dest+=3; \
		} \
		break
1.4 paf 45:
/**
	Tells whether byte @a c has to be hex-escaped when a piece is written
	in the file-name untaint language.

	ASCII letters, digits, '.' and '/' pass through; every other byte,
	including the zero byte, needs encoding.
*/
inline bool need_file_encode(unsigned char c){
	// strchr() also matches the terminating zero of the set, so a zero byte
	// would otherwise be reported as safe and written out raw
	if(c==0)
		return true;

	if((c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z'))
		return false;

	return !strchr("./", c);
}
/**
	Tells whether byte @a c has to be hex-escaped when a piece is written
	in the URI untaint language.

	ASCII letters, digits and the characters "_-./" pass through; every
	other byte, including the zero byte, needs encoding.
*/
inline bool need_uri_encode(unsigned char c){
	// strchr() also matches the terminating zero of the set, so a zero byte
	// would otherwise be reported as safe and written out raw
	if(c==0)
		return true;

	if((c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z'))
		return false;

	return !strchr("_-./", c);
}
58: inline bool need_header_encode(unsigned char c){
59: if(strchr(" ,:", c))
60: return false;
61:
62: return need_uri_encode(c);
1.4 paf 63: }
1.1 paf 64:
65: // String
66:
1.13 ! paf 67: static bool typo_present(Array::Item *value, const void *info) {
! 68: Array *row=static_cast<Array *>(value);
! 69: const char *src=static_cast<const char *>(info);
! 70:
! 71: int partial;
! 72: row->get_string(0)->cmp(src, partial);
! 73: return partial==-1;
! 74: }
! 75:
1.9 paf 76: /// @todo optimize whitespaces for all but 'html'
1.13 ! paf 77: char *String::store_to(char *dest) const {
! 78: // $MAIN:html-typo table
! 79: Table *typo_table=static_cast<Table *>(pool().tag());
1.1 paf 80:
81: const Chunk *chunk=&head;
82: do {
83: const Chunk::Row *row=chunk->rows;
84: for(int i=0; i<chunk->count; i++) {
85: if(row==append_here)
86: goto break2;
87:
88: // WARNING:
89: // string can grow only UNTAINT_TIMES_BIGGER
90: switch(row->item.lang) {
1.11 paf 91: case UL_NO:
1.1 paf 92: // clean piece
1.11 paf 93: case UL_YES:
1.1 paf 94: // tainted piece, but undefined untaint language
95: // for VString.get_double of tainted values
96: // for ^process{body} evaluation
1.11 paf 97: case UL_AS_IS:
1.1 paf 98: // tainted, untaint language: as-is
1.13 ! paf 99: memcpy(dest, row->item.ptr, row->item.size);
! 100: dest+=row->item.size;
1.1 paf 101: break;
1.11 paf 102: case UL_FILE_NAME:
1.9 paf 103: // tainted, untaint language: file [name]
104: escape(
1.13 ! paf 105: to_char(' ', '_');
! 106: encode(need_file_encode, '-');
1.9 paf 107: );
108: break;
1.11 paf 109: case UL_URI:
1.4 paf 110: // tainted, untaint language: uri
111: escape(
1.13 ! paf 112: to_char(' ', '+');
! 113: encode(need_uri_encode, '%');
1.5 paf 114: );
115: break;
1.11 paf 116: case UL_HEADER:
1.5 paf 117: // tainted, untaint language: header
118: escape(
1.13 ! paf 119: encode(need_header_encode, '%');
1.4 paf 120: );
121: break;
1.11 paf 122: case UL_TABLE:
1.1 paf 123: escape(
1.13 ! paf 124: to_char('\t', ' ');
! 125: to_char('\n', ' ');
! 126: _default;
1.1 paf 127: );
128: break;
1.11 paf 129: case UL_SQL:
1.1 paf 130: // tainted, untaint language: sql
131: // TODO: зависимость от sql сервера
1.13 ! paf 132: memset(dest, '?', row->item.size);
! 133: dest+=row->item.size;
1.1 paf 134: break;
1.11 paf 135: case UL_JS:
1.1 paf 136: escape(
1.13 ! paf 137: to_string('"', "\\\"", 2);
! 138: to_string('\'', "\\'", 2);
! 139: to_string('\n', "\\n", 2);
! 140: to_string('\r', "\\r", 2);
! 141: to_string('\\', "\\\\", 2);
! 142: to_string('\xFF', "\\\xFF", 2);
! 143: _default;
1.1 paf 144: );
145: break;
1.11 paf 146: case UL_HTML:
1.1 paf 147: escape(
1.13 ! paf 148: to_string('&', "&", 5); // BEFORE consequent relpaces yelding '&'
! 149: to_string('>', ">", 4);
! 150: to_string('<', "<",4);
! 151: to_string('"', """,6);
! 152: to_char('\t', ' ');
! 153: //TODO: XSLT to_string('\'', "'", 6)
! 154: _default;
1.1 paf 155: );
156: break;
1.13 ! paf 157: case UL_HTML_TYPO: {
1.1 paf 158: // tainted, untaint language: html-typo
1.13 ! paf 159: char *html=(char *)malloc(size()*6/*""" the longest possible*/+1);
! 160: size_t html_size;
! 161: { // local dest
! 162: char *dest=html;
! 163: escape(
! 164: to_string('&', "&", 5); // BEFORE consequent relpaces yelding '&'
! 165: to_string('>', ">", 4);
! 166: to_string('<', "<",4);
! 167: to_string('"', """,6);
! 168: to_char('\t', ' ');
! 169: _default;
! 170: );
! 171: *dest=0;
! 172: html_size=dest-html;
! 173: }
! 174: // typo table replacements
! 175: if(typo_table) {
! 176: const char *src=html;
! 177: do {
! 178: // there is a row where first column starts 'src'
! 179: if(Table::Item *item=typo_table->first_that(typo_present, src)) {
! 180: // get a=>b values
! 181: const String& a=*static_cast<Array *>(item)->get_string(0);
! 182: const String& b=*static_cast<Array *>(item)->get_string(1);
! 183: // empty 'a' check
! 184: if(a.size()==0) {
! 185: pool().set_tag(0); // avoid recursion
! 186: THROW(0, 0,
! 187: &a,
! 188: "typo table first column elements must not be empty");
! 189: }
! 190: // overflow check:
! 191: // b allowed to be max UNTAINT_TIMES_BIGGER then a
! 192: if(b.size()>UNTAINT_TIMES_BIGGER*a.size()) {
! 193: pool().set_tag(0); // avoid recursion
! 194: THROW(0, 0,
! 195: &b,
! 196: "is %g times longer then '%s', "
! 197: "while maximum, handled by Parser, is %d",
! 198: ((double)b.size())/a.size(),
! 199: a.cstr(),
! 200: UNTAINT_TIMES_BIGGER);
! 201: }
! 202:
! 203: // skip 'a' in 'src'
! 204: src+=a.size();
! 205: // write 'b' to 'dest'
! 206: b.store_to(dest);
! 207: dest+=b.size();
! 208: } else
! 209: *dest++=*src++;
! 210: } while(*src);
! 211: } else {
! 212: memcpy(dest, html, html_size);
! 213: dest+=html_size;
! 214: }
1.1 paf 215: break;
1.13 ! paf 216: }
1.1 paf 217: default:
218: THROW(0,0,
219: this,
220: "unknown untaint language #%d of %d piece",
221: static_cast<int>(row->item.lang),
222: i);
223: }
224: row++;
225: }
226: chunk=row->link;
227: } while(chunk);
228: break2:
1.13 ! paf 229: return dest;
1.1 paf 230: }