|
|
1.7 paf 1: /** @file
1.8 paf 2: Parser: String class part: untaint mechanism.
3:
1.13 paf 4: Copyright(c) 2001 ArtLebedev Group(http://www.artlebedev.com)
1.8 paf 5:
1.13 paf 6: Author: Alexander Petrosyan <paf@design.ru>(http://design.ru/paf)
1.1 paf 7:
1.14 ! paf 8: $Id: untaint.C,v 1.13 2001/03/24 19:12:20 paf Exp $
1.1 paf 9: */
10:
1.12 paf 11: #include "pa_config_includes.h"
1.1 paf 12:
13: #include "pa_pool.h"
14: #include "pa_string.h"
15: #include "pa_hash.h"
16: #include "pa_exception.h"
1.13 paf 17: #include "pa_table.h"
1.1 paf 18:
// Helper macros used by String::store_to. They expect two names to be in scope
// at the point of use: `row` (the piece being processed; row->item.ptr and
// row->item.size are read) and `dest` (the output cursor, advanced on write).

// escape(cases): feeds every byte of the current piece through a switch whose
// labels are supplied in `cases`; inside `cases` the current byte is `*src`.
#define escape(cases) \
	{ \
		const char *src=row->item.ptr; \
		int bytes_left=row->item.size; \
		while(bytes_left--) { \
			switch(*src) { \
				cases \
			} \
			src++; \
		} \
	}

// to_char(a, c): byte `a` is replaced by the single byte `c`.
#define to_char(a, c) case a: *dest=c; dest++; break

// _default: any byte without its own label is copied unchanged.
#define _default default: *dest=*src; dest++; break

// to_string(a, b, bsize): byte `a` is replaced by the first `bsize` bytes of `b`
// (strncpy semantics: if `b` is shorter, the remainder is NUL-padded).
#define to_string(a, b, bsize) \
	case a: \
		dest=strncpy(dest, b, bsize)+(bsize); \
		break

// encode(need_encode_func, prefix): default label; bytes for which
// need_encode_func() is true become `prefix` followed by two upper-case
// hex digits, all other bytes are copied unchanged.
#define encode(need_encode_func, prefix) \
	default: \
		if(!need_encode_func(*src)) \
			*dest++=*src; \
		else { \
			static const char *hex="0123456789ABCDEF"; \
			const unsigned char code=static_cast<unsigned char>(*src); \
			char chunk[3]={prefix}; \
			chunk[1]=hex[code>>4]; \
			chunk[2]=hex[code&0x0F]; \
			dest=strncpy(dest, chunk, 3)+3; \
		} \
		break
1.4 paf 45:
/// Tells whether byte @a c must be hex-encoded when it appears in a file name.
/// ASCII letters, digits, '.' and '/' are passed through; everything else,
/// including the NUL byte, needs encoding.
inline bool need_file_encode(unsigned char c){
	if((c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z'))
		return false;

	// strchr() also matches the terminating NUL of its first argument,
	// so c==0 has to be handled explicitly or it would slip through unencoded
	return c==0 || !strchr("./", c);
}
/// Tells whether byte @a c must be hex-encoded when it appears in a URI.
/// ASCII letters, digits and the characters "_-./" are passed through;
/// everything else, including the NUL byte, needs encoding.
inline bool need_uri_encode(unsigned char c){
	if((c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z'))
		return false;

	// strchr() also matches the terminating NUL of its first argument,
	// so c==0 has to be handled explicitly or it would slip through unencoded
	return c==0 || !strchr("_-./", c);
}
58: inline bool need_header_encode(unsigned char c){
59: if(strchr(" ,:", c))
60: return false;
61:
62: return need_uri_encode(c);
1.4 paf 63: }
1.1 paf 64:
65: // String
66:
1.13 paf 67: static bool typo_present(Array::Item *value, const void *info) {
68: Array *row=static_cast<Array *>(value);
69: const char *src=static_cast<const char *>(info);
70:
71: int partial;
72: row->get_string(0)->cmp(src, partial);
1.14 ! paf 73: return
! 74: partial==0 || // full match
! 75: partial==1; // typo left column starts 'src'
1.13 paf 76: }
77:
1.9 paf 78: /// @todo optimize whitespaces for all but 'html'
1.13 paf 79: char *String::store_to(char *dest) const {
80: // $MAIN:html-typo table
81: Table *typo_table=static_cast<Table *>(pool().tag());
1.1 paf 82:
83: const Chunk *chunk=&head;
84: do {
85: const Chunk::Row *row=chunk->rows;
86: for(int i=0; i<chunk->count; i++) {
87: if(row==append_here)
88: goto break2;
89:
90: // WARNING:
91: // string can grow only UNTAINT_TIMES_BIGGER
92: switch(row->item.lang) {
1.11 paf 93: case UL_NO:
1.1 paf 94: // clean piece
1.11 paf 95: case UL_YES:
1.1 paf 96: // tainted piece, but undefined untaint language
97: // for VString.get_double of tainted values
98: // for ^process{body} evaluation
1.11 paf 99: case UL_AS_IS:
1.1 paf 100: // tainted, untaint language: as-is
1.13 paf 101: memcpy(dest, row->item.ptr, row->item.size);
102: dest+=row->item.size;
1.1 paf 103: break;
1.11 paf 104: case UL_FILE_NAME:
1.9 paf 105: // tainted, untaint language: file [name]
106: escape(
1.13 paf 107: to_char(' ', '_');
108: encode(need_file_encode, '-');
1.9 paf 109: );
110: break;
1.11 paf 111: case UL_URI:
1.4 paf 112: // tainted, untaint language: uri
113: escape(
1.13 paf 114: to_char(' ', '+');
115: encode(need_uri_encode, '%');
1.5 paf 116: );
117: break;
1.11 paf 118: case UL_HEADER:
1.5 paf 119: // tainted, untaint language: header
120: escape(
1.13 paf 121: encode(need_header_encode, '%');
1.4 paf 122: );
123: break;
1.11 paf 124: case UL_TABLE:
1.1 paf 125: escape(
1.13 paf 126: to_char('\t', ' ');
127: to_char('\n', ' ');
128: _default;
1.1 paf 129: );
130: break;
1.11 paf 131: case UL_SQL:
1.1 paf 132: // tainted, untaint language: sql
133: // TODO: зависимость от sql сервера
1.13 paf 134: memset(dest, '?', row->item.size);
135: dest+=row->item.size;
1.1 paf 136: break;
1.11 paf 137: case UL_JS:
1.1 paf 138: escape(
1.13 paf 139: to_string('"', "\\\"", 2);
140: to_string('\'', "\\'", 2);
141: to_string('\n', "\\n", 2);
142: to_string('\r', "\\r", 2);
143: to_string('\\', "\\\\", 2);
144: to_string('\xFF', "\\\xFF", 2);
145: _default;
1.1 paf 146: );
147: break;
1.11 paf 148: case UL_HTML:
1.1 paf 149: escape(
1.13 paf 150: to_string('&', "&", 5); // BEFORE consequent relpaces yelding '&'
151: to_string('>', ">", 4);
152: to_string('<', "<",4);
153: to_string('"', """,6);
154: to_char('\t', ' ');
155: //TODO: XSLT to_string('\'', "'", 6)
156: _default;
1.1 paf 157: );
158: break;
1.13 paf 159: case UL_HTML_TYPO: {
1.1 paf 160: // tainted, untaint language: html-typo
1.13 paf 161: char *html=(char *)malloc(size()*6/*""" the longest possible*/+1);
162: size_t html_size;
163: { // local dest
164: char *dest=html;
165: escape(
166: to_string('&', "&", 5); // BEFORE consequent relpaces yelding '&'
167: to_string('>', ">", 4);
168: to_string('<', "<",4);
169: to_string('"', """,6);
170: to_char('\t', ' ');
171: _default;
172: );
173: *dest=0;
174: html_size=dest-html;
175: }
176: // typo table replacements
177: if(typo_table) {
178: const char *src=html;
179: do {
180: // there is a row where first column starts 'src'
181: if(Table::Item *item=typo_table->first_that(typo_present, src)) {
182: // get a=>b values
183: const String& a=*static_cast<Array *>(item)->get_string(0);
184: const String& b=*static_cast<Array *>(item)->get_string(1);
185: // empty 'a' check
186: if(a.size()==0) {
187: pool().set_tag(0); // avoid recursion
188: THROW(0, 0,
189: &a,
190: "typo table first column elements must not be empty");
191: }
192: // overflow check:
193: // b allowed to be max UNTAINT_TIMES_BIGGER then a
194: if(b.size()>UNTAINT_TIMES_BIGGER*a.size()) {
195: pool().set_tag(0); // avoid recursion
196: THROW(0, 0,
197: &b,
198: "is %g times longer then '%s', "
199: "while maximum, handled by Parser, is %d",
200: ((double)b.size())/a.size(),
201: a.cstr(),
202: UNTAINT_TIMES_BIGGER);
203: }
204:
205: // skip 'a' in 'src'
206: src+=a.size();
207: // write 'b' to 'dest'
208: b.store_to(dest);
209: dest+=b.size();
210: } else
211: *dest++=*src++;
212: } while(*src);
213: } else {
214: memcpy(dest, html, html_size);
215: dest+=html_size;
216: }
1.1 paf 217: break;
1.13 paf 218: }
1.1 paf 219: default:
220: THROW(0,0,
221: this,
222: "unknown untaint language #%d of %d piece",
223: static_cast<int>(row->item.lang),
224: i);
225: }
226: row++;
227: }
228: chunk=row->link;
229: } while(chunk);
230: break2:
1.13 paf 231: return dest;
1.1 paf 232: }