|
|
1.7 paf 1: /** @file
1.8 paf 2: Parser: String class part: untaint mechanism.
3:
1.13 paf 4: Copyright(c) 2001 ArtLebedev Group(http://www.artlebedev.com)
1.8 paf 5:
1.13 paf 6: Author: Alexander Petrosyan <paf@design.ru>(http://design.ru/paf)
1.1 paf 7:
1.15 paf 8: $Id: untaint.C,v 1.14 2001/03/24 19:30:07 paf Exp $
1.1 paf 9: */
10:
1.12 paf 11: #include "pa_config_includes.h"
1.1 paf 12:
13: #include "pa_pool.h"
14: #include "pa_string.h"
15: #include "pa_hash.h"
16: #include "pa_exception.h"
1.13 paf 17: #include "pa_table.h"
1.1 paf 18:
/*
	Helper macros for String::store_to.
	They expand inside the function body and rely on two names of the
	enclosing scope: `row` (the piece being copied, read through
	row->item.ptr / row->item.size) and `dest` (the output cursor).
*/

/*
	escape(cases): feeds every byte of the current piece through a switch whose
	labels are supplied in `cases`; the current byte is available as `*src`.
	The loop is bounded by an end pointer rather than a countdown, so a case
	that consumes an additional source byte (advances `src` itself) cannot make
	the loop run past the end of the piece.
*/
#define escape(cases) \
	{ \
		const char *src=row->item.ptr; \
		for(const char *src_end=src+row->item.size; src<src_end; src++) \
			switch(*src) { \
				cases \
			} \
	}

/* to_char(a, c): byte `a` is replaced with the single byte `c` */
#define to_char(a, c) case a: *dest++=(c); break

/* _default: any byte without its own label is copied unchanged */
#define _default default: *dest++=*src; break

/*
	to_string(a, b, bsize): byte `a` is replaced with the first `bsize` bytes of `b`.
	`bsize` must equal the length of `b`: strncpy pads with zero bytes when
	`b` is shorter and truncates when it is longer.
*/
#define to_string(a, b, bsize) \
	case a: \
		strncpy(dest, (b), (bsize)); \
		dest+=(bsize); \
		break

/*
	encode(need_encode_func, prefix): default label; a byte for which
	need_encode_func() returns true is written as `prefix` followed by two
	uppercase hex digits of its unsigned value, any other byte is copied as is.
*/
#define encode(need_encode_func, prefix) \
	default: \
		if(need_encode_func(*src)) { \
			static const char hex[]="0123456789ABCDEF"; \
			const unsigned char code=(unsigned char)*src; \
			*dest++=(prefix); \
			*dest++=hex[code/0x10]; \
			*dest++=hex[code%0x10]; \
		} else \
			*dest++=*src; \
		break
1.4 paf 45:
/**
	Tells whether byte @a c has to be hex-escaped in the "file name" untaint language.

	ASCII letters, ASCII digits, '.' and '/' are left alone;
	every other byte, the zero byte included, must be escaped.

	@param c byte to examine
	@return true when the byte must be escaped
*/
inline bool need_file_encode(unsigned char c){
	// strchr() treats the terminating zero of its first argument as part of
	// the string, so a zero byte would otherwise be reported as "safe"
	if(c=='\0')
		return true;

	if((c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z'))
		return false;

	return strchr("./", c)==0;
}
/**
	Tells whether byte @a c has to be hex-escaped in the "uri" untaint language.

	ASCII letters, ASCII digits and the characters '_', '-', '.', '/' are left alone;
	every other byte, the zero byte included, must be escaped.

	@param c byte to examine
	@return true when the byte must be escaped
*/
inline bool need_uri_encode(unsigned char c){
	// strchr() treats the terminating zero of its first argument as part of
	// the string, so a zero byte would otherwise be reported as "safe"
	if(c=='\0')
		return true;

	if((c>='0' && c<='9') || (c>='A' && c<='Z') || (c>='a' && c<='z'))
		return false;

	return strchr("_-./", c)==0;
}
58: inline bool need_header_encode(unsigned char c){
59: if(strchr(" ,:", c))
60: return false;
61:
62: return need_uri_encode(c);
1.4 paf 63: }
1.1 paf 64:
65: // String
66:
1.13 paf 67: static bool typo_present(Array::Item *value, const void *info) {
68: Array *row=static_cast<Array *>(value);
69: const char *src=static_cast<const char *>(info);
70:
71: int partial;
72: row->get_string(0)->cmp(src, partial);
1.14 paf 73: return
74: partial==0 || // full match
75: partial==1; // typo left column starts 'src'
1.13 paf 76: }
77:
1.9 paf 78: /// @todo optimize whitespaces for all but 'html'
1.13 paf 79: char *String::store_to(char *dest) const {
80: // $MAIN:html-typo table
81: Table *typo_table=static_cast<Table *>(pool().tag());
1.1 paf 82:
83: const Chunk *chunk=&head;
84: do {
85: const Chunk::Row *row=chunk->rows;
86: for(int i=0; i<chunk->count; i++) {
87: if(row==append_here)
88: goto break2;
89:
90: // WARNING:
91: // string can grow only UNTAINT_TIMES_BIGGER
92: switch(row->item.lang) {
1.11 paf 93: case UL_NO:
1.1 paf 94: // clean piece
1.11 paf 95: case UL_YES:
1.1 paf 96: // tainted piece, but undefined untaint language
97: // for VString.get_double of tainted values
98: // for ^process{body} evaluation
1.11 paf 99: case UL_AS_IS:
1.1 paf 100: // tainted, untaint language: as-is
1.13 paf 101: memcpy(dest, row->item.ptr, row->item.size);
102: dest+=row->item.size;
1.1 paf 103: break;
1.11 paf 104: case UL_FILE_NAME:
1.9 paf 105: // tainted, untaint language: file [name]
106: escape(
1.13 paf 107: to_char(' ', '_');
108: encode(need_file_encode, '-');
1.9 paf 109: );
110: break;
1.11 paf 111: case UL_URI:
1.4 paf 112: // tainted, untaint language: uri
113: escape(
1.13 paf 114: to_char(' ', '+');
115: encode(need_uri_encode, '%');
1.5 paf 116: );
117: break;
1.11 paf 118: case UL_HEADER:
1.5 paf 119: // tainted, untaint language: header
120: escape(
1.13 paf 121: encode(need_header_encode, '%');
1.4 paf 122: );
123: break;
1.11 paf 124: case UL_TABLE:
1.15 paf 125: // tainted, untaint language: table
1.1 paf 126: escape(
1.13 paf 127: to_char('\t', ' ');
128: to_char('\n', ' ');
129: _default;
1.1 paf 130: );
131: break;
1.11 paf 132: case UL_SQL:
1.1 paf 133: // tainted, untaint language: sql
134: // TODO: зависимость от sql сервера
1.13 paf 135: memset(dest, '?', row->item.size);
136: dest+=row->item.size;
1.1 paf 137: break;
1.11 paf 138: case UL_JS:
1.1 paf 139: escape(
1.13 paf 140: to_string('"', "\\\"", 2);
141: to_string('\'', "\\'", 2);
142: to_string('\n', "\\n", 2);
143: to_string('\\', "\\\\", 2);
144: to_string('\xFF', "\\\xFF", 2);
145: _default;
1.1 paf 146: );
147: break;
1.11 paf 148: case UL_HTML:
1.1 paf 149: escape(
1.15 paf 150: to_string('&', "&", 5);
1.13 paf 151: to_string('>', ">", 4);
152: to_string('<', "<",4);
153: to_string('"', """,6);
154: to_char('\t', ' ');
155: //TODO: XSLT to_string('\'', "'", 6)
156: _default;
1.1 paf 157: );
158: break;
1.13 paf 159: case UL_HTML_TYPO: {
1.1 paf 160: // tainted, untaint language: html-typo
1.13 paf 161: char *html=(char *)malloc(size()*6/*""" the longest possible*/+1);
162: size_t html_size;
163: { // local dest
164: char *dest=html;
165: escape(
1.15 paf 166: to_string('&', "&", 5);
1.13 paf 167: to_string('>', ">", 4);
168: to_string('<', "<",4);
169: to_string('"', """,6);
170: to_char('\t', ' ');
1.16 ! paf 171: // convinient name for typo match "\n"
! 172: case '\r':
! 173: *dest++='\\'; *dest++='n'; // \r -> \n
! 174: if(src[1]=='\n') // \r\n -> remove \n
! 175: src++;
! 176: break;
! 177: to_string('\n', "\\n", 2);
1.15 paf 178: //TODO: XSLT to_string('\'', "'", 6)
1.13 paf 179: _default;
180: );
181: *dest=0;
182: html_size=dest-html;
183: }
184: // typo table replacements
185: if(typo_table) {
186: const char *src=html;
187: do {
188: // there is a row where first column starts 'src'
189: if(Table::Item *item=typo_table->first_that(typo_present, src)) {
190: // get a=>b values
191: const String& a=*static_cast<Array *>(item)->get_string(0);
192: const String& b=*static_cast<Array *>(item)->get_string(1);
193: // empty 'a' check
194: if(a.size()==0) {
195: pool().set_tag(0); // avoid recursion
196: THROW(0, 0,
197: &a,
198: "typo table first column elements must not be empty");
199: }
200: // overflow check:
201: // b allowed to be max UNTAINT_TIMES_BIGGER then a
202: if(b.size()>UNTAINT_TIMES_BIGGER*a.size()) {
203: pool().set_tag(0); // avoid recursion
204: THROW(0, 0,
205: &b,
206: "is %g times longer then '%s', "
207: "while maximum, handled by Parser, is %d",
208: ((double)b.size())/a.size(),
209: a.cstr(),
210: UNTAINT_TIMES_BIGGER);
211: }
212:
213: // skip 'a' in 'src'
214: src+=a.size();
215: // write 'b' to 'dest'
216: b.store_to(dest);
217: dest+=b.size();
218: } else
219: *dest++=*src++;
220: } while(*src);
221: } else {
222: memcpy(dest, html, html_size);
223: dest+=html_size;
224: }
1.1 paf 225: break;
1.13 paf 226: }
1.1 paf 227: default:
228: THROW(0,0,
229: this,
230: "unknown untaint language #%d of %d piece",
231: static_cast<int>(row->item.lang),
232: i);
233: }
234: row++;
235: }
236: chunk=row->link;
237: } while(chunk);
238: break2:
1.13 paf 239: return dest;
1.1 paf 240: }