--- parser3/src/include/pa_string.h 2001/02/13 10:30:22 1.18 +++ parser3/src/include/pa_string.h 2001/03/10 12:12:50 1.28 @@ -1,5 +1,5 @@ /* - $Id: pa_string.h,v 1.18 2001/02/13 10:30:22 paf Exp $ + $Id: pa_string.h,v 1.28 2001/03/10 12:12:50 paf Exp $ */ /* @@ -28,23 +28,38 @@ #include "pa_types.h" #ifndef NO_STRING_ORIGIN -# define STRING_APPEND_PARAMS const char *src, char *file, uint line -# define APPEND(src, file, line) real_append(src, file, line) +# define STRING_APPEND_PARAMS const char *src, size_t size, bool tainted, char *file, uint line +# define APPEND(src, size, file, line) real_append(src, size, false, file, line) +# define APPEND_TAINTED(src, size, file, line) real_append(src, size, true, file, line) #else -# define STRING_APPEND_PARAMS const char *src -# define APPEND(src, file, line) real_append(src) +# define STRING_APPEND_PARAMS const char *src, size_t size, bool tainted +# define APPEND(src, size, file, line) real_append(src, size, false) +# define APPEND_TAINTED(src, size, file, line) real_append(src, size, true) #endif - -class String_iterator; +#define APPEND_CONST(src) APPEND(src, 0, 0, 0) class String : public Pooled { - friend String_iterator; public: enum { CR_PREALLOCATED_COUNT=5, CR_GROW_PERCENT=60 }; + enum Untaint_lang { + NO, // clean + YES, // tainted, untaint language as assigned later + // untaint languages. assigned by ^untaint[lang]{...} + PASS_APPENDED, + // leave language built into string being appended + // just a flag, that value not stored + AS_IS, + SQL, + JS, + TABLE, + HTML, + HTML_TYPO + }; + public: String(Pool& apool); @@ -53,8 +68,20 @@ public: int used_rows() const { return fused_rows; } char *cstr() const; String& real_append(STRING_APPEND_PARAMS); - bool operator == (const String& src) const; - String& append(const String_iterator& begin, const String_iterator& end); + int cmp (const String& src) const; + bool operator < (const String& src) const { return cmp(src)<0; } + bool operator > (const String& src) const { return cmp(src)>0; } + bool operator <= (const String& src) const { return cmp(src)<=0; } + bool operator >= (const String& src) const { return cmp(src)>=0; } + bool operator == (const String& src) const { + if(size()!=src.size()) // can speed up in trivial case + return false; + return cmp(src)==0; + } + bool operator != (const String& src) const { return cmp(src)!=0; } + + bool operator == (char* src) const; + String& append(const String& src, Untaint_lang lang); uint hash_code() const; @@ -66,11 +93,14 @@ private: // the number of rows in chunk int count; union Row { - // chunk item - struct { - const char *ptr; // pointer to the start of string fragment - size_t size; // length of the fragment - Origin origin; // origin of this fragment + // fragment + struct { + const char *ptr; // pointer to the start + size_t size; // length + Untaint_lang lang; // untaint flag, later untaint language +#ifndef NO_STRING_ORIGIN + Origin origin; // origin +#endif } item; Chunk *link; // link to the next chunk in chain } rows[CR_PREALLOCATED_COUNT]; @@ -87,8 +117,8 @@ private: Chunk::Row *link_row; private: - // last chank allocated count - int curr_chunk_rows; + // last chunk + Chunk *last_chunk; // string size size_t fsize; @@ -102,6 +132,7 @@ private: return append_here == link_row; } void expand(); + void set_lang(Chunk::Row *row, Untaint_lang lang, size_t size); private: //disabled @@ -109,49 +140,4 @@ private: //disabled }; - -class Char_types { -public: - Char_types(); - void set(char c, int type) { - types[static_cast(c)]=static_cast(type); - } - int get(char c) { - return static_cast(types[static_cast(c)]); - } -private: - char type[0x100]; -}; - -class String_iterator { -public: - String_iterator(String& astring); - - void operator ++() { skip(); } - void operator ++(int) { skip(); } - - int skip_to(Char_type& types); - bool skip_to(char c); - - bool eof() { return position; } - - // current char - char operator() const; - -protected: - // home string - String& string; - // the row in which we are - Chunk::Row *read_here; - // position in text, eof when 0 - char *position; - // when read_here reaches this row, move to the next chunk - Chunk::Row *link_row; - -protected: - - // advances position by one char - void skip(); -}; - #endif