--- parser3/src/include/pa_string.h 2016/09/07 15:03:25 1.218 +++ parser3/src/include/pa_string.h 2024/03/14 03:17:01 1.232 @@ -1,14 +1,14 @@ /** @file Parser: string class decl. - Copyright (c) 2001-2015 Art. Lebedev Studio (http://www.artlebedev.com) - Author: Alexandr Petrosian (http://paf.design.ru) + Copyright (c) 2001-2023 Art. Lebedev Studio (http://www.artlebedev.com) + Authors: Konstantin Morshnev , Alexandr Petrosian */ #ifndef PA_STRING_H #define PA_STRING_H -#define IDENT_PA_STRING_H "$Id: pa_string.h,v 1.218 2016/09/07 15:03:25 moko Exp $" +#define IDENT_PA_STRING_H "$Id: pa_string.h,v 1.232 2024/03/14 03:17:01 moko Exp $" // includes #include "pa_types.h" @@ -16,7 +16,7 @@ extern "C" { // cord's author forgot to do that #define CORD_NO_IO -#include "cord.h" +#include "../lib/cord/include/cord.h" #ifdef CORD_CAT_OPTIMIZATION #define CORD_cat(x, y) CORD_cat_optimized(x, y) @@ -47,15 +47,34 @@ class SQL_Connection; class Dictionary; class Request_charsets; class String; -typedef Array ArrayString; class VRegex; +#ifdef NDEBUG +typedef Array ArrayString; +#else +class ArrayString : public Array { +public: + inline ArrayString(size_t initial=0) : Array(initial){ + } + inline Array& operator+=(element_type src) { + assert(src != NULL); + return Array::operator+=(src); + } + + inline element_type get(size_t index) const { + element_type result=Array::get(index); + assert(result != NULL); + return result; + } +}; +#endif + // generally useful -int pa_atoi(const char* str, const String* problem_source=0); -double pa_atod(const char* str, const String* problem_source=0); -unsigned int pa_atoui(const char *str, int base, const String* problem_source=0); -unsigned long long int pa_atoul(const char *str, int base, const String* problem_source=0); +double pa_atod(const char* str, const String* problem_source); +int pa_atoi(const char* str, int base=10, const String* problem_source=0); +unsigned int pa_atoui(const char *str, int base=10, const String* problem_source=0); +uint64_t pa_atoul(const char *str, int base=10, const String* problem_source=0); /// this is result of pos functions which mean that substr were not found #define STRING_NOT_FOUND ((size_t)-1) @@ -90,29 +109,30 @@ public: WARNING WARNING WARNING WARNING WARNING WARNING */ + +#if _MSC_VER >= 1900 + /// required for VS2015+ to make sizeof(Languages::opt) == sizeof(CORD), will be 16 byte under x64 without it + enum Language : size_t { +#else enum Language { - L_UNSPECIFIED=0, ///< no real string has parts of this lange: it's just convinient to check when string's empty +#endif + L_UNSPECIFIED=0, ///< no real string has parts of this lange: it's just convinient to check when string's empty // these two must go before others, there are checks for >L_AS_IS - L_CLEAN='0', ///< clean WARNING: read above warning before changing - L_AS_IS='A', ///< leave all characters intact WARNING: read above warning before changing + L_CLEAN='0', ///< clean WARNING: read above warning before changing + L_AS_IS='A', ///< leave all characters intact WARNING: read above warning before changing - L_PASS_APPENDED='P', - /**< - leave lang built into string being appended. - just a flag, that value not stored - */ - L_TAINTED='T', ///< tainted, untaint lang as assigned later + L_TAINTED='T', ///< tainted, untaint lang as assigned later // untaint langs. assigned by ^untaint[lang]{...} L_FILE_SPEC='F', ///< file specification L_HTTP_HEADER='h', ///< text in HTTP response header L_MAIL_HEADER='m', ///< text in mail header - L_URI='U', ///< text in uri - L_SQL='Q', ///< ^table:sql body - L_JS='J', ///< JavaScript code - L_XML='X', ///< ^xdoc:create xml - L_HTML='H', ///< HTML code + L_URI='U', ///< text in uri + L_SQL='Q', ///< ^table:sql body + L_JS='J', ///< JavaScript code + L_XML='X', ///< ^xdoc:create xml + L_HTML='H', ///< HTML code L_REGEX='R', ///< RegExp - L_JSON='S', ///< JSON code + L_JSON='S', ///< JSON code L_HTTP_COOKIE='C', ///< cookies encoded as %uXXXX for compartibility with js functions encode/decode L_PARSER_CODE='p', ///< ^process body // READ WARNING ABOVE BEFORE ADDING ANYTHING @@ -176,7 +196,7 @@ public: public: - const char* v() const; + const char* visualize() const; void dump() const; Languages(): langs(0) {} @@ -317,6 +337,7 @@ public: size_t length; C(): str(0), length(0) {} C(const char *astr, size_t asize): str(astr), length(asize) {} + explicit C(Body abody): str(abody.cstr()), length(abody.length()) {} }; struct Cm { @@ -324,6 +345,7 @@ public: size_t length; Cm(): str(0), length(0) {} Cm(char *astr, size_t asize): str(astr), length(asize) {} + explicit Cm(Body abody): str(abody.cstrm()), length(abody.length()) {} }; class Body { @@ -353,7 +375,6 @@ public: public: - const char* v() const; void dump() const; Body(): body(CORD_EMPTY) INIT_HASH_CODE(0) INIT_LENGTH(0) {} @@ -389,6 +410,7 @@ public: inline CORD get_cord() const { return body; } uint get_hash_code() const; + // never null const char* cstr() const { #ifdef STRING_LENGTH_CACHING string_length = length(); @@ -401,6 +423,7 @@ public: return CORD_to_const_char_star(body, length()); } + // never null char* cstrm() const { return CORD_to_char_star(body, length()); } #ifdef STRING_LENGTH_CACHING @@ -492,7 +515,6 @@ private: Body body; ///< all characters of string Languages langs; ///< string characters lang - const char* v() const; void dump() const; #define ASSERT_STRING_INVARIANT(string) \ assert((string).langs.invariant((string).body.length())) @@ -566,6 +588,8 @@ public: return langs.opt.lang; } + char* visualize_langs() const; + /// puts pieces to buf Cm serialize(size_t prolog_size) const; /// appends pieces from buf to self @@ -588,6 +612,7 @@ public: return Body(x).ncmp(0/*x_begin*/, body, 0/*y_begin*/, length())==0; } + String& append_to(String& dest) const; String& append_to(String& dest, Language lang, bool forced=false) const; String& append(const String& src, Language lang, bool forced=false) { return src.append_to(*this, lang, forced); @@ -600,7 +625,7 @@ public: return *this; } - String& operator << (const String& src) { return append(src, L_PASS_APPENDED); } + String& operator << (const String& src) { return src.append_to(*this); } String& operator << (const char* src) { return append_help_length(src, 0, L_AS_IS); } String& operator << (const Body& src){ langs.appendHelper(body, L_AS_IS, src); @@ -654,8 +679,8 @@ public: return body.rskipchars(chars, left, right); } - void split(ArrayString& result, size_t& pos_after, const char* delim, Language lang=L_UNSPECIFIED, int limit=-1) const; - void split(ArrayString& result, size_t& pos_after, const String& delim, Language lang=L_UNSPECIFIED, int limit=-1) const; + void split(ArrayString& result, size_t pos_after, const char* delim, Language lang=L_UNSPECIFIED) const; + void split(ArrayString& result, size_t pos_after, const String& delim, Language lang=L_UNSPECIFIED) const; typedef void (*Row_action)(Table& table, ArrayString* row, int prestart, int prefinish, int poststart, int postfinish, void *info); @@ -677,7 +702,7 @@ public: const String& replace(const Dictionary& dict) const; const String& trim(Trim_kind kind=TRIM_BOTH, const char* chars=0, Charset* source_charset=0) const; double as_double() const { return pa_atod(cstr(), this); } - int as_int() const { return pa_atoi(cstr(), this); } + int as_int() const { return pa_atoi(cstr(), 0, this); } bool as_bool() const { return as_int()!=0; } const String& escape(Charset& source_charset) const;