--- parser3/src/include/pa_string.h	2016/09/07 14:40:07	1.217
+++ parser3/src/include/pa_string.h	2026/01/06 13:27:59	1.239
@@ -1,22 +1,23 @@
 /** @file
 	Parser: string class decl.
 
-	Copyright (c) 2001-2015 Art. Lebedev Studio (http://www.artlebedev.com)
-	Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
+	Copyright (c) 2001-2024 Art. Lebedev Studio (http://www.artlebedev.com)
+	Authors: Konstantin Morshnev <moko@design.ru>, Alexandr Petrosian <paf@design.ru>
 */
 
 #ifndef PA_STRING_H
 #define PA_STRING_H
 
-#define IDENT_PA_STRING_H "$Id: pa_string.h,v 1.217 2016/09/07 14:40:07 moko Exp $"
+#define IDENT_PA_STRING_H "$Id: pa_string.h,v 1.239 2026/01/06 13:27:59 moko Exp $"
 
 // includes
 #include "pa_types.h"
+#include "pa_int.h"
 #include "pa_array.h"
 
 extern "C" { // cord's author forgot to do that
 #define CORD_NO_IO
-#include "cord.h"
+#include "../lib/cord/include/cord.h"
 
 #ifdef CORD_CAT_OPTIMIZATION
 #define CORD_cat(x, y) CORD_cat_optimized(x, y)
@@ -47,15 +48,29 @@ class SQL_Connection;
 class Dictionary;
 class Request_charsets;
 class String;
-typedef Array<const String*> ArrayString;
 class VRegex;
 
-// generally useful
+#ifdef NDEBUG // NDEBUG defined: ArrayString is a plain alias of Array<const String*>, no checks
+typedef Array<const String*> ArrayString;
+#else // NDEBUG not defined: wrapper class that asserts elements are never NULL
+class ArrayString : public Array<const String*> {
+public:
+	inline ArrayString(size_t initial=0) : Array(initial){ // forwards initial to the Array constructor
+	}
+	inline Array& operator+=(element_type src) { // asserts src is not NULL, then appends via Array::operator+=
+		assert(src != NULL);
+		return Array::operator+=(src);
+	}
+
+	inline element_type get(size_t index) const { // reads via Array::get and asserts the result is not NULL
+		element_type result=Array::get(index);
+		assert(result != NULL);
+		return result;
+	}
+};
+#endif
 
-int pa_atoi(const char* str, const String* problem_source=0);
-double pa_atod(const char* str, const String* problem_source=0);
-unsigned int pa_atoui(const char *str, int base, const String* problem_source=0);
-unsigned long long int pa_atoul(const char *str, int base, const String* problem_source=0);
+// generally useful
 
 /// this is result of pos functions which mean that substr were not found
 #define STRING_NOT_FOUND ((size_t)-1)
@@ -73,6 +88,7 @@ unsigned long long int pa_atoul(const ch
 
 
 class String: public PA_Object {
+	friend class StringSplitHelper;
 public:
 
 	/** piece is tainted or not. the lang to use when detaint
@@ -89,29 +105,30 @@ public:
 
 		WARNING WARNING WARNING WARNING WARNING WARNING 
 	*/
+
+#if _MSC_VER >= 1900
+	/// required for VS2015+ to keep sizeof(Languages::opt) == sizeof(CORD); without it the size would be 16 bytes under x64
+	enum Language : size_t {
+#else
 	enum Language {
-		L_UNSPECIFIED=0, ///< no real string has parts of this lange: it's just convinient to check when string's empty
+#endif
+		L_UNSPECIFIED=0,	///< no real string has parts of this lang: it's just convenient to check when string's empty
 		// these two must go before others, there are checks for >L_AS_IS
-		L_CLEAN='0',	///< clean  WARNING: read above warning before changing
-		L_AS_IS='A',	///< leave all characters intact  WARNING: read above warning before changing
+		L_CLEAN='0',		///< clean  WARNING: read above warning before changing
+		L_AS_IS='A',		///< leave all characters intact  WARNING: read above warning before changing
 
-		L_PASS_APPENDED='P',
-			/**<
-				leave lang built into string being appended.
-				just a flag, that value not stored
-			*/
-		L_TAINTED='T',	///< tainted, untaint lang as assigned later 
+		L_TAINTED='T',		///< tainted, untaint lang as assigned later
 		// untaint langs. assigned by ^untaint[lang]{...}
 		L_FILE_SPEC='F',	///< file specification
 		L_HTTP_HEADER='h',	///< text in HTTP response header
 		L_MAIL_HEADER='m',	///< text in mail header
-		L_URI='U',			///< text in uri
-		L_SQL='Q',			///< ^table:sql body
-		L_JS='J',			///< JavaScript code
-		L_XML='X',			///< ^xdoc:create xml
-		L_HTML='H',			///< HTML code
+		L_URI='U',		///< text in uri
+		L_SQL='Q',		///< ^table:sql body
+		L_JS='J',		///< JavaScript code
+		L_XML='X',		///< ^xdoc:create xml
+		L_HTML='H',		///< HTML code
 		L_REGEX='R',		///< RegExp
-		L_JSON='S',			///< JSON code
+		L_JSON='S',		///< JSON code
 		L_HTTP_COOKIE='C',	///< cookies encoded as %uXXXX for compartibility with js functions encode/decode
 		L_PARSER_CODE='p',	///< ^process body
 		// READ WARNING ABOVE BEFORE ADDING ANYTHING
@@ -175,7 +192,7 @@ public:
 
 	public:
 
-		const char* v() const;
+		const char* visualize() const;
 		void dump() const;
 
 		Languages(): langs(0) {}
@@ -316,6 +333,7 @@ public:
 		size_t length;
 		C(): str(0), length(0) {}
 		C(const char *astr, size_t asize): str(astr), length(asize) {}
+		explicit C(Body abody): str(abody.cstr()), length(abody.length()) {}
 	};
 
 	struct Cm {
@@ -323,6 +341,7 @@ public:
 		size_t length;
 		Cm(): str(0), length(0) {}
 		Cm(char *astr, size_t asize): str(astr), length(asize) {}
+		explicit Cm(Body abody): str(abody.cstrm()), length(abody.length()) {}
 	};
 
 	class Body {
@@ -352,12 +371,11 @@ public:
 
 	public:
 
-		const char* v() const;
 		void dump() const;
 
 		Body(): body(CORD_EMPTY) INIT_HASH_CODE(0) INIT_LENGTH(0) {}
-		Body(const char *abody): body(AS_CORD(abody)) INIT_HASH_CODE(0) INIT_LENGTH(0) {}
-		Body(CORD abody, uint ahash_code): body(abody) INIT_HASH_CODE(ahash_code) INIT_LENGTH(0) {}
+		explicit Body(const char *abody): body(AS_CORD(abody)) INIT_HASH_CODE(0) INIT_LENGTH(0) {}
+		explicit Body(CORD abody, uint ahash_code): body(abody) INIT_HASH_CODE(ahash_code) INIT_LENGTH(0) {}
 		explicit Body(C ac): body(AS_CORD(ac.str)) INIT_HASH_CODE(0) INIT_LENGTH(ac.length) {}
 		explicit Body(CORD abody): body(abody) INIT_HASH_CODE(0) INIT_LENGTH(0) {
 #ifdef CORD_CAT_OPTIMIZATION
@@ -378,8 +396,7 @@ public:
 #endif
 		}
 
-
-		static Body Format(int value);
+		static Body uitoa(size_t aindex);
 
 		void clear() { ZERO_LENGTH ZERO_HASH_CODE body=CORD_EMPTY; }
 
@@ -388,6 +405,7 @@ public:
 		inline CORD get_cord() const { return body; }
 		uint get_hash_code() const;
 
+		// never null
 		const char* cstr() const {
 #ifdef STRING_LENGTH_CACHING
 			string_length = length();
@@ -400,6 +418,7 @@ public:
 			return CORD_to_const_char_star(body, length());
 		}
 
+		// never null
 		char* cstrm() const { return CORD_to_char_star(body, length()); }
 
 #ifdef STRING_LENGTH_CACHING
@@ -486,12 +505,11 @@ public:
 			size_t* out_start=0, size_t* out_length=0, Charset* source_charset=0) const;
 	};
 
-protected:
+private:
 
 	Body body; ///< all characters of string
 	Languages langs; ///< string characters lang
 
-	const char* v() const;
 	void dump() const;
 	#define ASSERT_STRING_INVARIANT(string) \
 		assert((string).langs.invariant((string).body.length()))
@@ -565,6 +583,8 @@ public:
 		return langs.opt.lang;
 	}
 
+	char* visualize_langs() const;
+
 	/// puts pieces to buf
 	Cm serialize(size_t prolog_size) const;
 	/// appends pieces from buf to self
@@ -587,6 +607,7 @@ public:
 		return Body(x).ncmp(0/*x_begin*/, body, 0/*y_begin*/, length())==0;
 	}
 
+	String& append_to(String& dest) const;
 	String& append_to(String& dest, Language lang, bool forced=false) const;
 	String& append(const String& src, Language lang, bool forced=false) { 
 		return src.append_to(*this, lang, forced);
@@ -599,7 +620,7 @@ public:
 		return *this;
 	}
 
-	String& operator << (const String& src) { return append(src, L_PASS_APPENDED); }
+	String& operator << (const String& src) { return src.append_to(*this); }
 	String& operator << (const char* src) { return append_help_length(src, 0, L_AS_IS); }
 	String& operator << (const Body& src){
 		langs.appendHelper(body, L_AS_IS, src);
@@ -626,17 +647,14 @@ public:
 	String& mid(size_t substr_begin, size_t substr_end) const;
 	String& mid(Charset& charset, size_t from, size_t to, size_t helper_length=0) const;
 
-	/** 
-		ignore lang if it's L_UNSPECIFIED
-		but when specified: look for substring that lies in ONE fragment in THAT lang
-		@return position of substr in string, -1 means "not found" [const char* version]
-	*/
-	size_t pos(const Body substr, size_t this_offset=0, Language lang=L_UNSPECIFIED) const;
-	/// String version of @see pos(const char*, int, Language)
-	size_t pos(const String& substr, size_t this_offset=0, Language lang=L_UNSPECIFIED) const;
-	size_t pos(char c, size_t this_offset=0) const {
-		return body.pos(c, this_offset);
-	}
+	/// return position of substr in string, STRING_NOT_FOUND ((size_t)-1) means "not found" [const char* version]
+	size_t pos(const char* substr, size_t this_offset=0) const { return body.pos(substr, this_offset); }
+	size_t pos(const Body substr, size_t this_offset=0) const { return body.pos(substr, this_offset); }
+	size_t pos(const String& substr, size_t this_offset=0) const { return body.pos(substr.body, this_offset); }
+	size_t pos(char c, size_t this_offset=0) const { return body.pos(c, this_offset); }
+	/// ignore lang if it's L_UNSPECIFIED, otherwise look for substring that lies in ONE fragment in THAT lang
+	size_t pos(const Body substr, size_t this_offset, Language lang) const;
+	size_t pos(const String& substr, size_t this_offset, Language lang) const;
 	size_t pos(Charset& charset, const String& substr, size_t this_offset=0, Language lang=L_UNSPECIFIED) const;
 
 	size_t strrpbrk(const char* chars, size_t left=0) const {
@@ -653,8 +671,8 @@ public:
 		return body.rskipchars(chars, left, right);
 	}
 
-	void split(ArrayString& result, size_t& pos_after, const char* delim, Language lang=L_UNSPECIFIED, int limit=-1) const;
-	void split(ArrayString& result, size_t& pos_after, const String& delim, Language lang=L_UNSPECIFIED, int limit=-1) const;
+	void split(ArrayString& result, size_t pos_after, const char* delim, Language lang=L_UNSPECIFIED) const;
+	void split(ArrayString& result, size_t pos_after, const String& delim, Language lang=L_UNSPECIFIED) const;
 
 	typedef void (*Row_action)(Table& table, ArrayString* row, int prestart, int prefinish, int poststart, int postfinish, void *info);
 
@@ -676,7 +694,7 @@ public:
 	const String& replace(const Dictionary& dict) const;
 	const String& trim(Trim_kind kind=TRIM_BOTH, const char* chars=0, Charset* source_charset=0) const;
 	double as_double() const { return pa_atod(cstr(), this); }
-	int as_int() const { return pa_atoi(cstr(), this); }
+	int as_int() const { return pa_atoi(cstr(), 0, this); }
 	bool as_bool() const { return as_int()!=0; }
 	const String& escape(Charset& source_charset) const;