Annotation of parser3/src/include/pa_charset.h, revision 1.61
1.1 paf 1: /** @file
2: Parser: Charset connection decl.
3:
1.59 moko 4: Copyright (c) 2001-2024 Art. Lebedev Studio (http://www.artlebedev.com)
1.57 moko 5: Authors: Konstantin Morshnev <moko@design.ru>, Alexandr Petrosian <paf@design.ru>
1.1 paf 6: */
7:
8: #ifndef PA_CHARSET_H
9: #define PA_CHARSET_H
1.17 paf 10:
1.61 ! moko 11: #define IDENT_PA_CHARSET_H "$Id: pa_charset.h,v 1.60 2024/12/11 21:57:35 moko Exp $"
1.23 paf 12:
1.1 paf 13:
14: #include "pa_exception.h"
15: #include "pa_common.h"
1.23 paf 16: #include "pa_hash.h"
17: #include "pa_array.h"
1.1 paf 18:
1.58 moko 19: #ifdef HAVE_PCRE2
20: #include <pcre2.h>
21: #else
22: #include <pcre.h>
23: #endif
1.51 moko 24: // we are using some pcre_internal.h stuff as well
25: #include "../lib/pcre/pa_pcre_internal.h"
1.1 paf 26:
1.3 paf 27: #ifdef XML
1.60 moko 28: #include "libxml/xmlstring.h"
1.3 paf 29: #include "libxml/encoding.h"
30: #endif
31:
1.1 paf 32: // defines
33:
1.23 paf 34: #define MAX_CHARSETS 10
35:
36: #define MAX_CHARSET_UNI_CODES 500
37:
1.41 misha 38: #ifndef XMLCh
39: typedef unsigned int XMLCh;
40: #endif
41: #ifndef XMLByte
42: typedef unsigned char XMLByte;
43: #endif
1.3 paf 44:
1.23 paf 45: // helpers
46:
1.42 misha 47: typedef HashString<String::Body> HashStringString;
1.23 paf 48:
1.1 paf 49: /** charset holds name & transcode tables
1.23 paf 50: registers libxml transcoders
1.1 paf 51: */
1.23 paf 52: class Charset: public PA_Object {
1.1 paf 53: public:
54:
1.24 paf 55: Charset(Request_charsets* charsets, const String::Body ANAME, const String* afile_spec);
1.1 paf 56:
1.24 paf 57: const String::Body NAME() const { return FNAME; }
1.30 paf 58: const char* NAME_CSTR() const { return FNAME_CSTR; }
1.1 paf 59:
60: bool isUTF8() const { return fisUTF8; }
61:
1.53 moko 62: static String::C transcode(const String::C src, const Charset& source_charset, const Charset& dest_charset);
63: static String::Body transcode(const String::Body src, const Charset& source_transcoder, const Charset& dest_transcoder);
1.61 ! moko 64: static String::Body transcode(const char* src,const Charset& source_transcoder, const Charset& dest_transcoder){ return transcode(String::Body(src), source_transcoder, dest_transcoder); }
1.53 moko 65: static String& transcode(const String& src, const Charset& source_transcoder, const Charset& dest_transcoder);
66: static void transcode(ArrayString& src, const Charset& source_transcoder, const Charset& dest_transcoder);
67: static void transcode(HashStringString& src, const Charset& source_transcoder, const Charset& dest_transcoder);
68:
69: static String::C escape(const String::C src, const Charset& source_charset);
70: static String::Body escape(const String::Body src, const Charset& source_charset);
71: static String& escape(const String& src, const Charset& source_charset);
72:
73: static String::C escape_JSON(const String::C src, const Charset& source_charset);
74: static String::Body escape_JSON(const String::Body src, const Charset& source_charset);
75: static String& escape_JSON(const String& src, const Charset& source_charset);
1.38 misha 76:
1.35 misha 77: void store_Char(XMLByte*& outPtr, XMLCh src, XMLByte not_found);
1.33 misha 78:
1.9 paf 79: #ifdef XML
1.24 paf 80: xmlCharEncodingHandler& transcoder(const String::Body NAME);
1.9 paf 81: #endif
82:
1.1 paf 83: public:
84:
85: unsigned char pcre_tables[tables_length];
86:
87: private:
88:
1.23 paf 89: void load_definition(Request_charsets& charsets, const String& afile_spec);
1.1 paf 90: void sort_ToTable();
91:
1.23 paf 92: const String::C transcodeToUTF8(const String::C src) const;
93: const String::C transcodeFromUTF8(const String::C src) const;
94:
95: const String::C transcodeToCharset(const String::C src,
96: const Charset& dest_transcoder) const;
1.1 paf 97:
1.4 paf 98: public:
99:
100: struct Tables {
1.25 paf 101: struct Rec {
102: XMLCh intCh;
103: XMLByte extCh;
104: };
105:
1.4 paf 106: XMLCh fromTable[0x100];
1.25 paf 107: Rec toTable[MAX_CHARSET_UNI_CODES];
1.4 paf 108: uint toTableSize;
109: };
110:
1.25 paf 111: struct UTF8CaseTable {
112: struct Rec {
113: XMLCh from, to;
114: };
115:
116: uint size;
117: Rec* records;
118: };
119:
1.1 paf 120: private:
121:
1.24 paf 122: const String::Body FNAME;
1.23 paf 123: char* FNAME_CSTR;
1.1 paf 124: bool fisUTF8;
1.4 paf 125: Tables tables;
1.1 paf 126:
1.45 misha 127: static size_t calc_escaped_length_UTF8(XMLByte* src, size_t src_length);
1.46 moko 128: static size_t calc_escaped_length(const XMLByte* src, size_t src_length, const Charset::Tables& tables);
1.45 misha 129: static size_t calc_escaped_length(const String::C src, const Charset& source_charset);
130: static size_t escape_UTF8(const XMLByte* src, size_t src_length, XMLByte* dest);
131: static size_t escape(const XMLByte* src, size_t src_length, XMLByte* dest, const Charset::Tables& tables);
132:
133: static size_t calc_JSON_escaped_length_UTF8(XMLByte* src, size_t src_length);
1.46 moko 134: static size_t calc_JSON_escaped_length(const XMLByte* src, size_t src_length, const Charset::Tables& tables);
1.45 misha 135: static size_t calc_JSON_escaped_length(const String::C src, const Charset& source_charset);
136: static size_t escape_JSON_UTF8(const XMLByte* src, size_t src_length, XMLByte* dest);
137: static size_t escape_JSON(const XMLByte* src, size_t src_length, XMLByte* dest, const Charset::Tables& tables);
138:
1.47 misha 139: #ifdef XML
140:
141: private:
142: void addEncoding(char* name_cstr);
143: void initTranscoder(const String::Body name, const char* name_cstr);
144:
1.1 paf 145: public:
1.23 paf 146: /// converts xmlChar* null-terminated string to char*
1.32 paf 147: String::C transcode_cstr(const xmlChar* s);
1.8 paf 148: /// converts xmlChar* null-terminated string to parser String
1.32 paf 149: const String& transcode(const xmlChar* s);
1.23 paf 150:
151: /** converts sized char* to xmlChar*
152: @returns xmlChar* WHICH CALLER SHOULD FREE
1.16 paf 153: */
1.23 paf 154: xmlChar* transcode_buf2xchar(const char* buf, size_t buf_size);
1.32 paf 155: /// converts parser String to xmlChar*
156: xmlChar* transcode(const String& s);
157: /// converts parser String::Body to xmlChar*
158: xmlChar* transcode(const String::Body s);
1.1 paf 159:
160: private:
161:
1.23 paf 162: xmlCharEncodingHandler* ftranscoder;
1.1 paf 163:
164: #endif
165:
1.5 paf 166: };
1.25 paf 167:
168:
169: // externs
170:
171: extern Charset::UTF8CaseTable UTF8CaseToUpper;
172: extern Charset::UTF8CaseTable UTF8CaseToLower;
1.28 paf 173: void change_case_UTF8(const XMLByte* srcData, size_t srcLen,
1.43 misha 174: XMLByte* toFill, size_t toFillLen,
175: const Charset::UTF8CaseTable& table);
1.37 misha 176: size_t getUTF8BytePos(const XMLByte* srcBegin, const XMLByte* srcEnd, size_t charPos/*position in characters*/);
177: size_t getUTF8CharPos(const XMLByte* srcBegin, const XMLByte* srcEnd, size_t bytePos/*position in bytes*/);
178: size_t lengthUTF8(const XMLByte* srcBegin, const XMLByte* srcEnd);
1.44 misha 179: unsigned int lengthUTF8Char(const XMLByte c);
1.25 paf 180:
1.52 moko 181: const char *fixUTF8(const char *src);
1.43 misha 182:
183: class UTF8_string_iterator {
184: public:
185: UTF8_string_iterator(const String& astring): fsrcPtr((XMLByte*)astring.cstr()), fsrcEnd(fsrcPtr + astring.length()) {}
186: UTF8_string_iterator(XMLByte* asrcPtr, size_t length): fsrcPtr(asrcPtr), fsrcEnd(fsrcPtr + length) {}
187:
188: bool has_next();
189: XMLCh next() { return fUTF8Char; }
190: XMLByte getFirstByte(){ return ffirstByte; }
191: size_t getCharSize(){ return fcharSize; }
192: private:
193: const XMLByte* fsrcPtr;
194: const XMLByte* fsrcEnd;
195: size_t fcharSize;
196: XMLByte ffirstByte;
197: XMLCh fUTF8Char;
198: };
199:
1.1 paf 200: #endif
E-mail: