Annotation of parser3/src/include/pa_charset.h, revision 1.52
1.1 paf 1: /** @file
2: Parser: Charset connection decl.
3:
1.48 moko 4: Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com)
1.14 paf 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.1 paf 6: */
7:
8: #ifndef PA_CHARSET_H
9: #define PA_CHARSET_H
1.17 paf 10:
1.52 ! moko 11: #define IDENT_PA_CHARSET_H "$Id: pa_charset.h,v 1.51 2012/06/18 20:45:02 moko Exp $"
1.23 paf 12:
1.1 paf 13:
14: #include "pa_exception.h"
15: #include "pa_common.h"
1.23 paf 16: #include "pa_hash.h"
17: #include "pa_array.h"
1.1 paf 18:
19: #include "pcre.h"
1.51 moko 20: // we are using some pcre_internal.h stuff as well
21: #include "../lib/pcre/pa_pcre_internal.h"
1.1 paf 22:
1.3 paf 23: #ifdef XML
24: #include "libxml/encoding.h"
25: #endif
26:
1.1 paf 27: // defines
28:
// Upper bound on the number of charsets; not referenced elsewhere in this header,
// so its consumer is outside this view -- NOTE(review): confirm where it is enforced.
1.23 paf 29: #define MAX_CHARSETS 10
30:
// Capacity of Charset::Tables::toTable (the array is declared with this size).
31: #define MAX_CHARSET_UNI_CODES 500
32:
// Fallback character types used throughout this header.
// NOTE(review): #ifndef only tests for a preprocessor macro of that name; it does not
// detect an existing typedef, so these guards only help if XMLCh/XMLByte are
// provided as macros elsewhere -- confirm against the build configuration.
1.41 misha 33: #ifndef XMLCh
34: typedef unsigned int XMLCh;
35: #endif
36: #ifndef XMLByte
37: typedef unsigned char XMLByte;
38: #endif
1.3 paf 39:
1.23 paf 40: // helpers
41:
// Hash with String::Body values; accepted by the Charset::transcode(HashStringString&, ...) overload.
1.42 misha 42: typedef HashString<String::Body> HashStringString;
1.23 paf 43:
1.1 paf 44: /** charset holds name & transcode tables
1.23 paf 45: registers libxml transcoders
1.1 paf 46: */
// Charset: a named character set with its transcode tables, PCRE tables and
// (when XML is defined) a libxml encoding handler.
// This header only declares the members; all non-inline definitions live outside this view.
1.23 paf 47: class Charset: public PA_Object {
1.1 paf 48: public:
49:
// Constructs a charset called ANAME. afile_spec is a pointer (unlike the reference taken by
// load_definition), so presumably it may be null when no definition file exists -- TODO confirm.
1.24 paf 50: Charset(Request_charsets* charsets, const String::Body ANAME, const String* afile_spec);
1.1 paf 51:
// Accessors: return the stored name (FNAME / FNAME_CSTR) and the UTF-8 flag (fisUTF8).
1.24 paf 52: const String::Body NAME() const { return FNAME; }
1.30 paf 53: const char* NAME_CSTR() const { return FNAME_CSTR; }
1.1 paf 54:
55: bool isUTF8() const { return fisUTF8; }
56:
// Static transcode overloads: same operation for different containers of text.
// The String::C, String and String::Body overloads return the result;
// the ArrayString and HashStringString overloads take a non-const reference and return void,
// so they presumably rewrite the container in place -- TODO confirm.
1.23 paf 57: static String::C transcode(const String::C src,
1.36 misha 58: const Charset& source_charset,
1.38 misha 59: const Charset& dest_charset);
1.21 paf 60:
1.23 paf 61: static String& transcode(const String& src,
1.21 paf 62: const Charset& source_transcoder,
1.23 paf 63: const Charset& dest_transcoder);
1.21 paf 64:
1.24 paf 65: static String::Body transcode(const String::Body src,
1.21 paf 66: const Charset& source_transcoder,
1.23 paf 67: const Charset& dest_transcoder);
1.21 paf 68:
1.23 paf 69: static void transcode(ArrayString& src,
1.21 paf 70: const Charset& source_transcoder,
1.23 paf 71: const Charset& dest_transcoder);
72:
73: static void transcode(HashStringString& src,
74: const Charset& source_transcoder,
75: const Charset& dest_transcoder);
1.1 paf 76:
// Static escape overloads: escape src, interpreting it in source_charset.
// The exact escape format is defined by the out-of-view implementation.
1.36 misha 77: static String::C escape(const String::C src,
1.37 misha 78: const Charset& source_charset);
1.38 misha 79: static String::Body escape(const String::Body src,
80: const Charset& source_charset);
1.45 misha 81: static String& escape(const String& src,
82: const Charset& source_charset);
1.38 misha 83:
// JSON-flavoured counterparts of escape(), with the same three overload shapes.
1.45 misha 84: static String::C escape_JSON(const String::C src,
85: const Charset& source_charset);
86: static String::Body escape_JSON(const String::Body src,
87: const Charset& source_charset);
88: static String& escape_JSON(const String& src,
1.38 misha 89: const Charset& source_charset);
90:
// outPtr is passed by reference, so the callee can advance the caller's write pointer.
// not_found is presumably the byte written when src has no mapping in this charset -- TODO confirm.
1.35 misha 91: void store_Char(XMLByte*& outPtr, XMLCh src, XMLByte not_found);
1.33 misha 92:
// Only available in XML builds.
1.9 paf 93: #ifdef XML
1.24 paf 94: xmlCharEncodingHandler& transcoder(const String::Body NAME);
1.9 paf 95: #endif
96:
1.1 paf 97: public:
98:
// Public PCRE character tables for this charset. tables_length is not defined in this header;
// presumably it comes from the pcre internal header included above -- TODO confirm.
99: unsigned char pcre_tables[tables_length];
100:
101: private:
102:
// Internal helpers; defined outside this view.
1.23 paf 103: void load_definition(Request_charsets& charsets, const String& afile_spec);
1.1 paf 104: void sort_ToTable();
105:
1.23 paf 106: const String::C transcodeToUTF8(const String::C src) const;
107: const String::C transcodeFromUTF8(const String::C src) const;
108:
109: const String::C transcodeToCharset(const String::C src,
110: const Charset& dest_transcoder) const;
1.1 paf 111:
1.4 paf 112: public:
113:
// Conversion tables of a single-byte charset.
114: struct Tables {
// One mapping entry pairing an XMLCh (intCh) with an XMLByte (extCh).
1.25 paf 115: struct Rec {
116: XMLCh intCh;
117: XMLByte extCh;
118: };
119:
// 0x100 entries, i.e. one XMLCh per possible byte value.
1.4 paf 120: XMLCh fromTable[0x100];
// Fixed-capacity array of mapping entries; toTableSize is presumably the number in use -- TODO confirm.
1.25 paf 121: Rec toTable[MAX_CHARSET_UNI_CODES];
1.4 paf 122: uint toTableSize;
123: };
124:
// Case-conversion table: `size` and a pointer to from/to character pairs.
// Used as the type of the extern UTF8CaseToUpper / UTF8CaseToLower objects declared after this class.
1.25 paf 125: struct UTF8CaseTable {
126: struct Rec {
127: XMLCh from, to;
128: };
129:
130: uint size;
131: Rec* records;
132: };
133:
1.1 paf 134: private:
135:
// State behind NAME(), NAME_CSTR() and isUTF8(), plus this charset's conversion tables.
1.24 paf 136: const String::Body FNAME;
1.23 paf 137: char* FNAME_CSTR;
1.1 paf 138: bool fisUTF8;
1.4 paf 139: Tables tables;
1.1 paf 140:
// Low-level workers behind escape(): length calculation followed by the actual escaping.
// Each has a UTF-8 variant and a table-driven variant taking Charset::Tables.
// NOTE(review): calc_escaped_length_UTF8 takes a non-const XMLByte* while its siblings take
// const XMLByte*; looks like an oversight, but the definition is outside this view.
1.45 misha 141: static size_t calc_escaped_length_UTF8(XMLByte* src, size_t src_length);
1.46 moko 142: static size_t calc_escaped_length(const XMLByte* src, size_t src_length, const Charset::Tables& tables);
1.45 misha 143: static size_t calc_escaped_length(const String::C src, const Charset& source_charset);
144: static size_t escape_UTF8(const XMLByte* src, size_t src_length, XMLByte* dest);
145: static size_t escape(const XMLByte* src, size_t src_length, XMLByte* dest, const Charset::Tables& tables);
146:
// Same set of workers for escape_JSON() (same const inconsistency on the _UTF8 length function).
147: static size_t calc_JSON_escaped_length_UTF8(XMLByte* src, size_t src_length);
1.46 moko 148: static size_t calc_JSON_escaped_length(const XMLByte* src, size_t src_length, const Charset::Tables& tables);
1.45 misha 149: static size_t calc_JSON_escaped_length(const String::C src, const Charset& source_charset);
150: static size_t escape_JSON_UTF8(const XMLByte* src, size_t src_length, XMLByte* dest);
151: static size_t escape_JSON(const XMLByte* src, size_t src_length, XMLByte* dest, const Charset::Tables& tables);
152:
// Everything from here to the matching #endif exists only in XML builds.
1.47 misha 153: #ifdef XML
154:
155: private:
156: void addEncoding(char* name_cstr);
157: void initTranscoder(const String::Body name, const char* name_cstr);
158:
1.1 paf 159: public:
1.23 paf 160: /// converts xmlChar* null-terminated string to char*
1.32 paf 161: String::C transcode_cstr(const xmlChar* s);
1.8 paf 162: /// converts xmlChar* null-terminated string to parser String
1.32 paf 163: const String& transcode(const xmlChar* s);
1.23 paf 164:
165: /** converts sized char* to xmlChar*
166: @returns xmlChar* WHICH CALLER SHOULD FREE
1.16 paf 167: */
1.23 paf 168: xmlChar* transcode_buf2xchar(const char* buf, size_t buf_size);
1.32 paf 169: /// converts parser String to xmlChar*
170: xmlChar* transcode(const String& s);
171: /// converts parser String::Body to xmlChar*
172: xmlChar* transcode(const String::Body s);
1.1 paf 173:
174: private:
175:
// libxml encoding handler for this charset; presumably set up by initTranscoder() -- TODO confirm.
1.23 paf 176: xmlCharEncodingHandler* ftranscoder;
1.1 paf 177:
178: #endif
179:
1.5 paf 180: };
1.25 paf 181:
182:
183: // externs
184:
// Case tables defined in another translation unit; their type matches the `table`
// parameter of change_case_UTF8 below.
185: extern Charset::UTF8CaseTable UTF8CaseToUpper;
186: extern Charset::UTF8CaseTable UTF8CaseToLower;
// Reads srcLen bytes from srcData and writes into toFill (capacity toFillLen) using `table`.
// NOTE(review): returns void, so the number of bytes written is not reported -- confirm how callers size toFill.
1.28 paf 187: void change_case_UTF8(const XMLByte* srcData, size_t srcLen,
1.43 misha 188: XMLByte* toFill, size_t toFillLen,
189: const Charset::UTF8CaseTable& table);
// Position/length helpers over the UTF-8 byte range [srcBegin, srcEnd).
1.37 misha 190: size_t getUTF8BytePos(const XMLByte* srcBegin, const XMLByte* srcEnd, size_t charPos/*position in characters*/);
191: size_t getUTF8CharPos(const XMLByte* srcBegin, const XMLByte* srcEnd, size_t bytePos/*position in bytes*/);
192: size_t lengthUTF8(const XMLByte* srcBegin, const XMLByte* srcEnd);
// Takes a single byte; presumably the lead byte of a UTF-8 sequence whose length is returned -- TODO confirm.
1.44 misha 193: unsigned int lengthUTF8Char(const XMLByte c);
1.25 paf 194:
// Returns a const char* for src; whether the result aliases src or is newly allocated
// cannot be determined from this declaration -- TODO confirm ownership.
1.52 ! moko 195: const char *fixUTF8(const char *src);
1.43 misha 196:
197: class UTF8_string_iterator {
198: public:
199: UTF8_string_iterator(const String& astring): fsrcPtr((XMLByte*)astring.cstr()), fsrcEnd(fsrcPtr + astring.length()) {}
200: UTF8_string_iterator(XMLByte* asrcPtr, size_t length): fsrcPtr(asrcPtr), fsrcEnd(fsrcPtr + length) {}
201:
202: bool has_next();
203: XMLCh next() { return fUTF8Char; }
204: XMLByte getFirstByte(){ return ffirstByte; }
205: size_t getCharSize(){ return fcharSize; }
206: private:
207: const XMLByte* fsrcPtr;
208: const XMLByte* fsrcEnd;
209: size_t fcharSize;
210: XMLByte ffirstByte;
211: XMLCh fUTF8Char;
212: };
213:
1.1 paf 214: #endif
E-mail: