Annotation of parser3/src/include/pa_charset.h, revision 1.49
1.1 paf 1: /** @file
2: Parser: Charset connection decl.
3:
1.48 moko 4: Copyright (c) 2001-2012 Art. Lebedev Studio (http://www.artlebedev.com)
1.14 paf 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.1 paf 6: */
7:
8: #ifndef PA_CHARSET_H
9: #define PA_CHARSET_H
1.17 paf 10:
1.49 ! moko 11: #define IDENT_PA_CHARSET_H "$Id: pa_charset.h,v 1.48 2012-03-16 09:24:08 moko Exp $"
1.23 paf 12:
1.1 paf 13:
14: #include "pa_exception.h"
15: #include "pa_common.h"
1.23 paf 16: #include "pa_hash.h"
17: #include "pa_array.h"
1.1 paf 18:
1.40 misha 19: // pcre.h must be included BEFORE pcre_internal.h
1.1 paf 20: #include "pcre.h"
1.40 misha 21: // config.h also has to be included in order to use pcre_internal.h
22: #include "pcre_internal.h"
1.1 paf 23:
1.3 paf 24: #ifdef XML
25: #include "libxml/encoding.h"
26: #endif
27:
1.1 paf 28: // defines
29:
// NOTE(review): MAX_CHARSETS is not referenced in the visible part of this header;
// presumably the maximum number of charsets handled at once -- confirm against its users.
1.23 paf 30: #define MAX_CHARSETS 10
31:
// Number of Rec entries reserved in Charset::Tables::toTable (declared further down in this header).
32: #define MAX_CHARSET_UNI_CODES 500
33:
// Fallback character types for builds where nothing else provides them.
// Caveat: #ifndef tests for a preprocessor MACRO of that name, it cannot detect an
// existing typedef; the fallback typedefs are skipped only when XMLCh / XMLByte
// have been #define-d before this point.
1.41 misha 34: #ifndef XMLCh
// XMLCh: unsigned int; used in this header for the intCh/from/to members and for store_Char's src.
35: typedef unsigned int XMLCh;
36: #endif
37: #ifndef XMLByte
// XMLByte: one unsigned byte; used in this header for the raw src/dest byte buffers.
38: typedef unsigned char XMLByte;
39: #endif
1.3 paf 40:
1.23 paf 41: // helpers
42:
// Shorthand for HashString instantiated with String::Body; taken by one of the
// Charset::transcode overloads below.
// NOTE(review): presumably a string-keyed hash with String::Body values -- see pa_hash.h to confirm.
1.42 misha 43: typedef HashString<String::Body> HashStringString;
1.23 paf 44:
1.1 paf 45: /** charset holds name & transcode tables
1.23 paf 46: registers libxml transcoders
1.1 paf 47: */
// Declaration only: apart from the three inline accessors (NAME, NAME_CSTR, isUTF8)
// every member function is defined outside this header.
// The order of the data members below determines the object layout -- do not reorder.
1.23 paf 48: class Charset: public PA_Object {
1.1 paf 49: public:
50:
// ANAME is the charset name; afile_spec is passed as a pointer.
// NOTE(review): presumably afile_spec may be null for a charset without a definition
// file, and otherwise names the file read by load_definition() -- confirm in the implementation.
1.24 paf 51: Charset(Request_charsets* charsets, const String::Body ANAME, const String* afile_spec);
1.1 paf 52:
// Returns a copy of the stored name (FNAME).
1.24 paf 53: const String::Body NAME() const { return FNAME; }
// Returns the stored name as a C string (FNAME_CSTR); the pointer is not copied.
1.30 paf 54: const char* NAME_CSTR() const { return FNAME_CSTR; }
1.1 paf 55:
// Returns the fisUTF8 flag.
56: bool isUTF8() const { return fisUTF8; }
57:
// transcode() overload family (static).
// Judging by the parameter names, src is converted from the source charset to the
// destination charset. The String::C / String / String::Body overloads return the result;
// the ArrayString / HashStringString overloads take src by non-const reference and
// return void, so they presumably convert the container's contents in place -- confirm.
1.23 paf 58: static String::C transcode(const String::C src,
1.36 misha 59: const Charset& source_charset,
1.38 misha 60: const Charset& dest_charset);
1.21 paf 61:
1.23 paf 62: static String& transcode(const String& src,
1.21 paf 63: const Charset& source_transcoder,
1.23 paf 64: const Charset& dest_transcoder);
1.21 paf 65:
1.24 paf 66: static String::Body transcode(const String::Body src,
1.21 paf 67: const Charset& source_transcoder,
1.23 paf 68: const Charset& dest_transcoder);
1.21 paf 69:
1.23 paf 70: static void transcode(ArrayString& src,
1.21 paf 71: const Charset& source_transcoder,
1.23 paf 72: const Charset& dest_transcoder);
73:
74: static void transcode(HashStringString& src,
75: const Charset& source_transcoder,
76: const Charset& dest_transcoder);
1.1 paf 77:
// escape() overload family (static): one overload per string representation
// (String::C, String::Body, String). The escaping rules themselves live in the
// implementation and are not visible in this header.
1.36 misha 78: static String::C escape(const String::C src,
1.37 misha 79: const Charset& source_charset);
1.38 misha 80: static String::Body escape(const String::Body src,
81: const Charset& source_charset);
1.45 misha 82: static String& escape(const String& src,
83: const Charset& source_charset);
1.38 misha 84:
// escape_JSON() overload family (static): same three overloads as escape().
// NOTE(review): presumably produces JSON string escapes -- confirm in the implementation.
1.45 misha 85: static String::C escape_JSON(const String::C src,
86: const Charset& source_charset);
87: static String::Body escape_JSON(const String::Body src,
88: const Charset& source_charset);
89: static String& escape_JSON(const String& src,
1.38 misha 90: const Charset& source_charset);
91:
// outPtr is a reference to the output pointer, so the callee can move it.
// NOTE(review): presumably writes the byte mapped from src (or not_found when there is
// no mapping) and advances outPtr -- confirm in the implementation.
1.35 misha 92: void store_Char(XMLByte*& outPtr, XMLCh src, XMLByte not_found);
1.33 misha 93:
// Available only in builds with XML defined.
1.9 paf 94: #ifdef XML
1.24 paf 95: xmlCharEncodingHandler& transcoder(const String::Body NAME);
1.9 paf 96: #endif
97:
1.1 paf 98: public:
99:
// Public byte array of tables_length elements. tables_length is not defined in this
// header -- presumably it comes from pcre_internal.h (included above); confirm.
100: unsigned char pcre_tables[tables_length];
101:
102: private:
103:
// Private helpers, defined outside this header.
1.23 paf 104: void load_definition(Request_charsets& charsets, const String& afile_spec);
1.1 paf 105: void sort_ToTable();
106:
// Per-instance conversions; all three are const member functions taking String::C by value.
1.23 paf 107: const String::C transcodeToUTF8(const String::C src) const;
108: const String::C transcodeFromUTF8(const String::C src) const;
109:
110: const String::C transcodeToCharset(const String::C src,
111: const Charset& dest_transcoder) const;
1.1 paf 112:
1.4 paf 113: public:
114:
// Conversion tables of one charset.
115: struct Tables {
// One entry of toTable: pairs an XMLCh value (intCh) with a single byte (extCh).
1.25 paf 116: struct Rec {
117: XMLCh intCh;
118: XMLByte extCh;
119: };
120:
// 0x100 (256) XMLCh entries, i.e. one slot per possible byte value.
1.4 paf 121: XMLCh fromTable[0x100];
// Fixed capacity of MAX_CHARSET_UNI_CODES records.
1.25 paf 122: Rec toTable[MAX_CHARSET_UNI_CODES];
// NOTE(review): presumably the number of toTable records actually in use -- confirm.
1.4 paf 123: uint toTableSize;
124: };
125:
// Table of (from, to) XMLCh pairs; the type of the UTF8CaseToUpper / UTF8CaseToLower
// externs declared after this class.
1.25 paf 126: struct UTF8CaseTable {
127: struct Rec {
128: XMLCh from, to;
129: };
130:
// NOTE(review): presumably the number of elements records points to -- confirm.
131: uint size;
132: Rec* records;
133: };
134:
1.1 paf 135: private:
136:
// State returned by NAME(), NAME_CSTR() and isUTF8() respectively, plus this charset's tables.
1.24 paf 137: const String::Body FNAME;
1.23 paf 138: char* FNAME_CSTR;
1.1 paf 139: bool fisUTF8;
1.4 paf 140: Tables tables;
1.1 paf 141:
// Low-level static helpers working on raw byte buffers.
// NOTE(review): the calc_* functions presumably return the size needed for the escaped
// output and the escape* functions the number of bytes written to dest -- confirm.
// NOTE(review): the two *_length_UTF8 functions take a non-const XMLByte* src while all
// their siblings take const XMLByte*; changing that requires touching the definitions too.
1.45 misha 142: static size_t calc_escaped_length_UTF8(XMLByte* src, size_t src_length);
1.46 moko 143: static size_t calc_escaped_length(const XMLByte* src, size_t src_length, const Charset::Tables& tables);
1.45 misha 144: static size_t calc_escaped_length(const String::C src, const Charset& source_charset);
145: static size_t escape_UTF8(const XMLByte* src, size_t src_length, XMLByte* dest);
146: static size_t escape(const XMLByte* src, size_t src_length, XMLByte* dest, const Charset::Tables& tables);
147:
148: static size_t calc_JSON_escaped_length_UTF8(XMLByte* src, size_t src_length);
1.46 moko 149: static size_t calc_JSON_escaped_length(const XMLByte* src, size_t src_length, const Charset::Tables& tables);
1.45 misha 150: static size_t calc_JSON_escaped_length(const String::C src, const Charset& source_charset);
151: static size_t escape_JSON_UTF8(const XMLByte* src, size_t src_length, XMLByte* dest);
152: static size_t escape_JSON(const XMLByte* src, size_t src_length, XMLByte* dest, const Charset::Tables& tables);
153:
// Everything from here to the matching #endif exists only in builds with XML defined.
1.47 misha 154: #ifdef XML
155:
156: private:
157: void addEncoding(char* name_cstr);
158: void initTranscoder(const String::Body name, const char* name_cstr);
159:
1.1 paf 160: public:
1.23 paf 161: /// converts xmlChar* null-terminated string to char*
1.32 paf 162: String::C transcode_cstr(const xmlChar* s);
1.8 paf 163: /// converts xmlChar* null-terminated string to parser String
1.32 paf 164: const String& transcode(const xmlChar* s);
1.23 paf 165:
166: /** converts sized char* to xmlChar*
167: @returns xmlChar* WHICH CALLER SHOULD FREE
1.16 paf 168: */
1.23 paf 169: xmlChar* transcode_buf2xchar(const char* buf, size_t buf_size);
// NOTE(review): the two overloads below also return a raw xmlChar*; whether the caller
// has to free it as with transcode_buf2xchar is not stated here -- confirm.
1.32 paf 170: /// converts parser String to xmlChar*
171: xmlChar* transcode(const String& s);
172: /// converts parser String::Body to xmlChar*
173: xmlChar* transcode(const String::Body s);
1.1 paf 174:
175: private:
176:
// NOTE(review): presumably the handler that transcoder() hands out -- confirm.
1.23 paf 177: xmlCharEncodingHandler* ftranscoder;
1.1 paf 178:
179: #endif
180:
1.5 paf 181: };
1.25 paf 182:
183:
184: // externs
185:
// Case-conversion tables, defined outside this header; each can be passed as the
// `table` argument of change_case_UTF8().
186: extern Charset::UTF8CaseTable UTF8CaseToUpper;
187: extern Charset::UTF8CaseTable UTF8CaseToLower;
// Reads srcLen bytes at srcData and fills the caller-supplied buffer toFill
// (toFillLen bytes) using the given case table.
// NOTE(review): how a too-small toFill is handled is not visible here -- confirm.
1.28 paf 188: void change_case_UTF8(const XMLByte* srcData, size_t srcLen,
1.43 misha 189: XMLByte* toFill, size_t toFillLen,
190: const Charset::UTF8CaseTable& table);
// Position conversions within the byte range [srcBegin, srcEnd): character index -> byte
// offset and byte offset -> character index (see the inline parameter comments).
1.37 misha 191: size_t getUTF8BytePos(const XMLByte* srcBegin, const XMLByte* srcEnd, size_t charPos/*position in characters*/);
192: size_t getUTF8CharPos(const XMLByte* srcBegin, const XMLByte* srcEnd, size_t bytePos/*position in bytes*/);
// NOTE(review): presumably the length in characters of the byte range [srcBegin, srcEnd) -- confirm.
193: size_t lengthUTF8(const XMLByte* srcBegin, const XMLByte* srcEnd);
// NOTE(review): presumably the byte length of the UTF-8 sequence that starts with byte c -- confirm.
1.44 misha 194: unsigned int lengthUTF8Char(const XMLByte c);
1.25 paf 195:
1.43 misha 196:
197: class UTF8_string_iterator {
198: public:
199: UTF8_string_iterator(const String& astring): fsrcPtr((XMLByte*)astring.cstr()), fsrcEnd(fsrcPtr + astring.length()) {}
200: UTF8_string_iterator(XMLByte* asrcPtr, size_t length): fsrcPtr(asrcPtr), fsrcEnd(fsrcPtr + length) {}
201:
202: bool has_next();
203: XMLCh next() { return fUTF8Char; }
204: XMLByte getFirstByte(){ return ffirstByte; }
205: size_t getCharSize(){ return fcharSize; }
206: private:
207: const XMLByte* fsrcPtr;
208: const XMLByte* fsrcEnd;
209: size_t fcharSize;
210: XMLByte ffirstByte;
211: XMLCh fUTF8Char;
212: };
213:
1.1 paf 214: #endif
E-mail: