Annotation of parser3/src/include/pa_charset.h, revision 1.45
1.1 paf 1: /** @file
2: Parser: Charset connection decl.
3:
1.41 misha 4: Copyright (c) 2001-2009 ArtLebedev Group (http://www.artlebedev.com)
1.14 paf 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.1 paf 6: */
7:
8: #ifndef PA_CHARSET_H
9: #define PA_CHARSET_H
1.17 paf 10:
1.45 ! misha 11: static const char * const IDENT_CHARSET_H="$Date: 2009-11-06 05:01:24 $";
1.23 paf 12:
1.1 paf 13:
14: #include "pa_exception.h"
15: #include "pa_common.h"
1.23 paf 16: #include "pa_hash.h"
17: #include "pa_array.h"
1.1 paf 18:
1.40 misha 19: // pcre.h must be included BEFORE pcre_internal.h
1.1 paf 20: #include "pcre.h"
1.40 misha 21: // have to call config.h for using pcre_internal.h as well
22: #include "../lib/pcre/config.h"
23: #include "pcre_internal.h"
1.1 paf 24:
1.3 paf 25: #ifdef XML
26: #include "libxml/encoding.h"
27: #endif
28:
1.1 paf 29: // defines
30:
1.23 paf 31: #define MAX_CHARSETS 10
32:
33: #define MAX_CHARSET_UNI_CODES 500
34:
1.41 misha 35: #ifndef XMLCh
36: typedef unsigned int XMLCh;
37: #endif
38: #ifndef XMLByte
39: typedef unsigned char XMLByte;
40: #endif
1.3 paf 41:
1.23 paf 42: // helpers
43:
1.42 misha 44: typedef HashString<String::Body> HashStringString;
1.23 paf 45:
1.1 paf 46: /** charset holds name & transcode tables
1.23 paf 47: registers libxml transcoders
1.1 paf 48: */
1.23 paf 49: class Charset: public PA_Object {
1.1 paf 50: public:
51:
1.24 paf 52: Charset(Request_charsets* charsets, const String::Body ANAME, const String* afile_spec);
1.1 paf 53:
1.24 paf 54: const String::Body NAME() const { return FNAME; }
1.30 paf 55: const char* NAME_CSTR() const { return FNAME_CSTR; }
1.1 paf 56:
57: bool isUTF8() const { return fisUTF8; }
58:
1.23 paf 59: static String::C transcode(const String::C src,
1.36 misha 60: const Charset& source_charset,
1.38 misha 61: const Charset& dest_charset);
1.21 paf 62:
1.23 paf 63: static String& transcode(const String& src,
1.21 paf 64: const Charset& source_transcoder,
1.23 paf 65: const Charset& dest_transcoder);
1.21 paf 66:
1.24 paf 67: static String::Body transcode(const String::Body src,
1.21 paf 68: const Charset& source_transcoder,
1.23 paf 69: const Charset& dest_transcoder);
1.21 paf 70:
1.23 paf 71: static void transcode(ArrayString& src,
1.21 paf 72: const Charset& source_transcoder,
1.23 paf 73: const Charset& dest_transcoder);
74:
75: static void transcode(HashStringString& src,
76: const Charset& source_transcoder,
77: const Charset& dest_transcoder);
1.1 paf 78:
1.36 misha 79: static String::C escape(const String::C src,
1.37 misha 80: const Charset& source_charset);
1.38 misha 81: static String::Body escape(const String::Body src,
82: const Charset& source_charset);
1.45 ! misha 83: static String& escape(const String& src,
! 84: const Charset& source_charset);
1.38 misha 85:
1.45 ! misha 86: static String::C escape_JSON(const String::C src,
! 87: const Charset& source_charset);
! 88: static String::Body escape_JSON(const String::Body src,
! 89: const Charset& source_charset);
! 90: static String& escape_JSON(const String& src,
1.38 misha 91: const Charset& source_charset);
92:
1.35 misha 93: void store_Char(XMLByte*& outPtr, XMLCh src, XMLByte not_found);
1.33 misha 94:
1.9 paf 95: #ifdef XML
1.24 paf 96: xmlCharEncodingHandler& transcoder(const String::Body NAME);
1.9 paf 97: #endif
98:
1.1 paf 99: public:
100:
101: unsigned char pcre_tables[tables_length];
102:
103: private:
104:
1.23 paf 105: void load_definition(Request_charsets& charsets, const String& afile_spec);
1.1 paf 106: void sort_ToTable();
107:
1.23 paf 108: const String::C transcodeToUTF8(const String::C src) const;
109: const String::C transcodeFromUTF8(const String::C src) const;
110:
111: const String::C transcodeToCharset(const String::C src,
112: const Charset& dest_transcoder) const;
1.1 paf 113:
1.4 paf 114: public:
115:
116: struct Tables {
1.25 paf 117: struct Rec {
118: XMLCh intCh;
119: XMLByte extCh;
120: };
121:
1.4 paf 122: XMLCh fromTable[0x100];
1.25 paf 123: Rec toTable[MAX_CHARSET_UNI_CODES];
1.4 paf 124: uint toTableSize;
125: };
126:
1.25 paf 127: struct UTF8CaseTable {
128: struct Rec {
129: XMLCh from, to;
130: };
131:
132: uint size;
133: Rec* records;
134: };
135:
1.1 paf 136: private:
137:
1.24 paf 138: const String::Body FNAME;
1.23 paf 139: char* FNAME_CSTR;
1.1 paf 140: bool fisUTF8;
1.4 paf 141: Tables tables;
1.1 paf 142:
143: #ifdef XML
144:
145: private:
1.23 paf 146: void addEncoding(char* name_cstr);
1.24 paf 147: void initTranscoder(const String::Body name, const char* name_cstr);
1.45 ! misha 148:
! 149: static size_t calc_escaped_length_UTF8(XMLByte* src, size_t src_length);
! 150: static size_t calc_escaped_length(XMLByte* src, size_t src_length, const Charset::Tables& tables);
! 151: static size_t calc_escaped_length(const String::C src, const Charset& source_charset);
! 152: static size_t escape_UTF8(const XMLByte* src, size_t src_length, XMLByte* dest);
! 153: static size_t escape(const XMLByte* src, size_t src_length, XMLByte* dest, const Charset::Tables& tables);
! 154:
! 155: static size_t calc_JSON_escaped_length_UTF8(XMLByte* src, size_t src_length);
! 156: static size_t calc_JSON_escaped_length(XMLByte* src, size_t src_length, const Charset::Tables& tables);
! 157: static size_t calc_JSON_escaped_length(const String::C src, const Charset& source_charset);
! 158: static size_t escape_JSON_UTF8(const XMLByte* src, size_t src_length, XMLByte* dest);
! 159: static size_t escape_JSON(const XMLByte* src, size_t src_length, XMLByte* dest, const Charset::Tables& tables);
! 160:
1.1 paf 161: public:
1.23 paf 162: /// converts xmlChar* null-terminated string to char*
1.32 paf 163: String::C transcode_cstr(const xmlChar* s);
1.8 paf 164: /// converts xmlChar* null-terminated string to parser String
1.32 paf 165: const String& transcode(const xmlChar* s);
1.23 paf 166:
167: /** converts sized char* to xmlChar*
168: @returns xmlChar* WHICH CALLER SHOULD FREE
1.16 paf 169: */
1.23 paf 170: xmlChar* transcode_buf2xchar(const char* buf, size_t buf_size);
1.32 paf 171: /// converts parser String to xmlChar*
172: xmlChar* transcode(const String& s);
173: /// converts parser String::Body to xmlChar*
174: xmlChar* transcode(const String::Body s);
1.1 paf 175:
176: private:
177:
1.23 paf 178: xmlCharEncodingHandler* ftranscoder;
1.1 paf 179:
180: #endif
181:
1.5 paf 182: };
1.25 paf 183:
184:
185: // externs
186:
187: extern Charset::UTF8CaseTable UTF8CaseToUpper;
188: extern Charset::UTF8CaseTable UTF8CaseToLower;
1.28 paf 189: void change_case_UTF8(const XMLByte* srcData, size_t srcLen,
1.43 misha 190: XMLByte* toFill, size_t toFillLen,
191: const Charset::UTF8CaseTable& table);
1.37 misha 192: size_t getUTF8BytePos(const XMLByte* srcBegin, const XMLByte* srcEnd, size_t charPos/*position in characters*/);
193: size_t getUTF8CharPos(const XMLByte* srcBegin, const XMLByte* srcEnd, size_t bytePos/*position in bytes*/);
194: size_t lengthUTF8(const XMLByte* srcBegin, const XMLByte* srcEnd);
1.44 misha 195: unsigned int lengthUTF8Char(const XMLByte c);
1.25 paf 196:
1.43 misha 197:
198: class UTF8_string_iterator {
199: public:
200: UTF8_string_iterator(const String& astring): fsrcPtr((XMLByte*)astring.cstr()), fsrcEnd(fsrcPtr + astring.length()) {}
201: UTF8_string_iterator(XMLByte* asrcPtr, size_t length): fsrcPtr(asrcPtr), fsrcEnd(fsrcPtr + length) {}
202:
203: bool has_next();
204: XMLCh next() { return fUTF8Char; }
205: XMLByte getFirstByte(){ return ffirstByte; }
206: size_t getCharSize(){ return fcharSize; }
207: private:
208: const XMLByte* fsrcPtr;
209: const XMLByte* fsrcEnd;
210: size_t fcharSize;
211: XMLByte ffirstByte;
212: XMLCh fUTF8Char;
213: };
214:
1.1 paf 215: #endif
E-mail: