Annotation of parser3/src/include/pa_charset.h, revision 1.36
1.1 paf 1: /** @file
2: Parser: Charset connection decl.
3:
1.31 paf 4: Copyright (c) 2001-2005 ArtLebedev Group (http://www.artlebedev.com)
1.14 paf 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.1 paf 6: */
7:
8: #ifndef PA_CHARSET_H
9: #define PA_CHARSET_H
1.17 paf 10:
// Revision stamp string for this header (holds a CVS Date keyword expansion).
// It is declared static at namespace scope, so every translation unit that
// includes this header gets its own private copy.
1.36 ! misha 11: static const char * const IDENT_CHARSET_H="$Date: 2008-06-03 17:06:51 $";
1.23 paf 12:
1.1 paf 13:
14: #include "pa_exception.h"
15: #include "pa_common.h"
1.23 paf 16: #include "pa_hash.h"
17: #include "pa_array.h"
1.1 paf 18:
19: // The PCRE headers are included inside their own namespace so that the names they declare cannot clash with ours.
// NOTE(review): preprocessor macros defined by these headers are not scoped by the namespace and stay globally visible.
20: namespace PCRE {
21: #include "pcre.h"
22: # include "internal.h"
23: };
24:
1.3 paf 25: #ifdef XML
26: #include "libxml/encoding.h"
27: #endif
28:
1.1 paf 29: // defines
30:
// Presumably the maximum number of charsets handled at once; it is not used
// anywhere in this header - confirm against the implementation.
1.23 paf 31: #define MAX_CHARSETS 10
32:
// Capacity (number of Rec entries) of Charset::Tables::toTable.
33: #define MAX_CHARSET_UNI_CODES 500
34:
// Fallback character types used by the conversion tables and helpers below.
// NOTE(review): #ifndef only detects a preprocessor macro with that name; a
// typedef called XMLCh or XMLByte coming from another header would not be
// noticed by these guards - confirm that this is the intent.
1.3 paf 35: # ifndef XMLCh
1.10 paf 36: typedef unsigned int XMLCh;
1.3 paf 37: # endif
38: # ifndef XMLByte
1.11 paf 39: typedef unsigned char XMLByte;
1.3 paf 40: # endif
41:
1.23 paf 42: // helpers
43:
// Hash keyed by String::Body with String::Body values; accepted by one of the
// static Charset::transcode overloads.
1.24 paf 44: typedef Hash<const String::Body, String::Body> HashStringString;
1.23 paf 45:
1.1 paf 46: /** charset holds name & transcode tables
1.23 paf 47: registers libxml transcoders
1.1 paf 48: */
// The libxml-related members of this class are declared only when XML is defined.
1.23 paf 49: class Charset: public PA_Object {
1.1 paf 50: public:
51:
/// Constructs a charset named ANAME.
/// NOTE(review): afile_spec is passed as a pointer, so it is presumably optional
/// (a charset without a definition file) - confirm against the implementation.
1.24 paf 52: Charset(Request_charsets* charsets, const String::Body ANAME, const String* afile_spec);
1.1 paf 53:
/// Charset name as a String::Body (returns FNAME).
1.24 paf 54: const String::Body NAME() const { return FNAME; }
/// Charset name as a C string (returns FNAME_CSTR).
1.30 paf 55: const char* NAME_CSTR() const { return FNAME_CSTR; }
1.1 paf 56:
/// Returns the fisUTF8 flag.
57: bool isUTF8() const { return fisUTF8; }
58:
/// Static transcode overloads. Each one takes the data plus a source charset
/// and a destination charset. This form takes and returns a String::C.
1.23 paf 59: static String::C transcode(const String::C src,
1.36 ! misha 60: const Charset& source_charset,
! 61: const Charset& dest_charset
1.1 paf 62: );
1.21 paf 63:
/// Same for a parser String; returns a String reference.
1.23 paf 64: static String& transcode(const String& src,
1.21 paf 65: const Charset& source_transcoder,
1.23 paf 66: const Charset& dest_transcoder);
1.21 paf 67:
/// Same for a String::Body, taken and returned by value.
1.24 paf 68: static String::Body transcode(const String::Body src,
1.21 paf 69: const Charset& source_transcoder,
1.23 paf 70: const Charset& dest_transcoder);
1.21 paf 71:
/// Takes the array by non-const reference and returns nothing - presumably
/// the elements are transcoded in place; confirm in the implementation.
1.23 paf 72: static void transcode(ArrayString& src,
1.21 paf 73: const Charset& source_transcoder,
1.23 paf 74: const Charset& dest_transcoder);
75:
/// Same shape as the ArrayString overload, for a HashStringString.
76: static void transcode(HashStringString& src,
77: const Charset& source_transcoder,
78: const Charset& dest_transcoder);
1.1 paf 79:
/// Static helper taking a String::C and its source charset and returning a
/// String::C. The escaping scheme itself is defined in the implementation
/// and cannot be seen from this header.
1.36 ! misha 80: static String::C escape(const String::C src,
! 81: const Charset& source_charset
! 82: );
! 83:
/// outPtr is taken by reference. Presumably the encoded form of src is written
/// at outPtr and the pointer is advanced, with not_found used when src has no
/// mapping in this charset - confirm in the implementation.
1.35 misha 84: void store_Char(XMLByte*& outPtr, XMLCh src, XMLByte not_found);
1.33 misha 85:
1.9 paf 86: #ifdef XML
/// Returns a reference to a libxml xmlCharEncodingHandler.
1.24 paf 87: xmlCharEncodingHandler& transcoder(const String::Body NAME);
1.9 paf 88: #endif
89:
1.1 paf 90: public:
91:
/// Public byte buffer of tables_length elements. tables_length is not declared
/// in this file; presumably it comes from the PCRE internal.h included near
/// the top of the header - confirm.
92: unsigned char pcre_tables[tables_length];
93:
94: private:
95:
/// Private helper taking the Request_charsets object and the definition file spec.
1.23 paf 96: void load_definition(Request_charsets& charsets, const String& afile_spec);
/// Presumably sorts tables.toTable - confirm in the implementation.
1.1 paf 97: void sort_ToTable();
98:
/// Private per-instance String::C conversions; presumably the building blocks
/// behind the static transcode overloads - confirm in the implementation.
1.23 paf 99: const String::C transcodeToUTF8(const String::C src) const;
100: const String::C transcodeFromUTF8(const String::C src) const;
101:
102: const String::C transcodeToCharset(const String::C src,
103: const Charset& dest_transcoder) const;
1.1 paf 104:
1.4 paf 105: public:
106:
/// Conversion tables; each Charset holds one instance in its tables member.
107: struct Tables {
/// One pair of an XMLCh value (intCh) and an XMLByte value (extCh).
1.25 paf 108: struct Rec {
109: XMLCh intCh;
110: XMLByte extCh;
111: };
112:
// 0x100 XMLCh entries - presumably one per possible byte value; confirm.
1.4 paf 113: XMLCh fromTable[0x100];
// Room for MAX_CHARSET_UNI_CODES records; toTableSize presumably counts the entries in use.
1.25 paf 114: Rec toTable[MAX_CHARSET_UNI_CODES];
1.4 paf 115: uint toTableSize;
116: };
117:
/// Case-conversion table: a record count (size) and a pointer to from/to
/// records. It is the type of the UTF8CaseToUpper and UTF8CaseToLower externs
/// and of the table argument of change_case_UTF8, all declared after this class.
1.25 paf 118: struct UTF8CaseTable {
119: struct Rec {
120: XMLCh from, to;
121: };
122:
123: uint size;
124: Rec* records;
125: };
126:
1.1 paf 127: private:
128:
// Backing fields for NAME(), NAME_CSTR() and isUTF8(), plus the conversion tables.
1.24 paf 129: const String::Body FNAME;
1.23 paf 130: char* FNAME_CSTR;
1.1 paf 131: bool fisUTF8;
1.4 paf 132: Tables tables;
1.1 paf 133:
134: #ifdef XML
135:
136: private:
// Private libxml setup helpers; their behaviour is defined in the implementation.
1.23 paf 137: void addEncoding(char* name_cstr);
1.24 paf 138: void initTranscoder(const String::Body name, const char* name_cstr);
1.9 paf 139:
1.1 paf 140: public:
1.23 paf 141: /// converts xmlChar* null-terminated string to char*
// NOTE(review): the declared return type is String::C, not a bare char pointer.
1.32 paf 142: String::C transcode_cstr(const xmlChar* s);
1.8 paf 143: /// converts xmlChar* null-terminated string to parser String
1.32 paf 144: const String& transcode(const xmlChar* s);
1.23 paf 145:
146: /** converts sized char* to xmlChar*
147: @returns xmlChar* WHICH CALLER SHOULD FREE
1.16 paf 148: */
1.23 paf 149: xmlChar* transcode_buf2xchar(const char* buf, size_t buf_size);
// NOTE(review): unlike transcode_buf2xchar, the two overloads below do not
// document who owns the returned xmlChar pointer - confirm before freeing it.
1.32 paf 150: /// converts parser String to xmlChar*
151: xmlChar* transcode(const String& s);
152: /// converts parser String::Body to xmlChar*
153: xmlChar* transcode(const String::Body s);
1.1 paf 154:
155: private:
156:
/// Pointer to a libxml xmlCharEncodingHandler.
1.23 paf 157: xmlCharEncodingHandler* ftranscoder;
1.1 paf 158:
159: #endif
160:
1.5 paf 161: };
1.25 paf 162:
163:
164: // externs
165:
// Two Charset::UTF8CaseTable objects declared here and defined in another
// translation unit; going by their names, the upper-casing and lower-casing tables.
166: extern Charset::UTF8CaseTable UTF8CaseToUpper;
167: extern Charset::UTF8CaseTable UTF8CaseToLower;
// Declaration only; the definition lives elsewhere.
// Takes the source bytes with their length, a destination buffer with its
// length, and the case table to apply.
// Presumably writes the case-converted text into toFill without exceeding
// toFillLen - confirm in the implementation.
1.28 paf 168: void change_case_UTF8(const XMLByte* srcData, size_t srcLen,
169: XMLByte* toFill, size_t toFillLen,
1.25 paf 170: const Charset::UTF8CaseTable& table);
171:
1.1 paf 172: #endif
E-mail: