Annotation of parser3/src/include/pa_charset.h, revision 1.26
1.1 paf 1: /** @file
2: Parser: Charset connection decl.
3:
1.23 paf 4: Copyright (c) 2001-2003 ArtLebedev Group (http://www.artlebedev.com)
1.14 paf 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.1 paf 6: */
7:
8: #ifndef PA_CHARSET_H
9: #define PA_CHARSET_H
1.17 paf 10:
/// Version stamp of this header: a CVS "$Date$" keyword expansion.
/// Declared static, so every translation unit that includes the header gets its own copy.
static const char *IDENT_CHARSET_H = "$Date: 2003/09/29 09:42:12 $";
1.23 paf 12:
1.1 paf 13:
14: #include "pa_exception.h"
15: #include "pa_common.h"
1.23 paf 16: #include "pa_hash.h"
17: #include "pa_array.h"
1.1 paf 18:
19: // wrapped in a namespace to avoid name conflicts with other declarations
20: namespace PCRE {
21: #include "pcre.h"
22: # include "internal.h"
23: };
24:
1.3 paf 25: #ifdef XML
26: #include "libxml/encoding.h"
1.26 ! paf 27: #include "gdome.h"
1.3 paf 28: #endif
29:
1.1 paf 30: // defines
31:
/// Limit on the number of charsets (where it is enforced is not visible in this header).
#define MAX_CHARSETS 10

/// Capacity of Charset::Tables::toTable, in records.
#define MAX_CHARSET_UNI_CODES 500

// Fallback character types.
// Note: #ifndef only sees preprocessor macros, so each typedef below is skipped
// only when the corresponding name has been #define'd beforehand.
#ifndef XMLCh
	typedef unsigned int XMLCh;
#endif

#ifndef XMLByte
	typedef unsigned char XMLByte;
#endif
42:
1.23 paf 43: // forwards
44:
45: class GdomeDOMString_auto_ptr;
46:
47: // helpers
48:
1.24 paf 49: typedef Hash<const String::Body, String::Body> HashStringString;
1.23 paf 50:
1.1 paf 51: /** charset holds name & transcode tables
1.23 paf 52: registers libxml transcoders
1.1 paf 53: */
1.23 paf 54: class Charset: public PA_Object {
1.1 paf 55: public:
56:
1.24 paf 57: Charset(Request_charsets* charsets, const String::Body ANAME, const String* afile_spec);
1.1 paf 58:
1.24 paf 59: const String::Body NAME() const { return FNAME; }
1.1 paf 60:
61: bool isUTF8() const { return fisUTF8; }
62:
1.23 paf 63: static String::C transcode(const String::C src,
64: const Charset& source_transcoder,
65: const Charset& dest_transcoder
1.1 paf 66: );
1.21 paf 67:
1.23 paf 68: static String& transcode(const String& src,
1.21 paf 69: const Charset& source_transcoder,
1.23 paf 70: const Charset& dest_transcoder);
1.21 paf 71:
1.24 paf 72: static String::Body transcode(const String::Body src,
1.21 paf 73: const Charset& source_transcoder,
1.23 paf 74: const Charset& dest_transcoder);
1.21 paf 75:
1.23 paf 76: static void transcode(ArrayString& src,
1.21 paf 77: const Charset& source_transcoder,
1.23 paf 78: const Charset& dest_transcoder);
79:
80: static void transcode(HashStringString& src,
81: const Charset& source_transcoder,
82: const Charset& dest_transcoder);
1.1 paf 83:
1.9 paf 84: #ifdef XML
1.24 paf 85: xmlCharEncodingHandler& transcoder(const String::Body NAME);
1.9 paf 86: #endif
87:
1.1 paf 88: public:
89:
90: unsigned char pcre_tables[tables_length];
91:
92: private:
93:
1.23 paf 94: void load_definition(Request_charsets& charsets, const String& afile_spec);
1.1 paf 95: void sort_ToTable();
96:
1.23 paf 97: const String::C transcodeToUTF8(const String::C src) const;
98: const String::C transcodeFromUTF8(const String::C src) const;
99:
100: const String::C transcodeToCharset(const String::C src,
101: const Charset& dest_transcoder) const;
1.1 paf 102:
1.4 paf 103: public:
104:
105: struct Tables {
1.25 paf 106: struct Rec {
107: XMLCh intCh;
108: XMLByte extCh;
109: };
110:
1.4 paf 111: XMLCh fromTable[0x100];
1.25 paf 112: Rec toTable[MAX_CHARSET_UNI_CODES];
1.4 paf 113: uint toTableSize;
114: };
115:
1.25 paf 116: struct UTF8CaseTable {
117: struct Rec {
118: XMLCh from, to;
119: };
120:
121: uint size;
122: Rec* records;
123: };
124:
1.1 paf 125: private:
126:
1.24 paf 127: const String::Body FNAME;
1.23 paf 128: char* FNAME_CSTR;
1.1 paf 129: bool fisUTF8;
1.4 paf 130: Tables tables;
1.1 paf 131:
132: #ifdef XML
133:
134: private:
1.23 paf 135: void addEncoding(char* name_cstr);
1.24 paf 136: void initTranscoder(const String::Body name, const char* name_cstr);
1.9 paf 137:
1.1 paf 138: public:
1.23 paf 139: /// converts GdomeDOMString string to char*
140: String::C transcode_cstr(GdomeDOMString* s);
1.3 paf 141: /// converts GdomeDOMString string to parser String
1.23 paf 142: const String& transcode(GdomeDOMString* s);
143: /// converts xmlChar* null-terminated string to char*
144: String::C transcode_cstr(xmlChar* s);
1.8 paf 145: /// converts xmlChar* null-terminated string to parser String
1.23 paf 146: const String& transcode(xmlChar* s);
147:
148: /** converts sized char* to xmlChar*
149: @returns xmlChar* WHICH CALLER SHOULD FREE
1.16 paf 150: */
1.23 paf 151: xmlChar* transcode_buf2xchar(const char* buf, size_t buf_size);
152: /// converts char* to GdomeDOMString
153: GdomeDOMString_auto_ptr transcode_buf2dom(const char* buf, size_t buf_size);
1.3 paf 154: /// converts parser String to GdomeDOMString
1.5 paf 155: GdomeDOMString_auto_ptr transcode(const String& s);
1.24 paf 156: /// converts parser String::Body to GdomeDOMString
157: GdomeDOMString_auto_ptr transcode(const String::Body s);
1.1 paf 158:
159: private:
160:
1.23 paf 161: xmlCharEncodingHandler* ftranscoder;
1.1 paf 162:
163: #endif
164:
1.5 paf 165: };
1.25 paf 166:
167:
168: // externs
169:
170: extern Charset::UTF8CaseTable UTF8CaseToUpper;
171: extern Charset::UTF8CaseTable UTF8CaseToLower;
172: void change_case_UTF8(const XMLByte* srcData, XMLByte* toFill,
173: const Charset::UTF8CaseTable& table);
174:
1.5 paf 175:
1.12 paf 176: #ifdef XML
1.5 paf 177: /// Auto-object used to track GdomeDOMString usage
178: class GdomeDOMString_auto_ptr {
1.23 paf 179: GdomeDOMString* fstring;
1.5 paf 180: public:
1.23 paf 181: /// frees astring afterwards!!!
182: explicit GdomeDOMString_auto_ptr(xmlChar* astring) : fstring(gdome_str_mkref_xml(astring)) {}
183: explicit GdomeDOMString_auto_ptr(GdomeDOMString* astring=0) : fstring(astring) {
1.7 paf 184: // not ref-ing, owning
1.5 paf 185: }
186: ~GdomeDOMString_auto_ptr() {
1.23 paf 187: if(fstring)
188: gdome_str_unref(fstring);
1.5 paf 189: }
1.22 paf 190: /* GdomeDOMString* get() {
191: return fstring;
192: }*/
193: GdomeDOMString* use() {
194: if(fstring)
195: gdome_str_ref(fstring);
1.5 paf 196: return fstring;
197: }
198: GdomeDOMString* operator->() {
199: return fstring;
200: }
1.22 paf 201: /* GdomeDOMString& operator*() {
1.23 paf 202: return* fstring;
1.22 paf 203: }*/
1.5 paf 204:
205: // copying
206: GdomeDOMString_auto_ptr(const GdomeDOMString_auto_ptr& src) : fstring(src.fstring) {
207: gdome_str_ref(fstring);
208: }
209: GdomeDOMString_auto_ptr& operator =(const GdomeDOMString_auto_ptr& src) {
210: if(this == &src)
1.23 paf 211: return* this;
1.5 paf 212:
1.22 paf 213: if(fstring)
214: gdome_str_unref(fstring);
1.5 paf 215: fstring=src.fstring;
1.22 paf 216: if(fstring)
217: gdome_str_ref(fstring);
1.5 paf 218:
1.23 paf 219: return* this;
1.5 paf 220: }
1.1 paf 221: };
1.12 paf 222: #endif
1.1 paf 223:
224: #endif
E-mail: