Annotation of parser3/src/include/pa_charset.h, revision 1.25
1.1 paf 1: /** @file
2: Parser: Charset connection decl.
3:
1.23 paf 4: Copyright (c) 2001-2003 ArtLebedev Group (http://www.artlebedev.com)
1.14 paf 5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
1.1 paf 6: */
7:
8: #ifndef PA_CHARSET_H
9: #define PA_CHARSET_H
1.17 paf 10:
// Version-control "$Date$" keyword stamp identifying this header's revision.
// It is declared static inside a header, so every translation unit that
// includes this file gets its own private copy of the pointer.
1.25 ! paf 11: static const char* IDENT_CHARSET_H="$Date: 2003/09/25 09:15:02 $";
1.23 paf 12:
1.1 paf 13:
14: #include "pa_exception.h"
15: #include "pa_common.h"
1.23 paf 16: #include "pa_hash.h"
17: #include "pa_array.h"
1.1 paf 18:
19: // The PCRE headers are included inside a namespace to avoid name conflicts.
// NOTE(review): a namespace does not scope preprocessor macros, so any macro
// these headers define stays visible globally (tables_length, used unqualified
// by Charset::pcre_tables below, is presumably one of them -- confirm).
20: namespace PCRE {
21: #include "pcre.h"
22: # include "internal.h"
23: };
24:
1.3 paf 25: #ifdef XML
26: #include "libxml/encoding.h"
27: #endif
28:
1.1 paf 29: // defines
30:
// Not referenced anywhere in this header.
// NOTE(review): presumably the upper bound on simultaneously loaded charsets --
// confirm at the point of use.
1.23 paf 31: #define MAX_CHARSETS 10
32:
// Capacity of Charset::Tables::toTable (number of Rec entries it can hold).
33: #define MAX_CHARSET_UNI_CODES 500
34:
// Fallback character types for builds where nothing else supplies them.
// NOTE(review): #ifndef only detects preprocessor macros; if another header
// provides XMLCh / XMLByte as typedefs, these typedefs are still emitted and
// must name the same underlying types to compile.
1.3 paf 35: # ifndef XMLCh
1.10 paf 36: typedef unsigned int XMLCh;
1.3 paf 37: # endif
38: # ifndef XMLByte
1.11 paf 39: typedef unsigned char XMLByte;
1.3 paf 40: # endif
41:
1.23 paf 42: // forwards
43:
// Declared ahead of Charset because Charset's XML-only members return this
// type by value; the full definition appears at the end of this header and is
// compiled only when XML is defined (this forward declaration is unconditional).
44: class GdomeDOMString_auto_ptr;
45:
46: // helpers
47:
// Hash with String::Body keys and String::Body values; accepted by the
// Charset::transcode(HashStringString&, ...) overload.
1.24 paf 48: typedef Hash<const String::Body, String::Body> HashStringString;
1.23 paf 49:
1.1 paf 50: /** charset holds name & transcode tables
1.23 paf 51: registers libxml transcoders
1.1 paf 52: */
1.23 paf 53: class Charset: public PA_Object {
1.1 paf 54: public:
55:
// Creates a charset called ANAME.
// NOTE(review): charsets and afile_spec are presumably forwarded to
// load_definition(); whether afile_spec may be null (e.g. for a built-in
// charset) cannot be told from this header -- confirm in the implementation.
1.24 paf 56: Charset(Request_charsets* charsets, const String::Body ANAME, const String* afile_spec);
1.1 paf 57:
// Returns the stored charset name (FNAME) by value.
1.24 paf 58: const String::Body NAME() const { return FNAME; }
1.1 paf 59:
// Returns the fisUTF8 flag.
60: bool isUTF8() const { return fisUTF8; }
61:
// Static transcode() overloads: each takes the text plus a source and a
// destination Charset. The String::C, String and String::Body overloads return
// a result; the ArrayString and HashStringString overloads take src by
// non-const reference and return void.
// NOTE(review): presumably the void overloads rewrite src in place -- confirm.
1.23 paf 62: static String::C transcode(const String::C src,
63: const Charset& source_transcoder,
64: const Charset& dest_transcoder
1.1 paf 65: );
1.21 paf 66:
1.23 paf 67: static String& transcode(const String& src,
1.21 paf 68: const Charset& source_transcoder,
1.23 paf 69: const Charset& dest_transcoder);
1.21 paf 70:
1.24 paf 71: static String::Body transcode(const String::Body src,
1.21 paf 72: const Charset& source_transcoder,
1.23 paf 73: const Charset& dest_transcoder);
1.21 paf 74:
1.23 paf 75: static void transcode(ArrayString& src,
1.21 paf 76: const Charset& source_transcoder,
1.23 paf 77: const Charset& dest_transcoder);
78:
79: static void transcode(HashStringString& src,
80: const Charset& source_transcoder,
81: const Charset& dest_transcoder);
1.1 paf 82:
// XML builds only: accessor returning a libxml character-encoding handler.
// NOTE(review): presumably backed by ftranscoder below; how the NAME argument
// is used is not visible here -- confirm.
1.9 paf 83: #ifdef XML
1.24 paf 84: xmlCharEncodingHandler& transcoder(const String::Body NAME);
1.9 paf 85: #endif
86:
1.1 paf 87: public:
88:
// Publicly accessible byte buffer of tables_length elements. tables_length is
// not declared in this header (presumably supplied by the PCRE headers
// included above -- confirm).
89: unsigned char pcre_tables[tables_length];
90:
91: private:
92:
// Internal helpers; their behaviour is defined in the implementation file.
1.23 paf 93: void load_definition(Request_charsets& charsets, const String& afile_spec);
1.1 paf 94: void sort_ToTable();
95:
// Per-instance conversion helpers; all are const member functions.
1.23 paf 96: const String::C transcodeToUTF8(const String::C src) const;
97: const String::C transcodeFromUTF8(const String::C src) const;
98:
99: const String::C transcodeToCharset(const String::C src,
100: const Charset& dest_transcoder) const;
1.1 paf 101:
1.4 paf 102: public:
103:
// Conversion tables held by each Charset (see the `tables` member below).
104: struct Tables {
// Pairs one XMLCh value (intCh) with one XMLByte value (extCh).
1.25 ! paf 105: struct Rec {
! 106: XMLCh intCh;
! 107: XMLByte extCh;
! 108: };
! 109:
// 0x100 (256) XMLCh entries.
// NOTE(review): presumably indexed by charset byte value -- confirm.
1.4 paf 110: XMLCh fromTable[0x100];
// Fixed-capacity array of MAX_CHARSET_UNI_CODES records.
1.25 ! paf 111: Rec toTable[MAX_CHARSET_UNI_CODES];
// NOTE(review): presumably the number of toTable entries in use -- confirm.
1.4 paf 112: uint toTableSize;
113: };
114:
// Case-mapping table type; UTF8CaseToUpper / UTF8CaseToLower declared after
// this class are of this type, and change_case_UTF8() takes one by reference.
1.25 ! paf 115: struct UTF8CaseTable {
// One mapping entry: a `from` code and a `to` code.
! 116: struct Rec {
! 117: XMLCh from, to;
! 118: };
! 119:
// NOTE(review): presumably `size` is the element count of `records` -- confirm.
! 120: uint size;
! 121: Rec* records;
! 122: };
! 123:
1.1 paf 124: private:
125:
// Charset name returned by NAME().
1.24 paf 126: const String::Body FNAME;
// NOTE(review): presumably a C-string copy of FNAME -- confirm.
1.23 paf 127: char* FNAME_CSTR;
// Flag returned by isUTF8().
1.1 paf 128: bool fisUTF8;
1.4 paf 129: Tables tables;
1.1 paf 130:
// Everything from here to the matching #endif exists only in XML builds.
131: #ifdef XML
132:
133: private:
1.23 paf 134: void addEncoding(char* name_cstr);
1.24 paf 135: void initTranscoder(const String::Body name, const char* name_cstr);
1.9 paf 136:
1.1 paf 137: public:
1.23 paf 138: /// converts GdomeDOMString string to char*
139: String::C transcode_cstr(GdomeDOMString* s);
1.3 paf 140: /// converts GdomeDOMString string to parser String
1.23 paf 141: const String& transcode(GdomeDOMString* s);
142: /// converts xmlChar* null-terminated string to char*
143: String::C transcode_cstr(xmlChar* s);
1.8 paf 144: /// converts xmlChar* null-terminated string to parser String
1.23 paf 145: const String& transcode(xmlChar* s);
146:
147: /** converts sized char* to xmlChar*
148: @returns xmlChar* WHICH CALLER SHOULD FREE
1.16 paf 149: */
1.23 paf 150: xmlChar* transcode_buf2xchar(const char* buf, size_t buf_size);
151: /// converts char* to GdomeDOMString
152: GdomeDOMString_auto_ptr transcode_buf2dom(const char* buf, size_t buf_size);
1.3 paf 153: /// converts parser String to GdomeDOMString
1.5 paf 154: GdomeDOMString_auto_ptr transcode(const String& s);
1.24 paf 155: /// converts parser String::Body to GdomeDOMString
156: GdomeDOMString_auto_ptr transcode(const String::Body s);
1.1 paf 157:
158: private:
159:
// libxml encoding handler pointer for this charset.
// NOTE(review): ownership and lifetime are not visible in this header.
1.23 paf 160: xmlCharEncodingHandler* ftranscoder;
1.1 paf 161:
162: #endif
163:
1.5 paf 164: };
1.25 ! paf 165:
! 166:
! 167: // externs
! 168:
// Case-mapping tables; only declared here, defined in another translation unit.
! 169: extern Charset::UTF8CaseTable UTF8CaseToUpper;
! 170: extern Charset::UTF8CaseTable UTF8CaseToLower;
// Reads bytes from srcData and writes the result to toFill, using the given
// mapping table (e.g. one of the two tables declared above).
// NOTE(review): there is no length parameter -- presumably srcData is
// null-terminated and toFill must be large enough for the output; confirm
// against the implementation.
! 171: void change_case_UTF8(const XMLByte* srcData, XMLByte* toFill,
! 172: const Charset::UTF8CaseTable& table);
! 173:
1.5 paf 174:
1.12 paf 175: #ifdef XML
1.5 paf 176: /// Auto-object used to track GdomeDOMString usage
177: class GdomeDOMString_auto_ptr {
1.23 paf 178: GdomeDOMString* fstring;
1.5 paf 179: public:
1.23 paf 180: /// frees astring afterwards!!!
181: explicit GdomeDOMString_auto_ptr(xmlChar* astring) : fstring(gdome_str_mkref_xml(astring)) {}
182: explicit GdomeDOMString_auto_ptr(GdomeDOMString* astring=0) : fstring(astring) {
1.7 paf 183: // not ref-ing, owning
1.5 paf 184: }
185: ~GdomeDOMString_auto_ptr() {
1.23 paf 186: if(fstring)
187: gdome_str_unref(fstring);
1.5 paf 188: }
1.22 paf 189: /* GdomeDOMString* get() {
190: return fstring;
191: }*/
192: GdomeDOMString* use() {
193: if(fstring)
194: gdome_str_ref(fstring);
1.5 paf 195: return fstring;
196: }
197: GdomeDOMString* operator->() {
198: return fstring;
199: }
1.22 paf 200: /* GdomeDOMString& operator*() {
1.23 paf 201: return* fstring;
1.22 paf 202: }*/
1.5 paf 203:
204: // copying
205: GdomeDOMString_auto_ptr(const GdomeDOMString_auto_ptr& src) : fstring(src.fstring) {
206: gdome_str_ref(fstring);
207: }
208: GdomeDOMString_auto_ptr& operator =(const GdomeDOMString_auto_ptr& src) {
209: if(this == &src)
1.23 paf 210: return* this;
1.5 paf 211:
1.22 paf 212: if(fstring)
213: gdome_str_unref(fstring);
1.5 paf 214: fstring=src.fstring;
1.22 paf 215: if(fstring)
216: gdome_str_ref(fstring);
1.5 paf 217:
1.23 paf 218: return* this;
1.5 paf 219: }
1.1 paf 220: };
1.12 paf 221: #endif
1.1 paf 222:
223: #endif
E-mail: