Annotation of parser3/src/main/pa_charset_connection.C, revision 1.11
1.1 parser 1: /** @file
2: Parser: Charset connection implementation.
3:
4: Copyright (c) 2001 ArtLebedev Group (http://www.artlebedev.com)
1.11 ! paf 5: Author: Alexander Petrosyan <paf@design.ru> (http://paf.design.ru)
1.1 parser 6:
1.11 ! paf 7: $Id: pa_charset_connection.C,v 1.10 2001/10/19 14:15:23 parser Exp $
1.1 parser 8: */
9:
10: #include "pa_charset_connection.h"
11: //#include "pa_exception.h"
12: //#include "pa_common.h"
13: //#include "pa_threads.h"
14:
15: #ifdef XML
16: # include <util/XercesDefs.hpp>
17: # include <util/TransENameMap.hpp>
18: # include <util/XML256TableTranscoder.hpp>
19: # include <util/PlatformUtils.hpp>
20: # include <PlatformSupport/XalanTranscodingServices.hpp>
21: #endif
22:
23:
24: // globals
25:
26:
27: // consts
28:
29: #define MAX_CHARSET_UNI_CODES 500
30:
31: //
32:
33: inline void prepare_case_tables(unsigned char *tables) {
34: unsigned char *lcc_table=tables+lcc_offset;
35: unsigned char *fcc_table=tables+fcc_offset;
36: for(int i=0; i<0x100; i++)
37: lcc_table[i]=fcc_table[i]=i;
38: }
39: inline void cstr2ctypes(unsigned char *tables, const unsigned char *cstr,
40: unsigned char bit) {
41: unsigned char *ctypes_table=tables+ctypes_offset;
42: ctypes_table[0]=bit;
43: for(; *cstr; cstr++) {
44: unsigned char c=*cstr;
45: ctypes_table[c]|=bit;
46: }
47: }
/// Converts a table cell to a character code.
/// - null or empty string: 0
/// - exactly one character: the code of that character (as unsigned char)
/// - anything longer: the number parsed by strtol with base 0
///   (so decimal, 0x-prefixed hex and 0-prefixed octal are accepted)
inline unsigned int to_wchar_code(const char *cstr) {
	if(cstr==0 || cstr[0]==0)
		return 0;

	const bool single_char=cstr[1]==0;
	if(single_char) {
		const unsigned char code=(unsigned char)cstr[0];
		return (unsigned int)code;
	}

	// the end-of-number position is not needed
	const long parsed=strtol(cstr, 0, 0);
	return (unsigned int)parsed;
}
/// A cell is "true" when it is present and non-empty.
inline bool to_bool(const char *cstr) {
	if(!cstr)
		return false;
	return cstr[0]!=0;
}
60: static void element2ctypes(unsigned char c, bool belongs,
61: unsigned char *tables, unsigned char bit, int group_offset=-1) {
62: if(!belongs)
63: return;
64:
65: unsigned char *ctypes_table=tables+ctypes_offset;
66:
67: ctypes_table[c]|=bit;
68: if(group_offset>=0)
69: tables[cbits_offset+group_offset+c/8] |= 1 << (c%8);
70: }
71: static void element2case(unsigned char from, unsigned char to,
72: unsigned char *tables) {
73: if(!to)
74: return;
75:
76: unsigned char *lcc_table=tables+lcc_offset;
77: unsigned char *fcc_table=tables+fcc_offset;
78: lcc_table[from]=to;
79: fcc_table[from]=to; fcc_table[to]=from;
80: }
81:
82: #ifdef XML
83:
84: static int sort_cmp_Trans_rec_intCh(const void *a, const void *b) {
1.7 parser 85: return
86: static_cast<const XMLTransService::TransRec *>(a)->intCh-
87: static_cast<const XMLTransService::TransRec *>(b)->intCh;
1.1 parser 88: }
89:
90: template <class TType> class ENameMapFor2 : public ENameMap
91: {
92: public :
93: // -----------------------------------------------------------------------
94: // Constructors and Destructor
95: // -----------------------------------------------------------------------
96: ENameMapFor2(
97: const XMLCh* const encodingName
98: , const XMLCh* const fromTable
99: , const XMLTransService::TransRec* const toTable
100: , const unsigned int toTableSize
101: ) : ENameMap(encodingName),
102: ffromTable(fromTable),
103: ftoTable(toTable),
104: ftoTableSize(toTableSize) {}
105:
106: // -----------------------------------------------------------------------
107: // Implementation of virtual factory method
108: // -----------------------------------------------------------------------
109: virtual XMLTranscoder* makeNew(const unsigned int blockSize) const {
110: return new TType(
111: getKey(),
112: blockSize,
113: ffromTable,
114: ftoTable, ftoTableSize);
115: }
116: private:
117: const XMLCh* const ffromTable;
118: const XMLTransService::TransRec* const ftoTable;
119: const unsigned int ftoTableSize;
120:
121: private :
122: // -----------------------------------------------------------------------
123: // Unimplemented constructors and operators
124: // -----------------------------------------------------------------------
125: ENameMapFor2();
126: ENameMapFor2(const ENameMapFor2<TType>&);
127: void operator=(const ENameMapFor2<TType>&);
128: };
129:
130: class XML256TableTranscoder2 : public XML256TableTranscoder
131: {
132: public :
133: XML256TableTranscoder2(
134: const XMLCh* const encodingName
135: , const unsigned int blockSize
136: , const XMLCh* const fromTable
137: , const XMLTransService::TransRec* const toTable
138: , const unsigned int toTableSize
139: ) : XML256TableTranscoder(encodingName, blockSize, fromTable, toTable, toTableSize) {}
140:
141: private :
142: XML256TableTranscoder2();
143: XML256TableTranscoder2(const XML256TableTranscoder2&);
144: void operator=(const XML256TableTranscoder2&);
145: };
146: #endif
147:
148: void Charset_connection::load(Pool& pool, time_t new_disk_time) {
149: // pcre_tables
150: // lowcase, flipcase, bits digit+word+whitespace, masks
1.9 parser 151:
152: // must not move this inside of prepare_case_tables
153: // don't know the size there
154: memset(fpcre_tables, 0, sizeof(fpcre_tables));
1.1 parser 155: prepare_case_tables(fpcre_tables);
156: cstr2ctypes(fpcre_tables, (const unsigned char *)"*+?{^.$|()[", ctype_meta);
157:
158: #ifdef XML
159: // transcoder
160: XMLCh *fromTable=(XMLCh *)calloc(sizeof(XMLCh)*0x100);
161: XMLTransService::TransRec *toTable=(XMLTransService::TransRec *)calloc(
162: sizeof(XMLTransService::TransRec)*MAX_CHARSET_UNI_CODES);
163: unsigned int toTableSz=0;
1.7 parser 164: // strangly vital
165: toTable[toTableSz].intCh=0;
166: toTable[toTableSz].extCh=(XMLByte)0;
167: toTableSz++;
1.1 parser 168: #endif
169:
170: // loading text
171: char *data=file_read_text(pool, ffile_spec);
172:
173: // ignore header
174: getrow(&data);
175:
176: // parse cells
1.6 parser 177: char *row;
178: while(row=getrow(&data)) {
179: // remove empty&comment lines
180: if(!*row || *row=='#')
1.1 parser 181: continue;
182:
183: // char white-space digit hex-digit letter word lowercase unicode1 unicode2
184: unsigned int c=0;
185: char *cell;
1.6 parser 186: for(int column=0; cell=lsplit(&row, '\t'); column++) {
1.1 parser 187: switch(column) {
188: case 0: c=to_wchar_code(cell); break;
189: // fpcre_tables
190: case 1: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_space, cbit_space); break;
191: case 2: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_digit, cbit_digit); break;
192: case 3: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_xdigit); break;
193: case 4: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_letter); break;
194: case 5: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_word, cbit_word); break;
195: case 6: element2case(c, to_wchar_code(cell), fpcre_tables); break;
196: #ifdef XML
197: case 7:
198: case 8:
199: // transcoder
200: if(toTableSz>MAX_CHARSET_UNI_CODES)
1.10 parser 201: throw Exception(0, 0,
1.1 parser 202: &ffile_spec,
203: "charset must contain not more then %d unicode values", MAX_CHARSET_UNI_CODES);
204:
205: XMLCh unicode=(XMLCh)to_wchar_code(cell);
206: if(!unicode && column==7/*unicode1 column*/)
207: unicode=(XMLCh)c;
208: if(unicode) {
209: if(!fromTable[c])
210: fromTable[c]=unicode;
211: toTable[toTableSz].intCh=unicode;
212: toTable[toTableSz].extCh=(XMLByte)c;
213: toTableSz++;
214: }
215: break;
216: #endif
217: }
218: }
219: };
220:
221: #ifdef XML
222: // sort by the Unicode code point
223: _qsort(toTable, toTableSz, sizeof(*toTable),
224: sort_cmp_Trans_rec_intCh);
1.7 parser 225: //FILE *f=fopen("c:\\temp\\a", "wb");
226: //fwrite(toTable, toTableSz, sizeof(*toTable), f);
227: //fclose(f);
228:
1.1 parser 229:
230: // addEncoding
231: XalanDOMString sencoding(fname.cstr());
232: const XMLCh* const auto_encoding_cstr=sencoding.c_str();
233: int size=sizeof(XMLCh)*(sencoding.size()+1);
234: XMLCh* pool_encoding_cstr=(XMLCh*)malloc(size);
235: memcpy(pool_encoding_cstr, auto_encoding_cstr, size);
236: XMLString::upperCase(pool_encoding_cstr);
237:
238: XMLPlatformUtils::fgTransService->addEncoding(
239: pool_encoding_cstr,
240: new ENameMapFor2<XML256TableTranscoder2>(
241: pool_encoding_cstr
242: , fromTable
243: , toTable
244: , toTableSz
245: ));
246: #endif
247:
248: prev_disk_time=new_disk_time;
249: }
E-mail: