Annotation of parser3/src/main/pa_charset_connection.C, revision 1.6
1.1 parser 1: /** @file
2: Parser: Charset connection implementation.
3:
4: Copyright (c) 2001 ArtLebedev Group (http://www.artlebedev.com)
5: Author: Alexander Petrosyan <paf@design.ru> (http://design.ru/paf)
6:
1.6 ! parser 7: $Id: pa_charset_connection.C,v 1.5 2001/10/02 11:11:47 parser Exp $
1.1 parser 8: */
9:
10: #include "pa_charset_connection.h"
11: //#include "pa_exception.h"
12: //#include "pa_common.h"
13: //#include "pa_threads.h"
14:
15: #ifdef XML
16: # include <util/XercesDefs.hpp>
17: # include <util/TransENameMap.hpp>
18: # include <util/XML256TableTranscoder.hpp>
19: # include <util/PlatformUtils.hpp>
20: # include <PlatformSupport/XalanTranscodingServices.hpp>
21: #endif
22:
23:
24: // globals
25:
26:
27: // consts
28:
// Number of entries allocated for the unicode->byte table built by
// Charset_connection::load(); a charset file that supplies more unicode
// values than this makes load() throw.
29: #define MAX_CHARSET_UNI_CODES 500
30:
31: //
32:
33: inline void prepare_case_tables(unsigned char *tables) {
1.5 parser 34: memset(tables, 0, sizeof(tables));
1.4 parser 35:
1.1 parser 36: unsigned char *lcc_table=tables+lcc_offset;
37: unsigned char *fcc_table=tables+fcc_offset;
38: for(int i=0; i<0x100; i++)
39: lcc_table[i]=fcc_table[i]=i;
40: }
41: inline void cstr2ctypes(unsigned char *tables, const unsigned char *cstr,
42: unsigned char bit) {
43: unsigned char *ctypes_table=tables+ctypes_offset;
44: ctypes_table[0]=bit;
45: for(; *cstr; cstr++) {
46: unsigned char c=*cstr;
47: ctypes_table[c]|=bit;
48: }
49: }
/**
	Converts a table cell to a character code.

	@param cstr cell text, may be null
	@return 0 for a null or empty cell; the byte value of a one-character
		cell; otherwise the value strtol() parses with base auto-detection
		(decimal, 0x-prefixed hex, 0-prefixed octal)
*/
inline unsigned int to_wchar_code(const char *cstr) {
	// nothing to convert
	if(cstr==0 || cstr[0]=='\0')
		return 0;

	// longer than one character: numeric notation
	if(cstr[1]!='\0')
		return (unsigned int)strtol(cstr, 0, 0);

	// exactly one character: the character stands for itself
	return (unsigned int)(unsigned char)cstr[0];
}
/**
	Interprets a table cell as a flag.

	@param cstr cell text, may be null
	@return true when the cell is present and non-empty, whatever its content
*/
inline bool to_bool(const char *cstr) {
	if(!cstr)
		return false;
	return cstr[0]!='\0';
}
62: static void element2ctypes(unsigned char c, bool belongs,
63: unsigned char *tables, unsigned char bit, int group_offset=-1) {
64: if(!belongs)
65: return;
66:
67: unsigned char *ctypes_table=tables+ctypes_offset;
68:
69: ctypes_table[c]|=bit;
70: if(group_offset>=0)
71: tables[cbits_offset+group_offset+c/8] |= 1 << (c%8);
72: }
73: static void element2case(unsigned char from, unsigned char to,
74: unsigned char *tables) {
75: if(!to)
76: return;
77:
78: unsigned char *lcc_table=tables+lcc_offset;
79: unsigned char *fcc_table=tables+fcc_offset;
80: lcc_table[from]=to;
81: fcc_table[from]=to; fcc_table[to]=from;
82: }
83:
84: #ifdef XML
85:
86: static int sort_cmp_Trans_rec_intCh(const void *a, const void *b) {
87: const XMLCh ca=static_cast<const XMLTransService::TransRec *>(a)->intCh;
88: const XMLCh cb=static_cast<const XMLTransService::TransRec *>(b)->intCh;
89: // move zeros to end of table
90: if(ca==0)
91: return +1;
92: if(cb==0)
93: return -1;
94:
95: //
96: return ca-cb;
97: }
98:
99: template <class TType> class ENameMapFor2 : public ENameMap
100: {
101: public :
102: // -----------------------------------------------------------------------
103: // Constructors and Destructor
104: // -----------------------------------------------------------------------
105: ENameMapFor2(
106: const XMLCh* const encodingName
107: , const XMLCh* const fromTable
108: , const XMLTransService::TransRec* const toTable
109: , const unsigned int toTableSize
110: ) : ENameMap(encodingName),
111: ffromTable(fromTable),
112: ftoTable(toTable),
113: ftoTableSize(toTableSize) {}
114:
115: // -----------------------------------------------------------------------
116: // Implementation of virtual factory method
117: // -----------------------------------------------------------------------
118: virtual XMLTranscoder* makeNew(const unsigned int blockSize) const {
119: return new TType(
120: getKey(),
121: blockSize,
122: ffromTable,
123: ftoTable, ftoTableSize);
124: }
125: private:
126: const XMLCh* const ffromTable;
127: const XMLTransService::TransRec* const ftoTable;
128: const unsigned int ftoTableSize;
129:
130: private :
131: // -----------------------------------------------------------------------
132: // Unimplemented constructors and operators
133: // -----------------------------------------------------------------------
134: ENameMapFor2();
135: ENameMapFor2(const ENameMapFor2<TType>&);
136: void operator=(const ENameMapFor2<TType>&);
137: };
138:
139: class XML256TableTranscoder2 : public XML256TableTranscoder
140: {
141: public :
142: XML256TableTranscoder2(
143: const XMLCh* const encodingName
144: , const unsigned int blockSize
145: , const XMLCh* const fromTable
146: , const XMLTransService::TransRec* const toTable
147: , const unsigned int toTableSize
148: ) : XML256TableTranscoder(encodingName, blockSize, fromTable, toTable, toTableSize) {}
149:
150: private :
151: XML256TableTranscoder2();
152: XML256TableTranscoder2(const XML256TableTranscoder2&);
153: void operator=(const XML256TableTranscoder2&);
154: };
155: #endif
156:
157: void Charset_connection::load(Pool& pool, time_t new_disk_time) {
158: // pcre_tables
159: // lowcase, flipcase, bits digit+word+whitespace, masks
160: prepare_case_tables(fpcre_tables);
161: cstr2ctypes(fpcre_tables, (const unsigned char *)"*+?{^.$|()[", ctype_meta);
162:
163: #ifdef XML
164: // transcoder
165: XMLCh *fromTable=(XMLCh *)calloc(sizeof(XMLCh)*0x100);
166: XMLTransService::TransRec *toTable=(XMLTransService::TransRec *)calloc(
167: sizeof(XMLTransService::TransRec)*MAX_CHARSET_UNI_CODES);
168: unsigned int toTableSz=0;
169: #endif
170:
171: // loading text
172: char *data=file_read_text(pool, ffile_spec);
173:
174: // ignore header
175: getrow(&data);
176:
177: // parse cells
1.6 ! parser 178: char *row;
! 179: while(row=getrow(&data)) {
! 180: // remove empty&comment lines
! 181: if(!*row || *row=='#')
1.1 parser 182: continue;
183:
184: // char white-space digit hex-digit letter word lowercase unicode1 unicode2
185: unsigned int c=0;
186: char *cell;
1.6 ! parser 187: for(int column=0; cell=lsplit(&row, '\t'); column++) {
1.1 parser 188: switch(column) {
189: case 0: c=to_wchar_code(cell); break;
190: // fpcre_tables
191: case 1: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_space, cbit_space); break;
192: case 2: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_digit, cbit_digit); break;
193: case 3: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_xdigit); break;
194: case 4: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_letter); break;
195: case 5: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_word, cbit_word); break;
196: case 6: element2case(c, to_wchar_code(cell), fpcre_tables); break;
197: #ifdef XML
198: case 7:
199: case 8:
200: // transcoder
201: if(toTableSz>MAX_CHARSET_UNI_CODES)
202: PTHROW(0, 0,
203: &ffile_spec,
204: "charset must contain not more then %d unicode values", MAX_CHARSET_UNI_CODES);
205:
206: XMLCh unicode=(XMLCh)to_wchar_code(cell);
207: if(!unicode && column==7/*unicode1 column*/)
208: unicode=(XMLCh)c;
209: if(unicode) {
210: if(!fromTable[c])
211: fromTable[c]=unicode;
212: toTable[toTableSz].intCh=unicode;
213: toTable[toTableSz].extCh=(XMLByte)c;
214: toTableSz++;
215: }
216: break;
217: #endif
218: }
219: }
220: };
221:
222: #ifdef XML
223: // sort by the Unicode code point
224: _qsort(toTable, toTableSz, sizeof(*toTable),
225: sort_cmp_Trans_rec_intCh);
226:
227: // addEncoding
228: XalanDOMString sencoding(fname.cstr());
229: const XMLCh* const auto_encoding_cstr=sencoding.c_str();
230: int size=sizeof(XMLCh)*(sencoding.size()+1);
231: XMLCh* pool_encoding_cstr=(XMLCh*)malloc(size);
232: memcpy(pool_encoding_cstr, auto_encoding_cstr, size);
233: XMLString::upperCase(pool_encoding_cstr);
234:
235: XMLPlatformUtils::fgTransService->addEncoding(
236: pool_encoding_cstr,
237: new ENameMapFor2<XML256TableTranscoder2>(
238: pool_encoding_cstr
239: , fromTable
240: , toTable
241: , toTableSz
242: ));
243: #endif
244:
245: prev_disk_time=new_disk_time;
246: }
E-mail: