Annotation of parser3/src/main/pa_charset_connection.C, revision 1.3
1.1 parser 1: /** @file
2: Parser: Charset connection implementation.
3:
4: Copyright (c) 2001 ArtLebedev Group (http://www.artlebedev.com)
5: Author: Alexander Petrosyan <paf@design.ru> (http://design.ru/paf)
6:
1.3 ! parser 7: $Id: pa_charset_connection.C,v 1.2 2001/10/01 13:26:47 parser Exp $
1.1 parser 8: */
9:
10: #include "pa_charset_connection.h"
11: //#include "pa_exception.h"
12: //#include "pa_common.h"
13: //#include "pa_threads.h"
14:
15: #ifdef XML
16: # include <util/XercesDefs.hpp>
17: # include <util/TransENameMap.hpp>
18: # include <util/XML256TableTranscoder.hpp>
19: # include <util/PlatformUtils.hpp>
20: # include <PlatformSupport/XalanTranscodingServices.hpp>
21: #endif
22:
23:
24: // globals
25:
26:
27: // consts
28:
29: #define MAX_CHARSET_UNI_CODES 500
30:
31: //
32:
33: inline void prepare_case_tables(unsigned char *tables) {
34: unsigned char *lcc_table=tables+lcc_offset;
35: unsigned char *fcc_table=tables+fcc_offset;
36: for(int i=0; i<0x100; i++)
37: lcc_table[i]=fcc_table[i]=i;
38: }
39: inline void cstr2ctypes(unsigned char *tables, const unsigned char *cstr,
40: unsigned char bit) {
41: unsigned char *ctypes_table=tables+ctypes_offset;
42: ctypes_table[0]=bit;
43: for(; *cstr; cstr++) {
44: unsigned char c=*cstr;
45: ctypes_table[c]|=bit;
46: }
47: }
/// Converts a table cell to a character code.
/// - null or empty cell: 0
/// - exactly one character: that character's byte value
/// - anything longer: parsed as a number by strtol with base 0
///   (so "0x..." is hexadecimal, a leading "0" is octal, otherwise decimal)
inline unsigned int to_wchar_code(const char *cstr) {
	if(cstr==0 || cstr[0]=='\0')
		return 0;

	const bool single_char=(cstr[1]=='\0');
	if(single_char)
		return (unsigned int)(unsigned char)cstr[0];

	// the end-of-number position is not needed, so no end pointer is requested
	const long parsed=strtol(cstr, 0, 0);
	return (unsigned int)parsed;
}
/// A cell counts as "true" when it is a non-null, non-empty string;
/// the actual characters in it are not inspected.
inline bool to_bool(const char *cstr) {
	if(!cstr)
		return false;
	return cstr[0]!='\0';
}
60: static void element2ctypes(unsigned char c, bool belongs,
61: unsigned char *tables, unsigned char bit, int group_offset=-1) {
62: if(!belongs)
63: return;
64:
65: unsigned char *ctypes_table=tables+ctypes_offset;
66:
67: ctypes_table[c]|=bit;
68: if(group_offset>=0)
69: tables[cbits_offset+group_offset+c/8] |= 1 << (c%8);
70: }
71: static void element2case(unsigned char from, unsigned char to,
72: unsigned char *tables) {
73: if(!to)
74: return;
75:
76: unsigned char *lcc_table=tables+lcc_offset;
77: unsigned char *fcc_table=tables+fcc_offset;
78: lcc_table[from]=to;
79: fcc_table[from]=to; fcc_table[to]=from;
80: }
81:
82: #ifdef XML
83:
84: static int sort_cmp_Trans_rec_intCh(const void *a, const void *b) {
85: const XMLCh ca=static_cast<const XMLTransService::TransRec *>(a)->intCh;
86: const XMLCh cb=static_cast<const XMLTransService::TransRec *>(b)->intCh;
87: // move zeros to end of table
88: if(ca==0)
89: return +1;
90: if(cb==0)
91: return -1;
92:
93: //
94: return ca-cb;
95: }
96:
97: template <class TType> class ENameMapFor2 : public ENameMap
98: {
99: public :
100: // -----------------------------------------------------------------------
101: // Constructors and Destructor
102: // -----------------------------------------------------------------------
103: ENameMapFor2(
104: const XMLCh* const encodingName
105: , const XMLCh* const fromTable
106: , const XMLTransService::TransRec* const toTable
107: , const unsigned int toTableSize
108: ) : ENameMap(encodingName),
109: ffromTable(fromTable),
110: ftoTable(toTable),
111: ftoTableSize(toTableSize) {}
112:
113: // -----------------------------------------------------------------------
114: // Implementation of virtual factory method
115: // -----------------------------------------------------------------------
116: virtual XMLTranscoder* makeNew(const unsigned int blockSize) const {
117: return new TType(
118: getKey(),
119: blockSize,
120: ffromTable,
121: ftoTable, ftoTableSize);
122: }
123: private:
124: const XMLCh* const ffromTable;
125: const XMLTransService::TransRec* const ftoTable;
126: const unsigned int ftoTableSize;
127:
128: private :
129: // -----------------------------------------------------------------------
130: // Unimplemented constructors and operators
131: // -----------------------------------------------------------------------
132: ENameMapFor2();
133: ENameMapFor2(const ENameMapFor2<TType>&);
134: void operator=(const ENameMapFor2<TType>&);
135: };
136:
137: class XML256TableTranscoder2 : public XML256TableTranscoder
138: {
139: public :
140: XML256TableTranscoder2(
141: const XMLCh* const encodingName
142: , const unsigned int blockSize
143: , const XMLCh* const fromTable
144: , const XMLTransService::TransRec* const toTable
145: , const unsigned int toTableSize
146: ) : XML256TableTranscoder(encodingName, blockSize, fromTable, toTable, toTableSize) {}
147:
148: private :
149: XML256TableTranscoder2();
150: XML256TableTranscoder2(const XML256TableTranscoder2&);
151: void operator=(const XML256TableTranscoder2&);
152: };
153: #endif
154:
155: void Charset_connection::load(Pool& pool, time_t new_disk_time) {
156: // pcre_tables
157: // lowcase, flipcase, bits digit+word+whitespace, masks
158: prepare_case_tables(fpcre_tables);
159: cstr2ctypes(fpcre_tables, (const unsigned char *)"*+?{^.$|()[", ctype_meta);
160:
161: #ifdef XML
162: // transcoder
163: XMLCh *fromTable=(XMLCh *)calloc(sizeof(XMLCh)*0x100);
164: XMLTransService::TransRec *toTable=(XMLTransService::TransRec *)calloc(
165: sizeof(XMLTransService::TransRec)*MAX_CHARSET_UNI_CODES);
166: unsigned int toTableSz=0;
167: #endif
168:
169: // loading text
170: char *data=file_read_text(pool, ffile_spec);
171:
172: // ignore header
173: getrow(&data);
174:
175: // parse cells
176: char *row_chars;
177: while(row_chars=getrow(&data)) {
178: if(!*row_chars) // remove empty lines
179: continue;
180:
181: // char white-space digit hex-digit letter word lowercase unicode1 unicode2
182: unsigned int c=0;
183: char *cell;
184: for(int column=0; cell=lsplit(&row_chars, '\t'); column++) {
185: switch(column) {
186: case 0: c=to_wchar_code(cell); break;
187: // fpcre_tables
188: case 1: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_space, cbit_space); break;
189: case 2: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_digit, cbit_digit); break;
190: case 3: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_xdigit); break;
191: case 4: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_letter); break;
192: case 5: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_word, cbit_word); break;
193: case 6: element2case(c, to_wchar_code(cell), fpcre_tables); break;
194: #ifdef XML
195: case 7:
196: case 8:
197: // transcoder
198: if(toTableSz>MAX_CHARSET_UNI_CODES)
199: PTHROW(0, 0,
200: &ffile_spec,
201: "charset must contain not more then %d unicode values", MAX_CHARSET_UNI_CODES);
202:
203: XMLCh unicode=(XMLCh)to_wchar_code(cell);
204: if(!unicode && column==7/*unicode1 column*/)
205: unicode=(XMLCh)c;
206: if(unicode) {
207: if(!fromTable[c])
208: fromTable[c]=unicode;
209: toTable[toTableSz].intCh=unicode;
210: toTable[toTableSz].extCh=(XMLByte)c;
211: toTableSz++;
212: }
213: break;
214: #endif
215: }
216: }
217: };
218:
219: #ifdef XML
220: // sort by the Unicode code point
221: _qsort(toTable, toTableSz, sizeof(*toTable),
222: sort_cmp_Trans_rec_intCh);
223:
224: // addEncoding
225: XalanDOMString sencoding(fname.cstr());
226: const XMLCh* const auto_encoding_cstr=sencoding.c_str();
227: int size=sizeof(XMLCh)*(sencoding.size()+1);
228: XMLCh* pool_encoding_cstr=(XMLCh*)malloc(size);
229: memcpy(pool_encoding_cstr, auto_encoding_cstr, size);
230: XMLString::upperCase(pool_encoding_cstr);
231:
232: XMLPlatformUtils::fgTransService->addEncoding(
233: pool_encoding_cstr,
234: new ENameMapFor2<XML256TableTranscoder2>(
235: pool_encoding_cstr
236: , fromTable
237: , toTable
238: , toTableSz
239: ));
240: #endif
241:
242: prev_disk_time=new_disk_time;
243: }
E-mail: