Annotation of parser3/src/main/pa_charset_connection.C, revision 1.2
1.1 parser 1: /** @file
2: Parser: Charset connection implementation.
3:
4: Copyright (c) 2001 ArtLebedev Group (http://www.artlebedev.com)
5: Author: Alexander Petrosyan <paf@design.ru> (http://design.ru/paf)
6:
1.2 ! parser 7: $Id: pa_charset_connection.C,v 1.1 2001/10/01 10:53:16 parser Exp $
1.1 parser 8: */
9:
10: #include "pa_charset_connection.h"
11: //#include "pa_exception.h"
12: //#include "pa_common.h"
13: //#include "pa_threads.h"
14:
15: #ifdef XML
16: # include <util/XercesDefs.hpp>
17: # include <util/TransENameMap.hpp>
18: # include <util/XML256TableTranscoder.hpp>
19: # include <util/PlatformUtils.hpp>
20: # include <PlatformSupport/XalanTranscodingServices.hpp>
21: #endif
22:
23:
24: // globals
25:
26:
27: // consts
28:
29: #define MAX_CHARSET_UNI_CODES 500
30:
31: //
32:
33: inline void prepare_case_tables(unsigned char *tables) {
34: unsigned char *lcc_table=tables+lcc_offset;
35: unsigned char *fcc_table=tables+fcc_offset;
36: for(int i=0; i<0x100; i++)
37: lcc_table[i]=fcc_table[i]=i;
38: }
39: inline void cstr2ctypes(unsigned char *tables, const unsigned char *cstr,
40: unsigned char bit) {
41: unsigned char *ctypes_table=tables+ctypes_offset;
42: ctypes_table[0]=bit;
43: for(; *cstr; cstr++) {
44: unsigned char c=*cstr;
45: ctypes_table[c]|=bit;
46: }
47: }
/// Converts a charset-file cell to a character code.
///
/// - null or empty cell        -> 0
/// - exactly one character     -> the code of that character (as unsigned char)
/// - anything longer           -> parsed by strtol with base 0, so decimal,
///   0x-prefixed hex and 0-prefixed octal notations are all accepted;
///   trailing garbage is silently ignored.
inline unsigned int to_wchar_code(const char *cstr) {
	const bool is_empty=!cstr || cstr[0]=='\0';
	if(is_empty)
		return 0;

	const bool is_single_char=cstr[1]=='\0';
	if(is_single_char)
		return static_cast<unsigned char>(cstr[0]);

	char *parse_end; // required by strtol, not inspected
	const long parsed=strtol(cstr, &parse_end, 0);
	return static_cast<unsigned int>(parsed);
}
/// A cell counts as "true" when it is present and non-empty.
inline bool to_bool(const char *cstr) {
	if(!cstr)
		return false;
	return cstr[0]!='\0';
}
60: static void element2ctypes(unsigned char c, bool belongs,
61: unsigned char *tables, unsigned char bit, int group_offset=-1) {
62: if(!belongs)
63: return;
64:
65: unsigned char *ctypes_table=tables+ctypes_offset;
66:
67: ctypes_table[c]|=bit;
68: if(group_offset>=0)
69: tables[cbits_offset+group_offset+c/8] |= 1 << (c%8);
70: }
71: static void element2case(unsigned char from, unsigned char to,
72: unsigned char *tables) {
73: if(!to)
74: return;
75:
76: unsigned char *lcc_table=tables+lcc_offset;
77: unsigned char *fcc_table=tables+fcc_offset;
78: lcc_table[from]=to;
79: fcc_table[from]=to; fcc_table[to]=from;
80: }
81:
82: #ifdef XML
83:
84: static int sort_cmp_Trans_rec_intCh(const void *a, const void *b) {
85: const XMLCh ca=static_cast<const XMLTransService::TransRec *>(a)->intCh;
86: const XMLCh cb=static_cast<const XMLTransService::TransRec *>(b)->intCh;
87: // move zeros to end of table
88: if(ca==0)
89: return +1;
90: if(cb==0)
91: return -1;
92:
93: //
94: return ca-cb;
95: }
96:
97: template <class TType> class ENameMapFor2 : public ENameMap
98: {
99: public :
100: // -----------------------------------------------------------------------
101: // Constructors and Destructor
102: // -----------------------------------------------------------------------
103: ENameMapFor2(
104: const XMLCh* const encodingName
105: , const XMLCh* const fromTable
106: , const XMLTransService::TransRec* const toTable
107: , const unsigned int toTableSize
108: ) : ENameMap(encodingName),
109: ffromTable(fromTable),
110: ftoTable(toTable),
111: ftoTableSize(toTableSize) {}
112: ~ENameMapFor2() {}
113:
114: // -----------------------------------------------------------------------
115: // Implementation of virtual factory method
116: // -----------------------------------------------------------------------
117: virtual XMLTranscoder* makeNew(const unsigned int blockSize) const {
118: return new TType(
119: getKey(),
120: blockSize,
121: ffromTable,
122: ftoTable, ftoTableSize);
123: }
124: private:
125: const XMLCh* const ffromTable;
126: const XMLTransService::TransRec* const ftoTable;
127: const unsigned int ftoTableSize;
128:
129: private :
130: // -----------------------------------------------------------------------
131: // Unimplemented constructors and operators
132: // -----------------------------------------------------------------------
133: ENameMapFor2();
134: ENameMapFor2(const ENameMapFor2<TType>&);
135: void operator=(const ENameMapFor2<TType>&);
136: };
137:
138: class XML256TableTranscoder2 : public XML256TableTranscoder
139: {
140: public :
141: XML256TableTranscoder2(
142: const XMLCh* const encodingName
143: , const unsigned int blockSize
144: , const XMLCh* const fromTable
145: , const XMLTransService::TransRec* const toTable
146: , const unsigned int toTableSize
147: ) : XML256TableTranscoder(encodingName, blockSize, fromTable, toTable, toTableSize) {}
148:
149: private :
150: XML256TableTranscoder2();
151: XML256TableTranscoder2(const XML256TableTranscoder2&);
152: void operator=(const XML256TableTranscoder2&);
153: };
154: #endif
155:
156: void Charset_connection::load(Pool& pool, time_t new_disk_time) {
157: // pcre_tables
158: // lowcase, flipcase, bits digit+word+whitespace, masks
159: prepare_case_tables(fpcre_tables);
160: cstr2ctypes(fpcre_tables, (const unsigned char *)"*+?{^.$|()[", ctype_meta);
161:
162: #ifdef XML
163: // transcoder
164: XMLCh *fromTable=(XMLCh *)calloc(sizeof(XMLCh)*0x100);
165: XMLTransService::TransRec *toTable=(XMLTransService::TransRec *)calloc(
166: sizeof(XMLTransService::TransRec)*MAX_CHARSET_UNI_CODES);
167: unsigned int toTableSz=0;
168: #endif
169:
170: // loading text
171: char *data=file_read_text(pool, ffile_spec);
172:
173: // ignore header
174: getrow(&data);
175:
176: // parse cells
177: char *row_chars;
178: while(row_chars=getrow(&data)) {
179: if(!*row_chars) // remove empty lines
180: continue;
181:
182: // char white-space digit hex-digit letter word lowercase unicode1 unicode2
183: unsigned int c=0;
184: char *cell;
185: for(int column=0; cell=lsplit(&row_chars, '\t'); column++) {
186: switch(column) {
187: case 0: c=to_wchar_code(cell); break;
188: // fpcre_tables
189: case 1: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_space, cbit_space); break;
190: case 2: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_digit, cbit_digit); break;
191: case 3: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_xdigit); break;
192: case 4: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_letter); break;
193: case 5: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_word, cbit_word); break;
194: case 6: element2case(c, to_wchar_code(cell), fpcre_tables); break;
195: #ifdef XML
196: case 7:
197: case 8:
198: // transcoder
199: if(toTableSz>MAX_CHARSET_UNI_CODES)
200: PTHROW(0, 0,
201: &ffile_spec,
202: "charset must contain not more then %d unicode values", MAX_CHARSET_UNI_CODES);
203:
204: XMLCh unicode=(XMLCh)to_wchar_code(cell);
205: if(!unicode && column==7/*unicode1 column*/)
206: unicode=(XMLCh)c;
207: if(unicode) {
208: if(!fromTable[c])
209: fromTable[c]=unicode;
210: toTable[toTableSz].intCh=unicode;
211: toTable[toTableSz].extCh=(XMLByte)c;
212: toTableSz++;
213: }
214: break;
215: #endif
216: }
217: }
218: };
219:
220: #ifdef XML
221: // sort by the Unicode code point
222: _qsort(toTable, toTableSz, sizeof(*toTable),
223: sort_cmp_Trans_rec_intCh);
224:
225: // addEncoding
226: XalanDOMString sencoding(fname.cstr());
227: const XMLCh* const auto_encoding_cstr=sencoding.c_str();
228: int size=sizeof(XMLCh)*(sencoding.size()+1);
229: XMLCh* pool_encoding_cstr=(XMLCh*)malloc(size);
230: memcpy(pool_encoding_cstr, auto_encoding_cstr, size);
231: XMLString::upperCase(pool_encoding_cstr);
232:
233: /// @todo delete prev encoding with same name
234: XMLPlatformUtils::fgTransService->addEncoding(
235: pool_encoding_cstr,
236: new ENameMapFor2<XML256TableTranscoder2>(
237: pool_encoding_cstr
238: , fromTable
239: , toTable
240: , toTableSz
241: ));
242: #endif
243:
244: prev_disk_time=new_disk_time;
245: }
E-mail: