Annotation of parser3/src/main/pa_charset_connection.C, revision 1.7
1.1 parser 1: /** @file
2: Parser: Charset connection implementation.
3:
4: Copyright (c) 2001 ArtLebedev Group (http://www.artlebedev.com)
5: Author: Alexander Petrosyan <paf@design.ru> (http://design.ru/paf)
6:
1.7 ! parser 7: $Id: pa_charset_connection.C,v 1.6 2001/10/02 13:32:38 parser Exp $
1.1 parser 8: */
9:
10: #include "pa_charset_connection.h"
11: //#include "pa_exception.h"
12: //#include "pa_common.h"
13: //#include "pa_threads.h"
14:
15: #ifdef XML
16: # include <util/XercesDefs.hpp>
17: # include <util/TransENameMap.hpp>
18: # include <util/XML256TableTranscoder.hpp>
19: # include <util/PlatformUtils.hpp>
20: # include <PlatformSupport/XalanTranscodingServices.hpp>
21: #endif
22:
23:
24: // globals
25:
26:
27: // consts
28:
29: #define MAX_CHARSET_UNI_CODES 500
30:
31: //
32:
33: inline void prepare_case_tables(unsigned char *tables) {
1.5 parser 34: memset(tables, 0, sizeof(tables));
1.4 parser 35:
1.1 parser 36: unsigned char *lcc_table=tables+lcc_offset;
37: unsigned char *fcc_table=tables+fcc_offset;
38: for(int i=0; i<0x100; i++)
39: lcc_table[i]=fcc_table[i]=i;
40: }
41: inline void cstr2ctypes(unsigned char *tables, const unsigned char *cstr,
42: unsigned char bit) {
43: unsigned char *ctypes_table=tables+ctypes_offset;
44: ctypes_table[0]=bit;
45: for(; *cstr; cstr++) {
46: unsigned char c=*cstr;
47: ctypes_table[c]|=bit;
48: }
49: }
/**
	Converts a table cell to a character code.

	@param cstr cell text, may be null
	@return 0 for a null or empty cell;
		the byte value of the character for a one-character cell
		(so "5" yields the code of the character '5', not the number 5);
		otherwise the number parsed by strtol with base 0
		(decimal, 0x-prefixed hex or 0-prefixed octal), converted to unsigned int
*/
inline unsigned int to_wchar_code(const char *cstr) {
	if(cstr==0 || cstr[0]=='\0')
		return 0;

	// longer than one character: the cell holds a numeric code
	if(cstr[1]!='\0')
		return (unsigned int)strtol(cstr, 0, 0);

	// exactly one character: the cell holds the character itself
	return (unsigned int)(unsigned char)cstr[0];
}
/**
	Interprets a table cell as a flag.

	@param cstr cell text, may be null
	@return true when the cell is present and not empty (any content counts,
		including "0"); false for a null or empty cell
*/
inline bool to_bool(const char *cstr) {
	if(cstr==0)
		return false;
	return cstr[0]!='\0';
}
62: static void element2ctypes(unsigned char c, bool belongs,
63: unsigned char *tables, unsigned char bit, int group_offset=-1) {
64: if(!belongs)
65: return;
66:
67: unsigned char *ctypes_table=tables+ctypes_offset;
68:
69: ctypes_table[c]|=bit;
70: if(group_offset>=0)
71: tables[cbits_offset+group_offset+c/8] |= 1 << (c%8);
72: }
73: static void element2case(unsigned char from, unsigned char to,
74: unsigned char *tables) {
75: if(!to)
76: return;
77:
78: unsigned char *lcc_table=tables+lcc_offset;
79: unsigned char *fcc_table=tables+fcc_offset;
80: lcc_table[from]=to;
81: fcc_table[from]=to; fcc_table[to]=from;
82: }
83:
84: #ifdef XML
85:
86: static int sort_cmp_Trans_rec_intCh(const void *a, const void *b) {
1.7 ! parser 87: return
! 88: static_cast<const XMLTransService::TransRec *>(a)->intCh-
! 89: static_cast<const XMLTransService::TransRec *>(b)->intCh;
1.1 parser 90: }
91:
92: template <class TType> class ENameMapFor2 : public ENameMap
93: {
94: public :
95: // -----------------------------------------------------------------------
96: // Constructors and Destructor
97: // -----------------------------------------------------------------------
98: ENameMapFor2(
99: const XMLCh* const encodingName
100: , const XMLCh* const fromTable
101: , const XMLTransService::TransRec* const toTable
102: , const unsigned int toTableSize
103: ) : ENameMap(encodingName),
104: ffromTable(fromTable),
105: ftoTable(toTable),
106: ftoTableSize(toTableSize) {}
107:
108: // -----------------------------------------------------------------------
109: // Implementation of virtual factory method
110: // -----------------------------------------------------------------------
111: virtual XMLTranscoder* makeNew(const unsigned int blockSize) const {
112: return new TType(
113: getKey(),
114: blockSize,
115: ffromTable,
116: ftoTable, ftoTableSize);
117: }
118: private:
119: const XMLCh* const ffromTable;
120: const XMLTransService::TransRec* const ftoTable;
121: const unsigned int ftoTableSize;
122:
123: private :
124: // -----------------------------------------------------------------------
125: // Unimplemented constructors and operators
126: // -----------------------------------------------------------------------
127: ENameMapFor2();
128: ENameMapFor2(const ENameMapFor2<TType>&);
129: void operator=(const ENameMapFor2<TType>&);
130: };
131:
132: class XML256TableTranscoder2 : public XML256TableTranscoder
133: {
134: public :
135: XML256TableTranscoder2(
136: const XMLCh* const encodingName
137: , const unsigned int blockSize
138: , const XMLCh* const fromTable
139: , const XMLTransService::TransRec* const toTable
140: , const unsigned int toTableSize
141: ) : XML256TableTranscoder(encodingName, blockSize, fromTable, toTable, toTableSize) {}
142:
143: private :
144: XML256TableTranscoder2();
145: XML256TableTranscoder2(const XML256TableTranscoder2&);
146: void operator=(const XML256TableTranscoder2&);
147: };
148: #endif
149:
150: void Charset_connection::load(Pool& pool, time_t new_disk_time) {
151: // pcre_tables
152: // lowcase, flipcase, bits digit+word+whitespace, masks
153: prepare_case_tables(fpcre_tables);
154: cstr2ctypes(fpcre_tables, (const unsigned char *)"*+?{^.$|()[", ctype_meta);
155:
156: #ifdef XML
157: // transcoder
158: XMLCh *fromTable=(XMLCh *)calloc(sizeof(XMLCh)*0x100);
159: XMLTransService::TransRec *toTable=(XMLTransService::TransRec *)calloc(
160: sizeof(XMLTransService::TransRec)*MAX_CHARSET_UNI_CODES);
161: unsigned int toTableSz=0;
1.7 ! parser 162: // strangly vital
! 163: toTable[toTableSz].intCh=0;
! 164: toTable[toTableSz].extCh=(XMLByte)0;
! 165: toTableSz++;
1.1 parser 166: #endif
167:
168: // loading text
169: char *data=file_read_text(pool, ffile_spec);
170:
171: // ignore header
172: getrow(&data);
173:
174: // parse cells
1.6 parser 175: char *row;
176: while(row=getrow(&data)) {
177: // remove empty&comment lines
178: if(!*row || *row=='#')
1.1 parser 179: continue;
180:
181: // char white-space digit hex-digit letter word lowercase unicode1 unicode2
182: unsigned int c=0;
183: char *cell;
1.6 parser 184: for(int column=0; cell=lsplit(&row, '\t'); column++) {
1.1 parser 185: switch(column) {
186: case 0: c=to_wchar_code(cell); break;
187: // fpcre_tables
188: case 1: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_space, cbit_space); break;
189: case 2: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_digit, cbit_digit); break;
190: case 3: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_xdigit); break;
191: case 4: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_letter); break;
192: case 5: element2ctypes(c, to_bool(cell), fpcre_tables, ctype_word, cbit_word); break;
193: case 6: element2case(c, to_wchar_code(cell), fpcre_tables); break;
194: #ifdef XML
195: case 7:
196: case 8:
197: // transcoder
198: if(toTableSz>MAX_CHARSET_UNI_CODES)
199: PTHROW(0, 0,
200: &ffile_spec,
201: "charset must contain not more then %d unicode values", MAX_CHARSET_UNI_CODES);
202:
203: XMLCh unicode=(XMLCh)to_wchar_code(cell);
204: if(!unicode && column==7/*unicode1 column*/)
205: unicode=(XMLCh)c;
206: if(unicode) {
207: if(!fromTable[c])
208: fromTable[c]=unicode;
209: toTable[toTableSz].intCh=unicode;
210: toTable[toTableSz].extCh=(XMLByte)c;
211: toTableSz++;
212: }
213: break;
214: #endif
215: }
216: }
217: };
218:
219: #ifdef XML
220: // sort by the Unicode code point
221: _qsort(toTable, toTableSz, sizeof(*toTable),
222: sort_cmp_Trans_rec_intCh);
1.7 ! parser 223: //FILE *f=fopen("c:\\temp\\a", "wb");
! 224: //fwrite(toTable, toTableSz, sizeof(*toTable), f);
! 225: //fclose(f);
! 226:
1.1 parser 227:
228: // addEncoding
229: XalanDOMString sencoding(fname.cstr());
230: const XMLCh* const auto_encoding_cstr=sencoding.c_str();
231: int size=sizeof(XMLCh)*(sencoding.size()+1);
232: XMLCh* pool_encoding_cstr=(XMLCh*)malloc(size);
233: memcpy(pool_encoding_cstr, auto_encoding_cstr, size);
234: XMLString::upperCase(pool_encoding_cstr);
235:
236: XMLPlatformUtils::fgTransService->addEncoding(
237: pool_encoding_cstr,
238: new ENameMapFor2<XML256TableTranscoder2>(
239: pool_encoding_cstr
240: , fromTable
241: , toTable
242: , toTableSz
243: ));
244: #endif
245:
246: prev_disk_time=new_disk_time;
247: }
E-mail: