Annotation of parser3/src/main/pa_transcoder.C, revision 1.1
1.1 ! paf 1: /** @file
! 2: Parser: Transcoder impl.
! 3:
! 4: Copyright (c) 2001 ArtLebedev Group (http://www.artlebedev.com)
! 5: Author: Alexander Petrosyan <paf@design.ru> (http://paf.design.ru)
! 6:
! 7: $Id: pa_charset_connection.h,v 1.4 2001/11/05 11:46:23 paf Exp $
! 8: */
! 9:
! 10: #include "pa_common.h"
! 11: #include "pa_transcoder.h"
! 12: #include "pa_exception.h"
! 13:
! 14: static int sort_cmp_Trans_rec_intCh(const void *a, const void *b) {
! 15: return
! 16: static_cast<const Transcoder_TransRec *>(a)->intCh-
! 17: static_cast<const Transcoder_TransRec *>(b)->intCh;
! 18: }
! 19:
! 20: void Transcoder::sort_ToTable() {
! 21: _qsort(toTable, toTableSize, sizeof(*toTable),
! 22: sort_cmp_Trans_rec_intCh);
! 23: //FILE *f=fopen("c:\\temp\\a", "wb");
! 24: //fwrite(toTable, toTableSize, sizeof(*toTable), f);
! 25: //fclose(f);
! 26: }
! 27:
! 28: XMLByte Transcoder::xlatOneTo(const XMLCh toXlat) const {
! 29: unsigned int lowOfs = 0;
! 30: unsigned int hiOfs = toTableSize - 1;
! 31: XMLByte curByte = 0;
! 32: do {
! 33: // Calc the mid point of the low and high offset.
! 34: const unsigned int midOfs = ((hiOfs - lowOfs) / 2) + lowOfs;
! 35:
! 36: // If our test char is greater than the mid point char, then
! 37: // we move up to the upper half. Else we move to the lower
! 38: // half. If its equal, then its our guy.
! 39: if (toXlat > toTable[midOfs].intCh)
! 40: lowOfs = midOfs;
! 41: else if (toXlat < toTable[midOfs].intCh)
! 42: hiOfs = midOfs;
! 43: else
! 44: return toTable[midOfs].extCh;
! 45: } while(lowOfs + 1 < hiOfs);
! 46:
! 47: return 0;
! 48: }
! 49:
! 50: void transcoder_transcode(Pool& pool,
! 51: const Transcoder *source_transcoder, const void *source_body, size_t source_content_length,
! 52: const Transcoder *dest_transcoder, const void *& dest_body, size_t& dest_content_length
! 53: ) {
! 54: switch((source_transcoder?0x10:0x00)|(dest_transcoder?0x01:0x00)) {
! 55: case 0x00:
! 56: dest_body=source_body;
! 57: dest_content_length=source_content_length;
! 58: break;
! 59: case 0x10:
! 60: source_transcoder->transcodeToUTF8(pool,
! 61: source_body, source_content_length,
! 62: dest_body, dest_content_length);
! 63: break;
! 64: case 0x01:
! 65: dest_transcoder->transcodeFromUTF8(pool,
! 66: source_body, source_content_length,
! 67: dest_body, dest_content_length);
! 68: break;
! 69: default: // 0x11
! 70: source_transcoder->transcodeToTranscoder(pool, *dest_transcoder,
! 71: source_body, source_content_length,
! 72: dest_body, dest_content_length);
! 73: break;
! 74: }
! 75: }
! 76:
! 77: void Transcoder::transcodeToUTF8(Pool& pool,
! 78: const void *source_body, size_t source_content_length,
! 79: const void *& dest_body, size_t& dest_content_length) const {
! 80: throw Exception(0, 0,
! 81: 0,
! 82: "transcodeToUTF8 not supported(yet)");
! 83: }
! 84:
! 85:
! 86:
! 87: // ---------------------------------------------------------------------------
! 88: // Local static data
! 89: //
! 90: // gUTFBytes
! 91: // A list of counts of trailing bytes for each initial byte in the input.
! 92: //
! 93: // gUTFOffsets
! 94: // A list of values to offset each result char type, according to how
! 95: // many source bytes when into making it.
! 96: //
! 97: // gFirstByteMark
! 98: // A list of values to mask onto the first byte of an encoded sequence,
! 99: // indexed by the number of bytes used to create the sequence.
! 100: // ---------------------------------------------------------------------------
! 101: static const XMLByte gUTFBytes[0x100] = {
! 102: 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
! 103: , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
! 104: , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
! 105: , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
! 106: , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
! 107: , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
! 108: , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
! 109: , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
! 110: , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
! 111: , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
! 112: , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
! 113: , 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
! 114: , 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
! 115: , 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
! 116: , 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
! 117: , 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
! 118: };
! 119:
! 120: static const uint gUTFOffsets[6] = {
! 121: 0, 0x3080, 0xE2080, 0x3C82080, 0xFA082080, 0x82082080
! 122: };
! 123:
! 124: static const XMLByte gFirstByteMark[7] = {
! 125: 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
! 126: };
! 127: void Transcoder::transcodeFromUTF8(Pool& pool,
! 128: const void *source_body, size_t source_content_length,
! 129: const void *& adest_body, size_t& adest_content_length) const {
! 130: size_t dest_content_length=0;
! 131: XMLByte *dest_body=(XMLByte*)pool.malloc(source_content_length/*surly enough*/);
! 132:
! 133: const XMLByte* srcPtr=(const XMLByte*)source_body;
! 134: const XMLByte* srcEnd=(const XMLByte*)source_body+source_content_length;
! 135: XMLByte* outPtr=dest_body;
! 136:
! 137: // We now loop until we either run out of input data
! 138: while (srcPtr < srcEnd) {
! 139: // Get the next leading byte out
! 140: const XMLByte firstByte = *srcPtr;
! 141:
! 142: // Special-case ASCII, which is a leading byte value of <= 127
! 143: if (firstByte <= 127) {
! 144: *outPtr++ = firstByte;
! 145: srcPtr++;
! 146: continue;
! 147: }
! 148:
! 149: // See how many trailing src bytes this sequence is going to require
! 150: const unsigned int trailingBytes = gUTFBytes[firstByte];
! 151:
! 152: // If there are not enough source bytes to do this one, then we
! 153: // are done. Note that we done >= here because we are implicitly
! 154: // counting the 1 byte we get no matter what.
! 155: if (srcPtr + trailingBytes >= srcEnd)
! 156: break;
! 157:
! 158: // Looks ok, so lets build up the value
! 159: uint tmpVal=0;
! 160: switch(trailingBytes) {
! 161: case 5: tmpVal+=*srcPtr++; tmpVal<<=6;
! 162: case 4: tmpVal+=*srcPtr++; tmpVal<<=6;
! 163: case 3: tmpVal+=*srcPtr++; tmpVal<<=6;
! 164: case 2: tmpVal+=*srcPtr++; tmpVal<<=6;
! 165: case 1: tmpVal+=*srcPtr++; tmpVal<<=6;
! 166: case 0: tmpVal+=*srcPtr++;
! 167: break;
! 168:
! 169: default:
! 170: throw Exception(0, 0,
! 171: 0,
! 172: "transcodeFromUTF8 error: wrong trailingBytes value (%d)", trailingBytes);
! 173: }
! 174: tmpVal-=gUTFOffsets[trailingBytes];
! 175:
! 176: // If it will fit into a single char, then put it in. Otherwise
! 177: // fail [*encode it as a surrogate pair. If its not valid, use the
! 178: // replacement char.*]
! 179: if (!(tmpVal & 0xFFFF0000))
! 180: *outPtr++ = xlatOneTo(tmpVal);
! 181: else
! 182: throw Exception(0, 0,
! 183: 0,
! 184: "transcodeFromUTF8 error: too big tmpVal (0x%08X)", tmpVal);
! 185: }
! 186:
! 187:
! 188: // return
! 189: adest_body=dest_body;
! 190: adest_content_length=outPtr-dest_body;
! 191: }
! 192:
! 193: /// transcode using both transcoders
! 194: void Transcoder::transcodeToTranscoder(Pool& pool,
! 195: const Transcoder& dest_transcoder,
! 196: const void *source_body, size_t source_content_length,
! 197: const void *& adest_body, size_t& dest_content_length) const {
! 198: throw Exception(0, 0,
! 199: 0,
! 200: "transcodeToTranscoder not supported(yet)");
! 201: /*
! 202: void *dest_body;
! 203:
! 204: dest_body=pool.malloc(dest_content_length=source_content_length);
! 205: // dummy
! 206: memset(dest_body, '?', dest_content_length);
! 207:
! 208: adest_body=dest_body;*/
! 209: }
E-mail: