Annotation of parser3/src/main/pa_base64.C, revision 1.6
1.1 moko 1: /** @file
2: Parser: base64 functions impl.
3:
4: Copyright (c) 2001-2017 Art. Lebedev Studio (http://www.artlebedev.com)
5: Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
6: */
7:
8: #include "pa_base64.h"
9: #include "pa_common.h"
10:
1.6 ! moko 11: volatile const char * IDENT_PA_BASE64_C="$Id: pa_base64.C,v 1.5 2019/11/15 21:48:51 moko Exp $" IDENT_PA_BASE64_H;
1.1 moko 12:
13: /*
14: * BASE64 part
15: * Authors: Michael Zucchi <notzed@ximian.com>
16: * Jeffrey Stedfast <fejj@ximian.com>
17: *
18: * Copyright 2000-2004 Ximian, Inc. (www.ximian.com)
19: *
20: * This program is free software; you can redistribute it and/or modify
21: * it under the terms of the GNU General Public License as published by
22: * the Free Software Foundation; either version 2 of the License, or
23: * (at your option) any later version.
24: *
25: * This program is distributed in the hope that it will be useful,
26: * but WITHOUT ANY WARRANTY; without even the implied warranty of
27: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28: * GNU General Public License for more details.
29: *
30: * You should have received a copy of the GNU General Public License
31: * along with this program; if not, write to the Free Software
32: * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
33: *
34: */
35:
// Standard base64 alphabet: values 62 and 63 are '+' and '/'.
static const char *base64_alphabet =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789"
	"+/";

// URL-safe base64 alphabet: values 62 and 63 are '-' and '_'.
static const char *base64_alphabet_url_safe =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789"
	"-_";
38:
1.6 ! moko 39: Base64Options::Base64Options(bool awrap): strict(false), wrap(awrap), pad(true), abc(base64_alphabet) {}
1.3 moko 40:
41: void Base64Options::set_url_safe_abc() {
1.4 moko 42: abc = base64_alphabet_url_safe;
1.3 moko 43: }
1.1 moko 44:
45: /**
46: * g_mime_utils_base64_encode_step:
47: * @in: input stream
48: * @inlen: length of the input
49: * @out: output string
50: * @state: holds the number of bits that are stored in @save
51: * @save: leftover bits that have not yet been encoded
52: *
53: * Base64 encodes a chunk of data. Performs an 'encode step', only
54: * encodes blocks of 3 characters to the output at a time, saves
55: * left-over state in state and save (initialise to 0 on first
56: * invocation).
57: *
58: * Returns the number of bytes encoded.
59: **/
60:
61: #define BASE64_GROUPS_IN_LINE 19
62:
1.2 moko 63: static size_t g_mime_utils_base64_encode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save) {
1.1 moko 64: register const unsigned char *inptr;
65: register unsigned char *outptr;
66:
67: if (inlen <= 0)
68: return 0;
69:
70: inptr = in;
71: outptr = out;
72:
73: if (inlen + ((unsigned char *)save)[0] > 2) {
74: const unsigned char *inend = in + inlen - 2;
75: register int c1 = 0, c2 = 0, c3 = 0;
76: register int already;
77:
78: already = *state;
79:
80: switch (((char *)save)[0]) {
81: case 1: c1 = ((unsigned char *)save)[1]; goto skip1;
82: case 2: c1 = ((unsigned char *)save)[1];
83: c2 = ((unsigned char *)save)[2]; goto skip2;
84: }
85:
86: /* yes, we jump into the loop, no i'm not going to change it, its beautiful! */
87: while (inptr < inend) {
88: c1 = *inptr++;
89: skip1:
90: c2 = *inptr++;
91: skip2:
92: c3 = *inptr++;
93: *outptr++ = base64_alphabet [c1 >> 2];
94: *outptr++ = base64_alphabet [(c2 >> 4) | ((c1 & 0x3) << 4)];
95: *outptr++ = base64_alphabet [((c2 & 0x0f) << 2) | (c3 >> 6)];
96: *outptr++ = base64_alphabet [c3 & 0x3f];
97: /* this is a bit ugly ... */
98: if ((++already) >= BASE64_GROUPS_IN_LINE) {
99: *outptr++ = '\n';
100: already = 0;
101: }
102: }
103:
104: ((unsigned char *)save)[0] = 0;
105: inlen = 2 - (inptr - inend);
106: *state = already;
107: }
108:
109: //d(printf ("state = %d, inlen = %d\n", (int)((char *)save)[0], inlen));
110:
111: if (inlen > 0) {
112: register char *saveout;
113:
114: /* points to the slot for the next char to save */
115: saveout = & (((char *)save)[1]) + ((char *)save)[0];
116:
117: /* inlen can only be 0 1 or 2 */
118: switch (inlen) {
119: case 2: *saveout++ = *inptr++;
120: case 1: *saveout++ = *inptr++;
121: }
122: *(char *)save = *(char *)save+(char)inlen;
123: }
124:
125: /*d(printf ("mode = %d\nc1 = %c\nc2 = %c\n",
126: (int)((char *)save)[0],
127: (int)((char *)save)[1],
128: (int)((char *)save)[2]));*/
129:
130: return (outptr - out);
131: }
132:
133: /**
134: * g_mime_utils_base64_encode_close:
135: * @in: input stream
136: * @inlen: length of the input
137: * @out: output string
138: * @state: holds the number of bits that are stored in @save
139: * @save: leftover bits that have not yet been encoded
140: *
141: * Base64 encodes the input stream to the output stream. Call this
142: * when finished encoding data with g_mime_utils_base64_encode_step to
143: * flush off the last little bit.
144: *
145: * Returns the number of bytes encoded.
146: **/
1.2 moko 147: static size_t g_mime_utils_base64_encode_close (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save) {
1.1 moko 148: unsigned char *outptr = out;
149: int c1, c2;
150:
151: if (inlen > 0)
152: outptr += g_mime_utils_base64_encode_step (in, inlen, outptr, state, save);
153:
154: c1 = ((unsigned char *)save)[1];
155: c2 = ((unsigned char *)save)[2];
156:
157: switch (((unsigned char *)save)[0]) {
158: case 2:
159: outptr[2] = base64_alphabet [(c2 & 0x0f) << 2];
160: goto skip;
161: case 1:
162: outptr[2] = '=';
163: skip:
164: outptr[0] = base64_alphabet [c1 >> 2];
165: outptr[1] = base64_alphabet [c2 >> 4 | ((c1 & 0x3) << 4)];
166: outptr[3] = '=';
167: outptr += 4;
168: break;
169: }
170:
171: *outptr++ = 0;
172:
173: *save = 0;
174: *state = 0;
175:
176: return (outptr - out);
177: }
178:
// Decoding table for the standard alphabet, indexed by input byte:
//   0..63 - value of a base64 character
//   253   - the padding character '='
//   254   - whitespace (TAB, LF, CR, space)
//   255   - any other byte
static unsigned char gmime_base64_rank[256] = {
	255,255,255,255,255,255,255,255,255,254,254,255,255,254,255,255, // 0x00: TAB, LF, CR
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0x10
	254,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63, // 0x20: space, '+', '/'
	 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,253,255,255, // 0x30: '0'-'9', '='
	255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 0x40: 'A'-'O'
	 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255, // 0x50: 'P'-'Z'
	255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 0x60: 'a'-'o'
	 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255, // 0x70: 'p'-'z'
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0x80
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0x90
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0xA0
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0xB0
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0xC0
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0xD0
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0xE0
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0xF0
};
197:
// Decoding table for the URL-safe alphabet, indexed by input byte:
//   0..63 - value of a base64 character ('-' is 62, '_' is 63)
//   253   - the padding character '='
//   254   - whitespace (TAB, LF, CR, space)
//   255   - any other byte
static unsigned char gmime_base64_rank_url_safe[256] = {
	255,255,255,255,255,255,255,255,255,254,254,255,255,254,255,255, // 0x00: TAB, LF, CR
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0x10
	254,255,255,255,255,255,255,255,255,255,255,255,255, 62,255,255, // 0x20: space, '-'
	 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,253,255,255, // 0x30: '0'-'9', '='
	255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, // 0x40: 'A'-'O'
	 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255, 63, // 0x50: 'P'-'Z', '_'
	255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 0x60: 'a'-'o'
	 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255, // 0x70: 'p'-'z'
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0x80
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0x90
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0xA0
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0xB0
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0xC0
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0xD0
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0xE0
	255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, // 0xF0
};
216:
1.1 moko 217: /**
1.6 ! moko 218: * g_mime_utils_base64_decode:
1.1 moko 219: * @in: input stream
220: * @inlen: max length of data to decode
221: * @out: output stream
222: * @strict: only base64 and whitespace chars are allowed
223: *
224: * Decodes a chunk of base64 encoded data.
225: *
226: * Returns the number of bytes decoded (which have been dumped in @out).
227: **/
1.5 moko 228: size_t g_mime_utils_base64_decode(const unsigned char *in, size_t inlen, unsigned char *out, Base64Options options) {
229: const unsigned char *inptr = in;
230: unsigned char *outptr = out;
231: const unsigned char *inend = in + inlen;
232:
233: int saved = 0;
234: int state = 0;
1.1 moko 235:
1.5 moko 236: unsigned char *abc_rank = options.abc == base64_alphabet ? gmime_base64_rank : gmime_base64_rank_url_safe;
237:
1.1 moko 238: /* convert 4 base64 bytes to 3 normal bytes */
239: while (inptr < inend) {
1.5 moko 240: unsigned char c = abc_rank[*inptr++];
1.1 moko 241: switch(c) {
1.6 ! moko 242: case 255: // non-base64 and non-whitespace chars. not allowed in strict mode
1.5 moko 243: if(options.strict)
1.4 moko 244: throw Exception(BASE64_FORMAT, 0, "Invalid base64 char on position %d is detected", inptr - in - 1);
1.6 ! moko 245: case 254: // whitespace chars 0x09, 0x0A, 0x0D, 0x20 are allowed in any mode
! 246: break;
! 247: case 253: // =
! 248: if(state < 2) {
! 249: if(options.strict)
! 250: throw Exception(BASE64_FORMAT, 0, "Unexpected '=' on position %d is detected", inptr - in - 1);
! 251: break;
! 252: }
! 253: if(state == 2) { // double '='
! 254: if(inptr == inend) {
! 255: if(options.strict)
! 256: throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars");
! 257: break;
! 258: }
! 259: if(*inptr != '=') {
! 260: if(options.strict)
! 261: throw Exception(BASE64_FORMAT, 0, "Unexpected '=' on position %d is detected", inptr - in - 1);
! 262: break;
! 263: }
! 264: inptr++;
! 265: *outptr++ = (unsigned char)(saved >> 4);
! 266: } else { // single '='
! 267: *outptr++ = (unsigned char)(saved >> 10);
! 268: *outptr++ = (unsigned char)(saved >> 2);
! 269: }
! 270: state = 0;
1.1 moko 271: break;
272: default:
273: saved = (saved << 6) | c;
1.5 moko 274: state++;
275: if (state == 4) {
1.1 moko 276: *outptr++ = (unsigned char)(saved >> 16);
277: *outptr++ = (unsigned char)(saved >> 8);
278: *outptr++ = (unsigned char)(saved);
1.5 moko 279: state = 0;
1.1 moko 280: }
281: }
282: }
1.5 moko 283:
1.6 ! moko 284: if(state > 0) {
! 285: if(state > 1) {
! 286: if(options.pad && options.strict)
! 287: throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars");
! 288: if(state == 2) {
! 289: *outptr++ = (unsigned char)(saved >> 4);
! 290: } else {
! 291: *outptr++ = (unsigned char)(saved >> 10);
! 292: *outptr++ = (unsigned char)(saved >> 2);
! 293: }
! 294: } else {
! 295: if(options.strict)
! 296: throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars");
1.1 moko 297: }
298: }
1.5 moko 299:
1.1 moko 300: return (outptr - out);
301: }
302:
1.4 moko 303: size_t pa_base64_size(size_t in_size, bool wrap){
304: size_t new_size = ((in_size / 3 + 1) * 4) + 1 /*zero terminator*/;
305: if (wrap) new_size += new_size / (BASE64_GROUPS_IN_LINE * 4) /*new lines*/;
306: return new_size;
307: }
1.1 moko 308:
1.3 moko 309: char* pa_base64_encode(const char *in, size_t in_size, Base64Options options) {
1.4 moko 310: size_t new_size = pa_base64_size(in_size, options.wrap);
1.1 moko 311: char* result = new(PointerFreeGC) char[new_size];
1.4 moko 312: int state = 0;
313: int save = 0;
314: size_t filled = g_mime_utils_base64_encode_close ((const unsigned char*)in, in_size, (unsigned char*)result, &state, &save);
1.1 moko 315:
316: //throw Exception(PARSER_RUNTIME, 0, "%d %d %d", in_size, new_size, filled);
317: assert(filled <= new_size);
318:
319: return result;
320: }
321:
// Context handed to file_base64_file_action through its void* argument.
struct File_base64_action_info {
	// where the action stores the pointer to the encoded buffer it allocates
	unsigned char **base64;
};
325:
326: static void file_base64_file_action(struct stat& finfo, int f, const String& file_spec, void *context) {
1.4 moko 327: if(finfo.st_size) {
328: File_base64_action_info& info = *static_cast<File_base64_action_info *>(context);
1.5 moko 329: *info.base64 = new(PointerFreeGC) unsigned char[pa_base64_size(check_file_size(finfo.st_size, file_spec), true)];
1.1 moko 330: unsigned char* base64 = *info.base64;
1.4 moko 331: int state = 0;
332: int save = 0;
1.1 moko 333: int nCount;
334: do {
335: unsigned char buffer[FILE_BUFFER_SIZE];
336: nCount = file_block_read(f, buffer, sizeof(buffer));
337: if( nCount ){
1.4 moko 338: size_t filled = g_mime_utils_base64_encode_step ((const unsigned char*)buffer, nCount, base64, &state, &save);
339: base64 += filled;
1.1 moko 340: }
341: } while(nCount > 0);
342: g_mime_utils_base64_encode_close (0, 0, base64, &state, &save);
343: }
344: }
345:
1.3 moko 346: char* pa_base64_encode(const String& file_spec, Base64Options options){
1.4 moko 347: unsigned char* base64 = 0;
348: File_base64_action_info info = { &base64 };
1.1 moko 349:
350: file_read_action_under_lock(file_spec, "pa_base64_encode", file_base64_file_action, &info);
351:
352: return (char*)base64;
353: }
354:
1.3 moko 355: void pa_base64_decode(const char *in, size_t in_size, char*& result, size_t& result_size, Base64Options options) {
1.1 moko 356: // every 4 base64 bytes are converted into 3 normal bytes
1.4 moko 357: size_t new_size = (in_size + 3) / 4 * 3;
358: result = new(PointerFreeGC) char[new_size + 1 /*terminator*/];
1.1 moko 359:
1.5 moko 360: result_size = g_mime_utils_base64_decode ((const unsigned char*)in, in_size, (unsigned char*)result, options);
1.1 moko 361: assert(result_size <= new_size);
1.4 moko 362: result[result_size] = 0; // for text files
1.1 moko 363: }
364:
E-mail: