--- parser3/src/main/pa_base64.C 2019/11/12 21:18:30 1.2 +++ parser3/src/main/pa_base64.C 2019/11/24 23:32:14 1.9 @@ -8,10 +8,10 @@ #include "pa_base64.h" #include "pa_common.h" -volatile const char * IDENT_PA_BASE64_C="$Id: pa_base64.C,v 1.2 2019/11/12 21:18:30 moko Exp $" IDENT_PA_BASE64_H; +volatile const char * IDENT_PA_BASE64_C="$Id: pa_base64.C,v 1.9 2019/11/24 23:32:14 moko Exp $" IDENT_PA_BASE64_H; /* - * BASE64 part + * BASE64 part inspired by g_mime_utils * Authors: Michael Zucchi * Jeffrey Stedfast * @@ -34,146 +34,78 @@ volatile const char * IDENT_PA_BASE64_C= */ static const char *base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static const char *base64_alphabet_url_safe = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; -/** - * g_mime_utils_base64_encode_step: - * @in: input stream - * @inlen: length of the input - * @out: output string - * @state: holds the number of bits that are stored in @save - * @save: leftover bits that have not yet been encoded - * - * Base64 encodes a chunk of data. Performs an 'encode step', only - * encodes blocks of 3 characters to the output at a time, saves - * left-over state in state and save (initialise to 0 on first - * invocation). - * - * Returns the number of bytes encoded. - **/ +Base64Options::Base64Options(bool awrap): strict(false), wrap(awrap), pad(true), abc(base64_alphabet) {} + +void Base64Options::set_url_safe_abc() { + abc = base64_alphabet_url_safe; +} #define BASE64_GROUPS_IN_LINE 19 -static size_t g_mime_utils_base64_encode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save) { - register const unsigned char *inptr; - register unsigned char *outptr; - - if (inlen <= 0) - return 0; - - inptr = in; - outptr = out; - - if (inlen + ((unsigned char *)save)[0] > 2) { +static size_t pa_base64_encode(const unsigned char *in, size_t inlen, unsigned char *out, Base64Options options) { + const unsigned char *inptr = in; + unsigned char *outptr = out; + + const unsigned char *abc = (const unsigned char *)options.abc; + + if (inlen > 2) { const unsigned char *inend = in + inlen - 2; - register int c1 = 0, c2 = 0, c3 = 0; - register int already; + int already=0; - already = *state; - - switch (((char *)save)[0]) { - case 1: c1 = ((unsigned char *)save)[1]; goto skip1; - case 2: c1 = ((unsigned char *)save)[1]; - c2 = ((unsigned char *)save)[2]; goto skip2; - } - - /* yes, we jump into the loop, no i'm not going to change it, its beautiful! */ while (inptr < inend) { - c1 = *inptr++; - skip1: - c2 = *inptr++; - skip2: - c3 = *inptr++; - *outptr++ = base64_alphabet [c1 >> 2]; - *outptr++ = base64_alphabet [(c2 >> 4) | ((c1 & 0x3) << 4)]; - *outptr++ = base64_alphabet [((c2 & 0x0f) << 2) | (c3 >> 6)]; - *outptr++ = base64_alphabet [c3 & 0x3f]; - /* this is a bit ugly ... */ - if ((++already) >= BASE64_GROUPS_IN_LINE) { + int c1 = *inptr++; + int c2 = *inptr++; + int c3 = *inptr++; + *outptr++ = abc[c1 >> 2]; + *outptr++ = abc[(c2 >> 4) | ((c1 & 0x3) << 4)]; + *outptr++ = abc[((c2 & 0x0f) << 2) | (c3 >> 6)]; + *outptr++ = abc[c3 & 0x3f]; + + if ((++already) >= BASE64_GROUPS_IN_LINE && options.wrap) { *outptr++ = '\n'; already = 0; } } - ((unsigned char *)save)[0] = 0; inlen = 2 - (inptr - inend); - *state = already; } - - //d(printf ("state = %d, inlen = %d\n", (int)((char *)save)[0], inlen)); - - if (inlen > 0) { - register char *saveout; - - /* points to the slot for the next char to save */ - saveout = & (((char *)save)[1]) + ((char *)save)[0]; - - /* inlen can only be 0 1 or 2 */ - switch (inlen) { - case 2: *saveout++ = *inptr++; - case 1: *saveout++ = *inptr++; + + if (inlen == 2) { + int c1 = *inptr++; + int c2 = *inptr++; + outptr[0] = abc[c1 >> 2]; + outptr[1] = abc[c2 >> 4 | ((c1 & 0x3) << 4)]; + outptr[2] = abc[(c2 & 0x0f) << 2]; + if(options.pad) { + outptr[3] = '='; + outptr += 4; + } else { + outptr += 3; + } + } else if (inlen == 1) { + int c1 = *inptr++; + outptr[0] = abc[c1 >> 2]; + outptr[1] = abc[(c1 & 0x3) << 4]; + if(options.pad) { + outptr[2] = '='; + outptr[3] = '='; + outptr += 4; + } else { + outptr += 2; } - *(char *)save = *(char *)save+(char)inlen; } - - /*d(printf ("mode = %d\nc1 = %c\nc2 = %c\n", - (int)((char *)save)[0], - (int)((char *)save)[1], - (int)((char *)save)[2]));*/ - - return (outptr - out); -} -/** - * g_mime_utils_base64_encode_close: - * @in: input stream - * @inlen: length of the input - * @out: output string - * @state: holds the number of bits that are stored in @save - * @save: leftover bits that have not yet been encoded - * - * Base64 encodes the input stream to the output stream. Call this - * when finished encoding data with g_mime_utils_base64_encode_step to - * flush off the last little bit. - * - * Returns the number of bytes encoded. - **/ -static size_t g_mime_utils_base64_encode_close (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save) { - unsigned char *outptr = out; - int c1, c2; - - if (inlen > 0) - outptr += g_mime_utils_base64_encode_step (in, inlen, outptr, state, save); - - c1 = ((unsigned char *)save)[1]; - c2 = ((unsigned char *)save)[2]; - - switch (((unsigned char *)save)[0]) { - case 2: - outptr[2] = base64_alphabet [(c2 & 0x0f) << 2]; - goto skip; - case 1: - outptr[2] = '='; - skip: - outptr[0] = base64_alphabet [c1 >> 2]; - outptr[1] = base64_alphabet [c2 >> 4 | ((c1 & 0x3) << 4)]; - outptr[3] = '='; - outptr += 4; - break; - } - - *outptr++ = 0; - - *save = 0; - *state = 0; - - return (outptr - out); + *outptr='\0'; + return outptr - out; } static unsigned char gmime_base64_rank[256] = { 255,255,255,255,255,255,255,255,255,254,254,255,255,254,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 254,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63, - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255, 0,255,255, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,253,255,255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255, 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, @@ -188,138 +120,118 @@ static unsigned char gmime_base64_rank[2 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, }; -/** - * g_mime_utils_base64_decode_step: - * @in: input stream - * @inlen: max length of data to decode - * @out: output stream - * @state: holds the number of bits that are stored in @save - * @save: leftover bits that have not yet been decoded - * @strict: only base64 and whitespace chars are allowed - * - * Decodes a chunk of base64 encoded data. - * - * Returns the number of bytes decoded (which have been dumped in @out). - **/ -size_t g_mime_utils_base64_decode_step(const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save, bool strict=false) { - const unsigned char *inptr; - unsigned char *outptr; - const unsigned char *inend; - int saved; - unsigned char c; - int i; - - inend = in + inlen; - outptr = out; +static unsigned char gmime_base64_rank_url_safe[256] = { + 255,255,255,255,255,255,255,255,255,254,254,255,255,254,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 254,255,255,255,255,255,255,255,255,255,255,255,255, 62,255,255, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,253,255,255, + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255, 63, + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +}; + +size_t pa_base64_decode(const unsigned char *in, size_t inlen, unsigned char *out, Base64Options options) { + const unsigned char *inptr = in; + unsigned char *outptr = out; + const unsigned char *inend = in + inlen; + + int saved = 0; + int state = 0; + unsigned char *abc_rank = options.abc == base64_alphabet ? gmime_base64_rank : gmime_base64_rank_url_safe; + /* convert 4 base64 bytes to 3 normal bytes */ - saved = *save; - i = *state; - inptr = in; while (inptr < inend) { - c = gmime_base64_rank[*inptr++]; + unsigned char c = abc_rank[*inptr++]; switch(c) { - case 0xff: // non-base64 and non-whitespace chars. not allowed in strict mode - if(strict) - throw Exception(BASE64_FORMAT, 0, "Invalid base64 char on position %d is detected", inptr-in-1); - case 0xfe: // whitespace chars 0x09, 0x0A, 0x0D, 0x20 are allowed in any mode + case 255: // non-base64 and non-whitespace chars. not allowed in strict mode + if(options.strict) + throw Exception(BASE64_FORMAT, 0, "Invalid base64 char on position %d is detected", inptr - in - 1); + case 254: // whitespace chars 0x09, 0x0A, 0x0D, 0x20 are allowed in any mode + break; + case 253: // = + if(state < 2) { + if(options.strict) + throw Exception(BASE64_FORMAT, 0, "Unexpected '=' on position %d is detected", inptr - in - 1); + break; + } + if(state == 2) { // double '=' + if(inptr == inend) { + if(options.strict) + throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars"); + break; + } + if(*inptr != '=') { + if(options.strict) + throw Exception(BASE64_FORMAT, 0, "Unexpected '=' on position %d is detected", inptr - in - 1); + break; + } + inptr++; + *outptr++ = (unsigned char)(saved >> 4); + } else { // single '=' + *outptr++ = (unsigned char)(saved >> 10); + *outptr++ = (unsigned char)(saved >> 2); + } + state = 0; break; default: saved = (saved << 6) | c; - i++; - if (i == 4) { + state++; + if (state == 4) { *outptr++ = (unsigned char)(saved >> 16); *outptr++ = (unsigned char)(saved >> 8); *outptr++ = (unsigned char)(saved); - i = 0; + state = 0; } } } - - *save = saved; - *state = i; - - /* quick scan back for '=' on the end somewhere */ - /* fortunately we can drop 1 output char for each trailing = (upto 2) */ - i = 2; - while (inptr > in && i) { - inptr--; - if (gmime_base64_rank[*inptr] <= 0xfe) { - if (*inptr == '=' && outptr > out) - outptr--; - i--; + + if(state > 0) { + if(state > 1) { + if(options.pad && options.strict) + throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars"); + if(state == 2) { + *outptr++ = (unsigned char)(saved >> 4); + } else { + *outptr++ = (unsigned char)(saved >> 10); + *outptr++ = (unsigned char)(saved >> 2); + } + } else { + if(options.strict) + throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars"); } } - - /* if i != 0 then there is a truncation error! */ - return (outptr - out); -} + *outptr='\0'; // for text files + return outptr - out; +} -char* pa_base64_encode(const char *in, size_t in_size) { +char* pa_base64_encode(const char *in, size_t in_size, Base64Options options) { size_t new_size = ((in_size / 3 + 1) * 4); - new_size += new_size / (BASE64_GROUPS_IN_LINE * 4)/*new lines*/ + 1/*zero terminator*/; - char* result = new(PointerFreeGC) char[new_size]; - int state=0; - int save=0; -#ifndef NDEBUG - size_t filled= -#endif - g_mime_utils_base64_encode_close ((const unsigned char*)in, in_size, (unsigned char*)result, &state, &save); + if (options.wrap) + new_size += new_size / (BASE64_GROUPS_IN_LINE * 4) /*new lines*/; + + char* result = new(PointerFreeGC) char[new_size + 1 /*zero terminator*/]; - //throw Exception(PARSER_RUNTIME, 0, "%d %d %d", in_size, new_size, filled); + size_t filled = pa_base64_encode((const unsigned char*)in, in_size, (unsigned char*)result, options); assert(filled <= new_size); return result; } -struct File_base64_action_info { - unsigned char** base64; -}; - -static void file_base64_file_action(struct stat& finfo, int f, const String& file_spec, void *context) { - - if(finfo.st_size) { - File_base64_action_info& info=*static_cast(context); - *info.base64=new(PointerFreeGC) unsigned char[check_file_size(finfo.st_size, file_spec) * 2 + 6]; - unsigned char* base64 = *info.base64; - int state=0; - int save=0; - int nCount; - do { - unsigned char buffer[FILE_BUFFER_SIZE]; - nCount = file_block_read(f, buffer, sizeof(buffer)); - if( nCount ){ - size_t filled=g_mime_utils_base64_encode_step ((const unsigned char*)buffer, nCount, base64, &state, &save); - base64+=filled; - } - } while(nCount > 0); - g_mime_utils_base64_encode_close (0, 0, base64, &state, &save); - } -} - -char* pa_base64_encode(const String& file_spec){ - unsigned char* base64=0; - File_base64_action_info info={&base64}; - - file_read_action_under_lock(file_spec, "pa_base64_encode", file_base64_file_action, &info); - - return (char*)base64; -} - -void pa_base64_decode(const char *in, size_t in_size, char*& result, size_t& result_size, bool strict) { +size_t pa_base64_decode(const char *in, size_t in_size, char*& result, Base64Options options) { // every 4 base64 bytes are converted into 3 normal bytes - // not full set (tail) of 4-bytes set is ignored - size_t new_size=in_size/4*3; - result=new(PointerFreeGC) char[new_size+1/*terminator*/]; - - int state=0; - int save=0; - result_size=g_mime_utils_base64_decode_step ((const unsigned char*)in, in_size, (unsigned char*)result, &state, &save, strict); - assert(result_size <= new_size); - result[result_size]=0; // for text files + size_t new_size = (in_size + 3) / 4 * 3; + result = new(PointerFreeGC) char[new_size + 1 /*terminator*/]; - if(strict && state!=0) - throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars"); + return pa_base64_decode((const unsigned char*)in, in_size, (unsigned char*)result, options); } -