--- parser3/src/main/pa_base64.C 2019/11/12 21:18:30 1.2 +++ parser3/src/main/pa_base64.C 2019/11/20 20:48:25 1.7 @@ -8,7 +8,7 @@ #include "pa_base64.h" #include "pa_common.h" -volatile const char * IDENT_PA_BASE64_C="$Id: pa_base64.C,v 1.2 2019/11/12 21:18:30 moko Exp $" IDENT_PA_BASE64_H; +volatile const char * IDENT_PA_BASE64_C="$Id: pa_base64.C,v 1.7 2019/11/20 20:48:25 moko Exp $" IDENT_PA_BASE64_H; /* * BASE64 part @@ -34,6 +34,13 @@ volatile const char * IDENT_PA_BASE64_C= */ static const char *base64_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static const char *base64_alphabet_url_safe = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + +Base64Options::Base64Options(bool awrap): strict(false), wrap(awrap), pad(true), abc(base64_alphabet) {} + +void Base64Options::set_url_safe_abc() { + abc = base64_alphabet_url_safe; +} /** * g_mime_utils_base64_encode_step: @@ -173,7 +180,7 @@ static unsigned char gmime_base64_rank[2 255,255,255,255,255,255,255,255,255,254,254,255,255,254,255,255, 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, 254,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63, - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255, 0,255,255, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,253,255,255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255, 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, @@ -188,84 +195,113 @@ static unsigned char gmime_base64_rank[2 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, }; -/** - * g_mime_utils_base64_decode_step: - * @in: input stream - * @inlen: max length of data to decode - * @out: output stream - * @state: holds the number of bits that are stored in @save - * @save: leftover bits that have not yet been decoded - * @strict: only base64 and whitespace chars are allowed - * - * Decodes a chunk of base64 encoded data. - * - * Returns the number of bytes decoded (which have been dumped in @out). - **/ -size_t g_mime_utils_base64_decode_step(const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save, bool strict=false) { - const unsigned char *inptr; - unsigned char *outptr; - const unsigned char *inend; - int saved; - unsigned char c; - int i; - - inend = in + inlen; - outptr = out; +static unsigned char gmime_base64_rank_url_safe[256] = { + 255,255,255,255,255,255,255,255,255,254,254,255,255,254,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 254,255,255,255,255,255,255,255,255,255,255,255,255, 62,255,255, + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,253,255,255, + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255, 63, + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, + 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255, +}; + +size_t g_mime_utils_base64_decode(const unsigned char *in, size_t inlen, unsigned char *out, Base64Options options) { + const unsigned char *inptr = in; + unsigned char *outptr = out; + const unsigned char *inend = in + inlen; + + int saved = 0; + int state = 0; + unsigned char *abc_rank = options.abc == base64_alphabet ? gmime_base64_rank : gmime_base64_rank_url_safe; + /* convert 4 base64 bytes to 3 normal bytes */ - saved = *save; - i = *state; - inptr = in; while (inptr < inend) { - c = gmime_base64_rank[*inptr++]; + unsigned char c = abc_rank[*inptr++]; switch(c) { - case 0xff: // non-base64 and non-whitespace chars. not allowed in strict mode - if(strict) - throw Exception(BASE64_FORMAT, 0, "Invalid base64 char on position %d is detected", inptr-in-1); - case 0xfe: // whitespace chars 0x09, 0x0A, 0x0D, 0x20 are allowed in any mode + case 255: // non-base64 and non-whitespace chars. not allowed in strict mode + if(options.strict) + throw Exception(BASE64_FORMAT, 0, "Invalid base64 char on position %d is detected", inptr - in - 1); + case 254: // whitespace chars 0x09, 0x0A, 0x0D, 0x20 are allowed in any mode + break; + case 253: // = + if(state < 2) { + if(options.strict) + throw Exception(BASE64_FORMAT, 0, "Unexpected '=' on position %d is detected", inptr - in - 1); + break; + } + if(state == 2) { // double '=' + if(inptr == inend) { + if(options.strict) + throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars"); + break; + } + if(*inptr != '=') { + if(options.strict) + throw Exception(BASE64_FORMAT, 0, "Unexpected '=' on position %d is detected", inptr - in - 1); + break; + } + inptr++; + *outptr++ = (unsigned char)(saved >> 4); + } else { // single '=' + *outptr++ = (unsigned char)(saved >> 10); + *outptr++ = (unsigned char)(saved >> 2); + } + state = 0; break; default: saved = (saved << 6) | c; - i++; - if (i == 4) { + state++; + if (state == 4) { *outptr++ = (unsigned char)(saved >> 16); *outptr++ = (unsigned char)(saved >> 8); *outptr++ = (unsigned char)(saved); - i = 0; + state = 0; } } } - - *save = saved; - *state = i; - - /* quick scan back for '=' on the end somewhere */ - /* fortunately we can drop 1 output char for each trailing = (upto 2) */ - i = 2; - while (inptr > in && i) { - inptr--; - if (gmime_base64_rank[*inptr] <= 0xfe) { - if (*inptr == '=' && outptr > out) - outptr--; - i--; + + if(state > 0) { + if(state > 1) { + if(options.pad && options.strict) + throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars"); + if(state == 2) { + *outptr++ = (unsigned char)(saved >> 4); + } else { + *outptr++ = (unsigned char)(saved >> 10); + *outptr++ = (unsigned char)(saved >> 2); + } + } else { + if(options.strict) + throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars"); } } - - /* if i != 0 then there is a truncation error! */ + + *outptr='\0'; // for text files return (outptr - out); } +size_t pa_base64_size(size_t in_size, bool wrap){ + size_t new_size = ((in_size / 3 + 1) * 4) + 1 /*zero terminator*/; + if (wrap) new_size += new_size / (BASE64_GROUPS_IN_LINE * 4) /*new lines*/; + return new_size; +} -char* pa_base64_encode(const char *in, size_t in_size) { - size_t new_size = ((in_size / 3 + 1) * 4); - new_size += new_size / (BASE64_GROUPS_IN_LINE * 4)/*new lines*/ + 1/*zero terminator*/; +char* pa_base64_encode(const char *in, size_t in_size, Base64Options options) { + size_t new_size = pa_base64_size(in_size, options.wrap); char* result = new(PointerFreeGC) char[new_size]; - int state=0; - int save=0; -#ifndef NDEBUG - size_t filled= -#endif - g_mime_utils_base64_encode_close ((const unsigned char*)in, in_size, (unsigned char*)result, &state, &save); + int state = 0; + int save = 0; + size_t filled = g_mime_utils_base64_encode_close ((const unsigned char*)in, in_size, (unsigned char*)result, &state, &save); //throw Exception(PARSER_RUNTIME, 0, "%d %d %d", in_size, new_size, filled); assert(filled <= new_size); @@ -278,48 +314,38 @@ struct File_base64_action_info { }; static void file_base64_file_action(struct stat& finfo, int f, const String& file_spec, void *context) { - - if(finfo.st_size) { - File_base64_action_info& info=*static_cast(context); - *info.base64=new(PointerFreeGC) unsigned char[check_file_size(finfo.st_size, file_spec) * 2 + 6]; + if(finfo.st_size) { + File_base64_action_info& info = *static_cast(context); + *info.base64 = new(PointerFreeGC) unsigned char[pa_base64_size(check_file_size(finfo.st_size, file_spec), true)]; unsigned char* base64 = *info.base64; - int state=0; - int save=0; + int state = 0; + int save = 0; int nCount; do { unsigned char buffer[FILE_BUFFER_SIZE]; nCount = file_block_read(f, buffer, sizeof(buffer)); if( nCount ){ - size_t filled=g_mime_utils_base64_encode_step ((const unsigned char*)buffer, nCount, base64, &state, &save); - base64+=filled; + size_t filled = g_mime_utils_base64_encode_step ((const unsigned char*)buffer, nCount, base64, &state, &save); + base64 += filled; } } while(nCount > 0); g_mime_utils_base64_encode_close (0, 0, base64, &state, &save); } } -char* pa_base64_encode(const String& file_spec){ - unsigned char* base64=0; - File_base64_action_info info={&base64}; +char* pa_base64_encode(const String& file_spec, Base64Options options){ + unsigned char* base64 = 0; + File_base64_action_info info = { &base64 }; file_read_action_under_lock(file_spec, "pa_base64_encode", file_base64_file_action, &info); return (char*)base64; } -void pa_base64_decode(const char *in, size_t in_size, char*& result, size_t& result_size, bool strict) { +size_t pa_base64_decode(const char *in, size_t in_size, char*& result, Base64Options options) { // every 4 base64 bytes are converted into 3 normal bytes - // not full set (tail) of 4-bytes set is ignored - size_t new_size=in_size/4*3; - result=new(PointerFreeGC) char[new_size+1/*terminator*/]; - - int state=0; - int save=0; - result_size=g_mime_utils_base64_decode_step ((const unsigned char*)in, in_size, (unsigned char*)result, &state, &save, strict); - assert(result_size <= new_size); - result[result_size]=0; // for text files + size_t new_size = (in_size + 3) / 4 * 3; + result = new(PointerFreeGC) char[new_size + 1 /*terminator*/]; - if(strict && state!=0) - throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars"); + return g_mime_utils_base64_decode ((const unsigned char*)in, in_size, (unsigned char*)result, options); } -