Annotation of parser3/src/main/pa_base64.C, revision 1.5

1.1       moko        1: /**    @file
                      2:        Parser: base64 functions impl.
                      3: 
                      4:        Copyright (c) 2001-2017 Art. Lebedev Studio (http://www.artlebedev.com)
                      5:        Author: Alexandr Petrosian <paf@design.ru> (http://paf.design.ru)
                      6: */
                      7: 
                      8: #include "pa_base64.h"
                      9: #include "pa_common.h"
                     10: 
1.5     ! moko       11: volatile const char * IDENT_PA_BASE64_C="$Id: pa_base64.C,v 1.4 2019/11/14 23:15:39 moko Exp $" IDENT_PA_BASE64_H;
1.1       moko       12: 
                     13: /*
                     14:  * BASE64 part
                     15:  *  Authors: Michael Zucchi <notzed@ximian.com>
                     16:  *           Jeffrey Stedfast <fejj@ximian.com>
                     17:  *
                     18:  *  Copyright 2000-2004 Ximian, Inc. (www.ximian.com)
                     19:  *
                     20:  *  This program is free software; you can redistribute it and/or modify
                     21:  *  it under the terms of the GNU General Public License as published by
                     22:  *  the Free Software Foundation; either version 2 of the License, or
                     23:  *  (at your option) any later version.
                     24:  *
                     25:  *  This program is distributed in the hope that it will be useful,
                     26:  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
                     27:  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     28:  *  GNU General Public License for more details.
                     29:  *
                     30:  *  You should have received a copy of the GNU General Public License
                     31:  *  along with this program; if not, write to the Free Software
                     32:  *  Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
                     33:  *
                     34:  */
                     35: 
/* Standard base64 alphabet: values 62 and 63 are written as '+' and '/'. */
static const char *base64_alphabet =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789"
	"+/";

/* URL-safe alphabet: values 62 and 63 are written as '-' and '_'. */
static const char *base64_alphabet_url_safe =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789"
	"-_";
                     38: 
1.4       moko       39: Base64Options::Base64Options(bool awrap): strict(false), wrap(awrap), pad(false), abc(base64_alphabet) {}
1.3       moko       40: 
                     41: void Base64Options::set_url_safe_abc() {
1.4       moko       42:        abc = base64_alphabet_url_safe;
1.3       moko       43: }
1.1       moko       44: 
                     45: /**
                     46:  * g_mime_utils_base64_encode_step:
                     47:  * @in: input stream
                     48:  * @inlen: length of the input
                     49:  * @out: output string
                     50:  * @state: holds the number of bits that are stored in @save
                     51:  * @save: leftover bits that have not yet been encoded
                     52:  *
                     53:  * Base64 encodes a chunk of data. Performs an 'encode step', only
                     54:  * encodes blocks of 3 characters to the output at a time, saves
                     55:  * left-over state in state and save (initialise to 0 on first
                     56:  * invocation).
                     57:  *
                     58:  * Returns the number of bytes encoded.
                     59:  **/
                     60: 
                     61: #define BASE64_GROUPS_IN_LINE 19
                     62: 
1.2       moko       63: static size_t g_mime_utils_base64_encode_step (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save) {
1.1       moko       64:        register const unsigned char *inptr;
                     65:        register unsigned char *outptr;
                     66:        
                     67:        if (inlen <= 0)
                     68:                return 0;
                     69:        
                     70:        inptr = in;
                     71:        outptr = out;
                     72:        
                     73:        if (inlen + ((unsigned char *)save)[0] > 2) {
                     74:                const unsigned char *inend = in + inlen - 2;
                     75:                register int c1 = 0, c2 = 0, c3 = 0;
                     76:                register int already;
                     77:                
                     78:                already = *state;
                     79:                
                     80:                switch (((char *)save)[0]) {
                     81:                case 1: c1 = ((unsigned char *)save)[1]; goto skip1;
                     82:                case 2: c1 = ((unsigned char *)save)[1];
                     83:                        c2 = ((unsigned char *)save)[2]; goto skip2;
                     84:                }
                     85:                
                     86:                /* yes, we jump into the loop, no i'm not going to change it, its beautiful! */
                     87:                while (inptr < inend) {
                     88:                        c1 = *inptr++;
                     89:                skip1:
                     90:                        c2 = *inptr++;
                     91:                skip2:
                     92:                        c3 = *inptr++;
                     93:                        *outptr++ = base64_alphabet [c1 >> 2];
                     94:                        *outptr++ = base64_alphabet [(c2 >> 4) | ((c1 & 0x3) << 4)];
                     95:                        *outptr++ = base64_alphabet [((c2 & 0x0f) << 2) | (c3 >> 6)];
                     96:                        *outptr++ = base64_alphabet [c3 & 0x3f];
                     97:                        /* this is a bit ugly ... */
                     98:                        if ((++already) >= BASE64_GROUPS_IN_LINE) {
                     99:                                *outptr++ = '\n';
                    100:                                already = 0;
                    101:                        }
                    102:                }
                    103:                
                    104:                ((unsigned char *)save)[0] = 0;
                    105:                inlen = 2 - (inptr - inend);
                    106:                *state = already;
                    107:        }
                    108:        
                    109:        //d(printf ("state = %d, inlen = %d\n", (int)((char *)save)[0], inlen));
                    110:        
                    111:        if (inlen > 0) {
                    112:                register char *saveout;
                    113:                
                    114:                /* points to the slot for the next char to save */
                    115:                saveout = & (((char *)save)[1]) + ((char *)save)[0];
                    116:                
                    117:                /* inlen can only be 0 1 or 2 */
                    118:                switch (inlen) {
                    119:                case 2: *saveout++ = *inptr++;
                    120:                case 1: *saveout++ = *inptr++;
                    121:                }
                    122:                *(char *)save = *(char *)save+(char)inlen;
                    123:        }
                    124:        
                    125:        /*d(printf ("mode = %d\nc1 = %c\nc2 = %c\n",
                    126:                  (int)((char *)save)[0],
                    127:                  (int)((char *)save)[1],
                    128:                  (int)((char *)save)[2]));*/
                    129:        
                    130:        return (outptr - out);
                    131: }
                    132: 
                    133: /**
                    134:  * g_mime_utils_base64_encode_close:
                    135:  * @in: input stream
                    136:  * @inlen: length of the input
                    137:  * @out: output string
                    138:  * @state: holds the number of bits that are stored in @save
                    139:  * @save: leftover bits that have not yet been encoded
                    140:  *
                    141:  * Base64 encodes the input stream to the output stream. Call this
                    142:  * when finished encoding data with g_mime_utils_base64_encode_step to
                    143:  * flush off the last little bit.
                    144:  *
                    145:  * Returns the number of bytes encoded.
                    146:  **/
1.2       moko      147: static size_t g_mime_utils_base64_encode_close (const unsigned char *in, size_t inlen, unsigned char *out, int *state, int *save) {
1.1       moko      148:        unsigned char *outptr = out;
                    149:        int c1, c2;
                    150:        
                    151:        if (inlen > 0)
                    152:                outptr += g_mime_utils_base64_encode_step (in, inlen, outptr, state, save);
                    153:        
                    154:        c1 = ((unsigned char *)save)[1];
                    155:        c2 = ((unsigned char *)save)[2];
                    156:        
                    157:        switch (((unsigned char *)save)[0]) {
                    158:        case 2:
                    159:                outptr[2] = base64_alphabet [(c2 & 0x0f) << 2];
                    160:                goto skip;
                    161:        case 1:
                    162:                outptr[2] = '=';
                    163:        skip:
                    164:                outptr[0] = base64_alphabet [c1 >> 2];
                    165:                outptr[1] = base64_alphabet [c2 >> 4 | ((c1 & 0x3) << 4)];
                    166:                outptr[3] = '=';
                    167:                outptr += 4;
                    168:                break;
                    169:        }
                    170:        
                    171:        *outptr++ = 0;
                    172:        
                    173:        *save = 0;
                    174:        *state = 0;
                    175:        
                    176:        return (outptr - out);
                    177: }
                    178: 
                    179: static unsigned char gmime_base64_rank[256] = {
                    180:        255,255,255,255,255,255,255,255,255,254,254,255,255,254,255,255,
                    181:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    182:        254,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63,
                    183:         52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,  0,255,255,
                    184:        255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
                    185:         15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255,
                    186:        255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
                    187:         41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255,
                    188:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    189:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    190:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    191:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    192:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    193:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    194:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    195:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    196: };
                    197: 
/* Decode table for the URL-safe alphabet, indexed by input byte:
   0..63 - value of an alphabet character ('-' is 62, '_' is 63, '=' is stored as 0),
   254   - whitespace (0x09, 0x0A, 0x0D, 0x20),
   255   - any other byte, '+' and '/' included. */
static unsigned char gmime_base64_rank_url_safe[256] = {
	/* 0x00 */ 255,255,255,255,255,255,255,255,255,254,254,255,255,254,255,255,
	/* 0x10 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0x20 */ 254,255,255,255,255,255,255,255,255,255,255,255,255, 62,255,255,
	/* 0x30 */  52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,  0,255,255,
	/* 0x40 */ 255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
	/* 0x50 */  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255, 63,
	/* 0x60 */ 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
	/* 0x70 */  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255,
	/* 0x80 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0x90 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0xA0 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0xB0 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0xC0 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0xD0 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0xE0 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
	/* 0xF0 */ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
};
        !           216: 
1.1       moko      217: /**
                    218:  * g_mime_utils_base64_decode_step:
                    219:  * @in: input stream
                    220:  * @inlen: max length of data to decode
                    221:  * @out: output stream
                    222:  * @strict: only base64 and whitespace chars are allowed
                    223:  *
                    224:  * Decodes a chunk of base64 encoded data.
                    225:  *
                    226:  * Returns the number of bytes decoded (which have been dumped in @out).
                    227:  **/
1.5     ! moko      228: size_t g_mime_utils_base64_decode(const unsigned char *in, size_t inlen, unsigned char *out, Base64Options options) {
        !           229:        const unsigned char *inptr = in;
        !           230:        unsigned char *outptr = out;
        !           231:        const unsigned char *inend = in + inlen;
        !           232: 
        !           233:        int saved = 0;
        !           234:        int state = 0;
1.1       moko      235:        
1.5     ! moko      236:        unsigned char *abc_rank = options.abc == base64_alphabet ? gmime_base64_rank : gmime_base64_rank_url_safe;
        !           237: 
1.1       moko      238:        /* convert 4 base64 bytes to 3 normal bytes */
                    239:        while (inptr < inend) {
1.5     ! moko      240:                unsigned char c = abc_rank[*inptr++];
1.1       moko      241:                switch(c) {
                    242:                        case 0xff: // non-base64 and non-whitespace chars. not allowed in strict mode
1.5     ! moko      243:                                if(options.strict)
1.4       moko      244:                                        throw Exception(BASE64_FORMAT, 0, "Invalid base64 char on position %d is detected", inptr - in - 1);
1.1       moko      245:                        case 0xfe: // whitespace chars 0x09, 0x0A, 0x0D, 0x20 are allowed in any mode
                    246:                                break;
                    247:                        default:
                    248:                                saved = (saved << 6) | c;
1.5     ! moko      249:                                state++;
        !           250:                                if (state == 4) {
1.1       moko      251:                                        *outptr++ = (unsigned char)(saved >> 16);
                    252:                                        *outptr++ = (unsigned char)(saved >> 8);
                    253:                                        *outptr++ = (unsigned char)(saved);
1.5     ! moko      254:                                        state = 0;
1.1       moko      255:                                }
                    256:                }
                    257:        }
                    258:        
1.5     ! moko      259:        if(options.strict && state !=0 )
        !           260:                throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars");
        !           261: 
1.1       moko      262:        /* quick scan back for '=' on the end somewhere */
                    263:        /* fortunately we can drop 1 output char for each trailing = (upto 2) */
1.5     ! moko      264:        state = 2;
        !           265:        while (inptr > in && state) {
1.1       moko      266:                inptr--;
1.5     ! moko      267:                if (abc_rank[*inptr] <= 0xfe) {
1.1       moko      268:                        if (*inptr == '=' && outptr > out)
                    269:                                outptr--;
1.5     ! moko      270:                        state--;
1.1       moko      271:                }
                    272:        }
1.5     ! moko      273: 
        !           274:        /* if state != 0 then there is a truncation error! */
1.1       moko      275:        return (outptr - out);
                    276: }
                    277: 
1.4       moko      278: size_t pa_base64_size(size_t in_size, bool wrap){
                    279:        size_t new_size = ((in_size / 3 + 1) * 4) + 1 /*zero terminator*/;
                    280:        if (wrap) new_size += new_size / (BASE64_GROUPS_IN_LINE * 4) /*new lines*/;
                    281:        return new_size;
                    282: }
1.1       moko      283: 
1.3       moko      284: char* pa_base64_encode(const char *in, size_t in_size, Base64Options options) {
1.4       moko      285:        size_t new_size = pa_base64_size(in_size, options.wrap);
1.1       moko      286:        char* result = new(PointerFreeGC) char[new_size];
1.4       moko      287:        int state = 0;
                    288:        int save = 0;
                    289:        size_t filled = g_mime_utils_base64_encode_close ((const unsigned char*)in, in_size, (unsigned char*)result, &state, &save);
1.1       moko      290: 
                    291:        //throw Exception(PARSER_RUNTIME, 0, "%d %d %d", in_size, new_size, filled);
                    292:        assert(filled <= new_size);
                    293: 
                    294:        return result;
                    295: }
                    296: 
                    297: struct File_base64_action_info {
                    298:        unsigned char** base64;
                    299: }; 
                    300: 
                    301: static void file_base64_file_action(struct stat& finfo, int f, const String& file_spec, void *context) {
1.4       moko      302:        if(finfo.st_size) {
                    303:                File_base64_action_info& info = *static_cast<File_base64_action_info *>(context);
1.5     ! moko      304:                *info.base64 = new(PointerFreeGC) unsigned char[pa_base64_size(check_file_size(finfo.st_size, file_spec), true)]; 
1.1       moko      305:                unsigned char* base64 = *info.base64;
1.4       moko      306:                int state = 0;
                    307:                int save = 0;
1.1       moko      308:                int nCount;
                    309:                do {
                    310:                        unsigned char buffer[FILE_BUFFER_SIZE];
                    311:                        nCount = file_block_read(f, buffer, sizeof(buffer));
                    312:                        if( nCount ){
1.4       moko      313:                                size_t filled = g_mime_utils_base64_encode_step ((const unsigned char*)buffer, nCount, base64, &state, &save);
                    314:                                base64 += filled;
1.1       moko      315:                        }
                    316:                } while(nCount > 0);
                    317:                g_mime_utils_base64_encode_close (0, 0, base64, &state, &save);
                    318:        }
                    319: }
                    320: 
1.3       moko      321: char* pa_base64_encode(const String& file_spec, Base64Options options){
1.4       moko      322:        unsigned char* base64 = 0;
                    323:        File_base64_action_info info = { &base64 };
1.1       moko      324: 
                    325:        file_read_action_under_lock(file_spec, "pa_base64_encode", file_base64_file_action, &info);
                    326: 
                    327:        return (char*)base64; 
                    328: }
                    329: 
1.3       moko      330: void pa_base64_decode(const char *in, size_t in_size, char*& result, size_t& result_size, Base64Options options) {
1.1       moko      331:        // every 4 base64 bytes are converted into 3 normal bytes
                    332:        // not full set (tail) of 4-bytes set is ignored
1.4       moko      333:        size_t new_size = (in_size + 3) / 4 * 3;
                    334:        result = new(PointerFreeGC) char[new_size + 1 /*terminator*/];
1.1       moko      335: 
1.5     ! moko      336:        result_size = g_mime_utils_base64_decode ((const unsigned char*)in, in_size, (unsigned char*)result, options);
1.1       moko      337:        assert(result_size <= new_size);
1.4       moko      338:        result[result_size] = 0; // for text files
1.1       moko      339: 
                    340: }
                    341: 

E-mail: