Annotation of parser3/src/main/pa_base64.C, revision 1.15

1.1       moko        1: /**    @file
                      2:        Parser: base64 functions impl.
                      3: 
1.15    ! moko        4:        Copyright (c) 2001-2026 Art. Lebedev Studio (https://www.artlebedev.com)
1.12      moko        5:        Authors: Konstantin Morshnev <moko@design.ru>, Alexandr Petrosian <paf@design.ru>
1.1       moko        6: */
                      7: 
                      8: #include "pa_base64.h"
                      9: #include "pa_common.h"
                     10: 
1.15    ! moko       11: volatile const char * IDENT_PA_BASE64_C="$Id: pa_base64.C,v 1.14 2024/12/23 16:59:17 moko Exp $" IDENT_PA_BASE64_H;
1.1       moko       12: 
                     13: /*
1.9       moko       14:  *  BASE64 part inspired by g_mime_utils
1.1       moko       15:  *  Authors: Michael Zucchi <notzed@ximian.com>
                     16:  *           Jeffrey Stedfast <fejj@ximian.com>
                     17:  *
                     18:  *  Copyright 2000-2004 Ximian, Inc. (www.ximian.com)
                     19:  *
                     20:  *  This program is free software; you can redistribute it and/or modify
                     21:  *  it under the terms of the GNU General Public License as published by
                     22:  *  the Free Software Foundation; either version 2 of the License, or
                     23:  *  (at your option) any later version.
                     24:  *
                     25:  *  This program is distributed in the hope that it will be useful,
                     26:  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
                     27:  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     28:  *  GNU General Public License for more details.
                     29:  *
                     30:  *  You should have received a copy of the GNU General Public License
                     31:  *  along with this program; if not, write to the Free Software
                     32:  *  Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
                     33:  *
                     34:  */
                     35: 
// Standard base64 alphabet: values 62 and 63 are encoded as '+' and '/'.
static const char *base64_alphabet =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";

// URL-safe alphabet: identical, except values 62 and 63 are encoded as '-' and '_'.
static const char *base64_alphabet_url_safe =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789-_";
                     38: 
1.6       moko       39: Base64Options::Base64Options(bool awrap): strict(false), wrap(awrap), pad(true), abc(base64_alphabet) {}
1.3       moko       40: 
                     41: void Base64Options::set_url_safe_abc() {
1.4       moko       42:        abc = base64_alphabet_url_safe;
1.3       moko       43: }
1.1       moko       44: 
1.9       moko       45: #define BASE64_GROUPS_IN_LINE 19
                     46: 
1.10      moko       47: static size_t pa_base64_encode(const uchar *in, size_t inlen, uchar *out, Base64Options options) {
                     48:        const uchar *inptr = in;
                     49:        uchar *outptr = out;
1.1       moko       50: 
1.10      moko       51:        const uchar *abc = (const uchar *)options.abc;
1.1       moko       52: 
1.9       moko       53:        if (inlen > 2) {
1.10      moko       54:                const uchar *inend = in + inlen - 2;
1.9       moko       55:                int already=0;
1.1       moko       56:                
                     57:                while (inptr < inend) {
1.9       moko       58:                        int c1 = *inptr++;
                     59:                        int c2 = *inptr++;
                     60:                        int c3 = *inptr++;
                     61:                        *outptr++ = abc[c1 >> 2];
                     62:                        *outptr++ = abc[(c2 >> 4) | ((c1 & 0x3) << 4)];
                     63:                        *outptr++ = abc[((c2 & 0x0f) << 2) | (c3 >> 6)];
                     64:                        *outptr++ = abc[c3 & 0x3f];
                     65: 
                     66:                        if ((++already) >= BASE64_GROUPS_IN_LINE && options.wrap) {
1.1       moko       67:                                *outptr++ = '\n';
                     68:                                already = 0;
                     69:                        }
                     70:                }
                     71:                
                     72:                inlen = 2 - (inptr - inend);
                     73:        }
1.9       moko       74: 
                     75:        if (inlen == 2) {
                     76:                int c1 = *inptr++;
                     77:                int c2 = *inptr++;
                     78:                outptr[0] = abc[c1 >> 2];
                     79:                outptr[1] = abc[c2 >> 4 | ((c1 & 0x3) << 4)];
                     80:                outptr[2] = abc[(c2 & 0x0f) << 2];
                     81:                if(options.pad) {
                     82:                        outptr[3] = '=';
                     83:                        outptr += 4;
                     84:                } else {
                     85:                        outptr += 3;
                     86:                }
                     87:        } else if (inlen == 1) {
                     88:                int c1 = *inptr++;
                     89:                outptr[0] = abc[c1 >> 2];
                     90:                outptr[1] = abc[(c1 & 0x3) << 4];
                     91:                if(options.pad) {
                     92:                        outptr[2] = '=';
                     93:                        outptr[3] = '=';
                     94:                        outptr += 4;
                     95:                } else {
                     96:                        outptr += 2;
1.1       moko       97:                }
                     98:        }
                     99: 
1.9       moko      100:        *outptr='\0';
                    101:        return outptr - out;
1.1       moko      102: }
                    103: 
1.10      moko      104: static uchar gmime_base64_rank[256] = {
1.1       moko      105:        255,255,255,255,255,255,255,255,255,254,254,255,255,254,255,255,
                    106:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    107:        254,255,255,255,255,255,255,255,255,255,255, 62,255,255,255, 63,
1.6       moko      108:         52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,253,255,255,
1.1       moko      109:        255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
                    110:         15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255,255,
                    111:        255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
                    112:         41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255,
                    113:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    114:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    115:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    116:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    117:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    118:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    119:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    120:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    121: };
                    122: 
1.10      moko      123: static uchar gmime_base64_rank_url_safe[256] = {
1.5       moko      124:        255,255,255,255,255,255,255,255,255,254,254,255,255,254,255,255,
                    125:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    126:        254,255,255,255,255,255,255,255,255,255,255,255,255, 62,255,255,
1.6       moko      127:         52, 53, 54, 55, 56, 57, 58, 59, 60, 61,255,255,255,253,255,255,
1.5       moko      128:        255,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
                    129:         15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,255,255,255,255, 63,
                    130:        255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
                    131:         41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,255,255,255,255,255,
                    132:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    133:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    134:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    135:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    136:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    137:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    138:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    139:        255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
                    140: };
                    141: 
1.10      moko      142: size_t pa_base64_decode(const uchar *in, size_t inlen, uchar *out, Base64Options options) {
                    143:        const uchar *inptr = in;
                    144:        uchar *outptr = out;
                    145:        const uchar *inend = in + inlen;
1.5       moko      146: 
                    147:        int saved = 0;
                    148:        int state = 0;
1.1       moko      149:        
1.10      moko      150:        uchar *abc_rank = options.abc == base64_alphabet ? gmime_base64_rank : gmime_base64_rank_url_safe;
1.5       moko      151: 
1.1       moko      152:        /* convert 4 base64 bytes to 3 normal bytes */
                    153:        while (inptr < inend) {
1.10      moko      154:                uchar c = abc_rank[*inptr++];
1.1       moko      155:                switch(c) {
1.6       moko      156:                        case 255: // non-base64 and non-whitespace chars. not allowed in strict mode
1.5       moko      157:                                if(options.strict)
1.4       moko      158:                                        throw Exception(BASE64_FORMAT, 0, "Invalid base64 char on position %d is detected", inptr - in - 1);
1.6       moko      159:                        case 254: // whitespace chars 0x09, 0x0A, 0x0D, 0x20 are allowed in any mode
                    160:                                break;
                    161:                        case 253: // =
                    162:                                if(state < 2) {
                    163:                                        if(options.strict)
                    164:                                                throw Exception(BASE64_FORMAT, 0, "Unexpected '=' on position %d is detected", inptr - in - 1);
                    165:                                        break;
                    166:                                }
                    167:                                if(state == 2) { // double '='
                    168:                                        if(inptr == inend) {
                    169:                                                if(options.strict)
                    170:                                                        throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars");
                    171:                                                break;
                    172:                                        }
                    173:                                        if(*inptr != '=') {
                    174:                                                if(options.strict)
                    175:                                                        throw Exception(BASE64_FORMAT, 0, "Unexpected '=' on position %d is detected", inptr - in - 1);
                    176:                                                break;
                    177:                                        }
                    178:                                        inptr++;
1.10      moko      179:                                        *outptr++ = (uchar)(saved >> 4);
1.6       moko      180:                                } else { // single '='
1.10      moko      181:                                        *outptr++ = (uchar)(saved >> 10);
                    182:                                        *outptr++ = (uchar)(saved >> 2);
1.6       moko      183:                                }
                    184:                                state = 0;
1.1       moko      185:                                break;
                    186:                        default:
                    187:                                saved = (saved << 6) | c;
1.5       moko      188:                                state++;
                    189:                                if (state == 4) {
1.10      moko      190:                                        *outptr++ = (uchar)(saved >> 16);
                    191:                                        *outptr++ = (uchar)(saved >> 8);
                    192:                                        *outptr++ = (uchar)(saved);
1.5       moko      193:                                        state = 0;
1.1       moko      194:                                }
                    195:                }
                    196:        }
1.5       moko      197: 
1.6       moko      198:        if(state > 0) {
                    199:                if(state > 1) {
                    200:                        if(options.pad && options.strict)
                    201:                                throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars");
                    202:                        if(state == 2) {
1.10      moko      203:                                *outptr++ = (uchar)(saved >> 4);
1.6       moko      204:                        } else {
1.10      moko      205:                                *outptr++ = (uchar)(saved >> 10);
                    206:                                *outptr++ = (uchar)(saved >> 2);
1.6       moko      207:                        }
                    208:                } else {
                    209:                        if(options.strict)
                    210:                                throw Exception(BASE64_FORMAT, 0, "Unexpected end of chars");
1.1       moko      211:                }
                    212:        }
1.5       moko      213: 
1.7       moko      214:        *outptr='\0';  // for text files
1.9       moko      215:        return outptr - out;
1.1       moko      216: }
                    217: 
1.8       moko      218: char* pa_base64_encode(const char *in, size_t in_size, Base64Options options) {
1.9       moko      219:        size_t new_size = ((in_size / 3 + 1) * 4);
                    220:        if (options.wrap)
                    221:                new_size += new_size / (BASE64_GROUPS_IN_LINE * 4) /*new lines*/;
1.1       moko      222: 
1.9       moko      223:        char* result = new(PointerFreeGC) char[new_size + 1 /*zero terminator*/];
1.1       moko      224: 
1.14      moko      225:        PA_UNUSED size_t filled = pa_base64_encode((const uchar*)in, in_size, (uchar*)result, options);
1.1       moko      226:        assert(filled <= new_size);
                    227: 
                    228:        return result;
                    229: }
                    230: 
1.7       moko      231: size_t pa_base64_decode(const char *in, size_t in_size, char*& result, Base64Options options) {
1.1       moko      232:        // every 4 base64 bytes are converted into 3 normal bytes
1.4       moko      233:        size_t new_size = (in_size + 3) / 4 * 3;
                    234:        result = new(PointerFreeGC) char[new_size + 1 /*terminator*/];
1.1       moko      235: 
1.10      moko      236:        return pa_base64_decode((const uchar*)in, in_size, (uchar*)result, options);
1.1       moko      237: }

E-mail: