// base64codec.cpp
//
// Copyright (C) 2004-2020 MicroNeil Research Corporation.
//
// This software is released under the MIT license. See LICENSE.TXT.

#include "base64codec.hpp"

namespace codedweller {

// Base64 alphabet: the character at index N encodes the 6-bit value N.
// The array holds the 64 alphabet characters plus the terminating NUL.

static const char base64encode[65] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"                                                  // Values  0 - 25.
  "abcdefghijklmnopqrstuvwxyz"                                                  // Values 26 - 51.
  "0123456789"                                                                  // Values 52 - 61.
  "+/";                                                                         // Values 62 and 63.

// Decoding is a single table lookup per input byte. Any byte that is not one
// of the 64 alphabet characters maps to one of the sentinel values below.
// Every sentinel is greater than 63 so it can never collide with real data.

static const unsigned char XXXX = 0xFF;                                         // Bad (illegal) base64 character.
static const unsigned char PAD0 = 0xFE;                                         // Pad character ('=').
static const unsigned char IGNR = 0xFD;                                         // Ignorable character (whitespace).
static const unsigned char STOP = 0xFC;                                         // Not in the table: end of input.

// The table is indexed by the incoming byte. Each entry is one of:
//   0 - 63  the six bit value of an alphabet character,
//   PAD0    for '=',
//   IGNR    for Tab, LF, CR, and Space,
//   XXXX    for everything else.
// STOP never appears in the table; it is only returned by the decoder when
// the input has been exhausted.

static const unsigned char base64decode[256] = {

  // x0   x1   x2   x3   x4   x5   x6   x7   x8   x9   xA   xB   xC   xD   xE   xF

  XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,IGNR,IGNR,XXXX,XXXX,IGNR,XXXX,XXXX,  // 0x: Tab, LF, CR
  XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // 1x
  IGNR,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  62,XXXX,XXXX,XXXX,  63,  // 2x: Space, '+', '/'
    52,  53,  54,  55,  56,  57,  58,  59,  60,  61,XXXX,XXXX,XXXX,PAD0,XXXX,XXXX,  // 3x: '0'-'9', '='
  XXXX,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  // 4x: 'A'-'O'
    15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,XXXX,XXXX,XXXX,XXXX,XXXX,  // 5x: 'P'-'Z'
  XXXX,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  // 6x: 'a'-'o'
    41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,XXXX,XXXX,XXXX,XXXX,XXXX,  // 7x: 'p'-'z'
  XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // 8x
  XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // 9x
  XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // Ax
  XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // Bx
  XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // Cx
  XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // Dx
  XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // Ex
  XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX   // Fx
};

//// to_base64 /////////////////////////////////////////////////////////////////

// Encodes len bytes starting at bfr as base64 and appends the resulting
// characters to this object with push_back().
//
//   bfr - the raw bytes to encode. A NULL pointer is a bad conversion.
//   len - the number of bytes at bfr. Zero or negative is a bad conversion.
//
// On return BadConversion is true if the input was rejected and false if
// the whole buffer was encoded. Every 3 input bytes become 4 output
// characters; a final group of 1 or 2 bytes is padded with '=' so the output
// length is always a multiple of 4.

void to_base64::convert(const unsigned char* bfr, const int len) {              // Converts from a char buffer.
    if(NULL == bfr || 0 >= len) {                                               // With no buffer or no length
        BadConversion = true;                                                   // there is nothing we can convert
        return;                                                                 // so flag it and leave.
    }

    // Work out the encoded size as ceil(len / 3) * 4. This is done in
    // unsigned long because the result is larger than len and could
    // overflow a signed int (undefined behavior) for very large buffers.

    const unsigned long InputSize = static_cast<unsigned long>(len);            // len is known to be positive here.
    const unsigned long EncodedSize = ((InputSize + 2) / 3) * 4;                // Four characters per group of three.
    reserve(EncodedSize);                                                       // Set aside the memory up front.

    const int BytesPerGroup = 3;                                                // Input bytes per base64 group.
    const unsigned long SixBitMask = 0x0000003f;                                // Isolates the low six bits.

    int cursor = 0;                                                             // Index of the next unread byte.
    while(len > cursor) {

        // Work out how many real bytes this group holds: 3 everywhere
        // except possibly the final group, which may hold 1 or 2.

        const int Remaining = len - cursor;
        const int GroupSize = (BytesPerGroup < Remaining) ? BytesPerGroup : Remaining;

        // Pack the group into the low 24 bits of the register, first byte
        // in the most significant position. Missing bytes are left as zero.

        unsigned long REGISTER = 0;
        for(int slot = 0; slot < BytesPerGroup; ++slot) {
            REGISTER <<= 8;                                                     // Make room for this slot.
            if(slot < GroupSize) { REGISTER |= bfr[cursor + slot]; }            // Load the byte if we have one.
        }
        cursor += GroupSize;                                                    // These bytes are consumed.

        // Emit four characters, most significant six bits first. The first
        // two always carry data. The third carries data only if the group
        // had at least 2 bytes and the fourth only if it had all 3; otherwise
        // the position is filled with the '=' pad character.

        push_back(base64encode[(REGISTER >> 18) & SixBitMask]);
        push_back(base64encode[(REGISTER >> 12) & SixBitMask]);
        push_back((1 < GroupSize) ? base64encode[(REGISTER >> 6) & SixBitMask] : '=');
        push_back((2 < GroupSize) ? base64encode[REGISTER & SixBitMask] : '=');
    }
    BadConversion = false;                                                      // If we get here we've done good.
}

// Encodes the bytes held in a vector of unsigned char. An empty vector has
// no first element to take the address of, so it is handled here without
// touching bfr[0]; BadConversion stays true, which is the same result
// convert() reports for zero length input.

to_base64::to_base64(const std::vector<unsigned char>& bfr) :                   // Converts from a base64buffer.
  BadConversion(true) {                                                         // No conversion yet ;-)
    if(bfr.empty()) { return; }                                                 // Nothing to encode: stay bad.
    convert(&bfr[0], static_cast<int>(bfr.size()));                             // Hand over the contiguous bytes.
}

// Encodes the bytes held in a vector of char. An empty vector has no first
// element to take the address of, so it is handled here without touching
// bfr[0]; BadConversion stays true, which is the same result convert()
// reports for zero length input.

to_base64::to_base64(const std::vector<char>& bfr) :                            // Converts from a base64codec buffer.
  BadConversion(true) {                                                         // No conversion yet ;-)
    if(bfr.empty()) { return; }                                                 // Nothing to encode: stay bad.
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&bfr[0]); // View the chars as raw bytes.
    convert(bytes, static_cast<int>(bfr.size()));                               // Do this to get it done.
}

// Encodes len bytes starting at bfr. The arguments are handed to convert()
// unchanged, and convert() decides whether the conversion is good or bad.

to_base64::to_base64(const unsigned char* bfr, const int len) :                 // Converts from a uchar buffer.
  BadConversion(true)                                                           // Bad until convert() says otherwise.
{
    const unsigned char* const source = bfr;                                    // Already the type convert() wants.
    convert(source, len);                                                       // Encode it.
}


// Encodes len bytes starting at bfr. The char buffer is viewed as raw bytes
// and handed to convert(), which decides whether the conversion is good or bad.

to_base64::to_base64(const char* bfr, const int len) :                          // Converts from a char buffer.
  BadConversion(true)                                                           // Bad until convert() says otherwise.
{
    const unsigned char* const source =                                         // View the chars as raw bytes.
      reinterpret_cast<const unsigned char*>(bfr);
    convert(source, len);                                                       // Encode them.
}

// Encodes the contents of a std::string. The string's length is passed
// explicitly to convert(), which decides whether the conversion is good or bad.

to_base64::to_base64(const std::string& s) :                                    // Converts from a c++ string.
  BadConversion(true)                                                           // Bad until convert() says otherwise.
{
    const unsigned char* const source =                                         // View the characters as raw bytes.
      reinterpret_cast<const unsigned char*>(s.c_str());
    convert(source, s.length());                                                // Encode exactly length() bytes.
}

// Encodes a NUL terminated C string (the terminator itself is not encoded).
// A NULL pointer is rejected here, before strlen() is called on it, and
// leaves the object flagged as a bad conversion.

to_base64::to_base64(const char* s) :                                           // Converts from a c string.
  BadConversion(true) {                                                         // No conversion yet ;-)
    if(NULL == s) { return; }                                                   // strlen(NULL) is undefined: stay bad.
    convert(reinterpret_cast<const unsigned char*>(s), strlen(s));              // Do this to get it done.
}

// Reports the outcome of the conversion: true when the input was rejected
// (or no conversion has completed), false once convert() has finished.

bool to_base64::Bad() {
    const bool ConversionFailed = BadConversion;                                // Read the flag
    return ConversionFailed;                                                    // and hand it back.
}

//// from_base64 ///////////////////////////////////////////////////////////////

// Fetches the decode-table entry for the next meaningful input byte.
//
//   cursor - index of the next unread byte in bfr; advanced past every byte
//            this call consumes, including any ignorable bytes it skips.
//   bfr    - the base64 text being decoded.
//   len    - the number of bytes at bfr.
//
// Returns the table entry for the first byte that is not ignorable: a value
// 0 - 63, PAD0, or XXXX. Returns STOP if the input runs out first.

unsigned char from_base64::NextSixBits(                                         // Get the next base64 byte.
  int& cursor,
  const unsigned char* bfr,
  const int len) {

    for(; cursor < len; ) {                                                     // While there is input left
        const unsigned char Decoded = base64decode[bfr[cursor++]];              // look up a byte and step past it.
        if(IGNR != Decoded) { return Decoded; }                                 // Anything but whitespace goes back.
    }                                                                           // Whitespace is simply eaten.
    return STOP;                                                                // Out of input.
}

//// Since the BadConversion flag is set on construction, if we bail out
//// of the convert() for any reason then the conversion will be bad.

// Decodes len bytes of base64 text at bfr and appends the decoded bytes to
// this object with push_back().
//
//   bfr - the base64 text. A NULL pointer leaves the conversion bad.
//   len - the number of bytes at bfr. Zero or negative leaves it bad.
//
// BadConversion is only set to false when the whole input decodes cleanly;
// every early return leaves it as it was on entry. Bytes decoded before an
// error was detected are NOT removed from this object.
//
// Input is consumed in groups of four base64 characters (ignorable
// whitespace is skipped by NextSixBits). Within a group:
//   - running out of input before the first character is a clean finish;
//   - running out of input anywhere else is an error;
//   - an illegal character anywhere is an error;
//   - '=' as the first or second character is an error, because a group
//     must carry at least 12 data bits to produce a byte;
//   - '=' as the third or fourth character ends the conversion successfully.
//     Anything that follows that first pad is not examined.

void from_base64::convert(const unsigned char* bfr, const int len) {            // Converts bfr from base64 to plaintext.
    if(NULL == bfr || 0 >= len) { return; }                                     // If there's nothing to do return bad.

    // Estimate our conversion buffer size.

    int NewSize = len / 4 * 3;                                                  // Four bytes of base64 could be 3 bytes.
    reserve(NewSize);                                                           // Reserve that much space for speed.

    // Shifts and masks used to peel bytes off of the register.

    const int MakeRoomFor6Bits = 6;
    const int GetMS8OutOf12Bits = 4;
    const int BottomFourBits = 0x0000000F;
    const int GetMS8OutOf10Bits = 2;
    const int BottomTwoBits = 0x00000003;

    // Start the conversion process.

    int cursor = 0;
    while(len > cursor) {                                                       // Go through the buffer and convert.

        int REGISTER = 0;                                                       // We will use these to convert as we
        unsigned char LOOKUP = 0;                                               // go through the data.

        // First two base64 bytes. Both must be real data: a pad here would
        // otherwise be added to the register as if it were a six bit value.

        LOOKUP = NextSixBits(cursor, bfr, len);                                 // Grab the next six bits.
        if(STOP == LOOKUP) { break; }                                           // If we ran out here it's ok.
        if(XXXX == LOOKUP || PAD0 == LOOKUP) { return; }                        // If bad or a pad here bail out!
        REGISTER += LOOKUP; REGISTER <<= MakeRoomFor6Bits;                      // Shift that one into place.

        LOOKUP = NextSixBits(cursor, bfr, len);                                 // Grab the next six bits.
        if(XXXX == LOOKUP || STOP == LOOKUP || PAD0 == LOOKUP) { return; }      // If bad, empty, or a pad bail out!
        REGISTER += LOOKUP;                                                     // Load in the six bits.

        // Now we have 12 bits so we can grab our first byte.

        push_back(REGISTER >> GetMS8OutOf12Bits);                               // Push back the converted byte.
        REGISTER = (REGISTER & BottomFourBits) << MakeRoomFor6Bits;             // Keep 4 bits, make room for 6 more.

        // Grab the next 6 bits.

        LOOKUP = NextSixBits(cursor, bfr, len);                                 // Grab the next six bits.
        if(XXXX == LOOKUP || STOP == LOOKUP) { return; }                        // If bad or empty here bail out!
        if(PAD0 == LOOKUP) { break; }                                           // If we've come to a pad we're done!
        REGISTER += LOOKUP;                                                     // Load in the six bits.

        // Now we have 10 bits so we can grab our second byte.

        push_back(REGISTER >> GetMS8OutOf10Bits);                               // Push back the converted byte.
        REGISTER = (REGISTER & BottomTwoBits) << MakeRoomFor6Bits;              // Keep 2 bits, make room for 6 more.

        LOOKUP = NextSixBits(cursor, bfr, len);                                 // Grab the final six bits.
        if(XXXX == LOOKUP || STOP == LOOKUP) { return; }                        // If bad or empty here bail out!
        if(PAD0 == LOOKUP) { break; }                                           // If we've come to a pad we're done!
        REGISTER += LOOKUP;                                                     // Load in the six bits.

        // Now we should have our final 8 bits :-)
        push_back(REGISTER);                                                    // push back the converted byte.
    }
    BadConversion = false;                                                      // If we get here we did ok.
}

// Decodes base64 text held in a vector of unsigned char. An empty vector has
// no first element to take the address of, so it is handled here without
// touching bfr[0]; BadConversion stays true, which is the same result
// convert() leaves for zero length input.

from_base64::from_base64(const std::vector<unsigned char>& bfr) :               // Converts from a base64buffer.
  BadConversion(true) {                                                         // It's bad until we've done it.
    if(bfr.empty()) { return; }                                                 // Nothing to decode: stay bad.
    convert(&bfr[0], static_cast<int>(bfr.size()));                             // Hand over the contiguous bytes.
}

// Decodes base64 text held in a vector of char. An empty vector has no first
// element to take the address of, so it is handled here without touching
// bfr[0]; BadConversion stays true, which is the same result convert()
// leaves for zero length input.

from_base64::from_base64(const std::vector<char>& bfr) :                        // Converts from a buffer.
  BadConversion(true) {                                                         // It's bad until we've done it.
    if(bfr.empty()) { return; }                                                 // Nothing to decode: stay bad.
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(&bfr[0]); // View the chars as raw bytes.
    convert(bytes, static_cast<int>(bfr.size()));                               // This is how we do it.
}

// Decodes base64 text held in a std::string. The string's length is passed
// explicitly to convert(), which decides whether the conversion is good or bad.

from_base64::from_base64(const std::string& s) :                                // Converts from a c++ string.
  BadConversion(true)                                                           // Bad until convert() says otherwise.
{
    const unsigned char* const source =                                         // View the characters as raw bytes.
      reinterpret_cast<const unsigned char*>(s.c_str());
    convert(source, s.length());                                                // Decode exactly length() bytes.
}

// Decodes base64 text held in a NUL terminated C string. A NULL pointer is
// rejected here, before strlen() is called on it, and leaves the object
// flagged as a bad conversion.

from_base64::from_base64(const char* s) :                                       // Converts from a c_string.
  BadConversion(true) {                                                         // It's bad until we've done it.
    if(NULL == s) { return; }                                                   // strlen(NULL) is undefined: stay bad.
    convert(reinterpret_cast<const unsigned char*>(s), strlen(s));              // This is how we do it.
}

// Reports the outcome of the conversion: true when decoding was abandoned
// (or never completed), false once convert() has run to the end.

bool from_base64::Bad() {
    const bool ConversionFailed = BadConversion;                                // Read the flag
    return ConversionFailed;                                                    // and hand it back.
}

} // end namespace codedweller