You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

base64codec.cpp 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272
  1. // base64codec.cpp
  2. //
  3. // Copyright (C) 2004-2020 MicroNeil Research Corporation.
  4. //
  5. // This software is released under the MIT license. See LICENSE.TXT.
  6. #include "base64codec.hpp"
  7. namespace codedweller {
  // Base64 alphabet: the array index is the 6-bit value and the element is the
  // character that encodes it (64 characters plus the terminating NUL).
  static const char base64encode[65] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
      "0123456789+/";

  // Sentinel values stored in (or returned alongside) the decode table. They
  // are all above 0x3F so they can never be mistaken for a real 6-bit value.
  static const unsigned char XXXX = 0xFF;   // Byte is not a legal base64 character.
  static const unsigned char PAD0 = 0xFE;   // Byte is the pad character '='.
  static const unsigned char IGNR = 0xFD;   // Byte is ignorable whitespace.
  static const unsigned char STOP = 0xFC;   // Not in the table: signals end of input.

  // Decode table, indexed by the incoming byte. Each entry is one of:
  //   - the 6-bit value (0x00..0x3F) for a base64 alphabet character,
  //   - PAD0 for '=',
  //   - IGNR for Tab (0x09), NL (0x0A), CR (0x0D) and Space (0x20),
  //   - XXXX for every other byte.
  // Using a full 256-entry table makes decoding a single lookup per byte.
  static const unsigned char base64decode[256] = {
  //  x0   x1   x2   x3   x4   x5   x6   x7   x8   x9   xA   xB   xC   xD   xE   xF
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,IGNR,IGNR,XXXX,XXXX,IGNR,XXXX,XXXX,  // 0x
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // 1x
    IGNR,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,0x3E,XXXX,XXXX,XXXX,0x3F,  // 2x
    0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,XXXX,XXXX,XXXX,PAD0,XXXX,XXXX,  // 3x
    XXXX,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,  // 4x
    0x0F,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,XXXX,XXXX,XXXX,XXXX,XXXX,  // 5x
    XXXX,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,  // 6x
    0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,0x30,0x31,0x32,0x33,XXXX,XXXX,XXXX,XXXX,XXXX,  // 7x
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // 8x
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // 9x
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // Ax
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // Bx
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // Cx
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // Dx
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // Ex
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX   // Fx
  };
  39. //// to_base64 /////////////////////////////////////////////////////////////////
  40. void to_base64::convert(const unsigned char* bfr, const int len) { // Converts from a char buffer.
  41. if(NULL == bfr || 0 >= len) { // If there's NULL or no length
  42. BadConversion = true; // that was a bad conversion.
  43. return; // lets get out of here.
  44. }
  45. int NewSize = (len / 3) * 4; // Base64 takes 4 bytes for every 3;
  46. if(0 < len % 3) NewSize += 4; // If there are more, add an other 4;
  47. reserve(NewSize); // Set aside enough memory for the job.
  48. int cursor = 0; // Starting at zero chunk it off.
  49. while(len > cursor) {
  50. // Chunk off 4 bytes into an unsigned int for conversion.
  51. enum EndGames { // Describe the end game for this
  52. OneByte, // chunk as containing either one,
  53. TwoBytes, // two,
  54. ThreeBytes // or three bytes.
  55. } EndGame; // We use this to code the end.
  56. // Byte 0
  57. unsigned long REGISTER = 0; // Start with a clear register.
  58. REGISTER += bfr[cursor]; REGISTER <<= 8; ++cursor; // Load Byte 0.
  59. EndGame = OneByte; // We've added a byte.
  60. // Byte 1
  61. if(len > cursor) { // If we've got bytes left.
  62. REGISTER += bfr[cursor]; // load the next one and
  63. ++cursor; // move the cursor.
  64. EndGame = TwoBytes; // We're up to 2 bytes.
  65. }
  66. REGISTER <<= 8; // Shift to the next byte.
  67. // Byte 2
  68. if(len > cursor) { // If we've got bytes left.
  69. REGISTER += bfr[cursor]; // load the next one and
  70. ++cursor; // move the cursor.
  71. EndGame = ThreeBytes; // That's a full house.
  72. }
  73. // No shift this time, the register is full ;-)
  74. // Now that we have 3 bytes and a characterization we can encode the
  75. // base64 bytes into our vector.
  76. const int SixBitMask = 0x0000003f; // This is how far to shift.
  77. char code3 = base64encode[(REGISTER & SixBitMask)]; REGISTER >>= 6; // Encode four characters for this
  78. char code2 = base64encode[(REGISTER & SixBitMask)]; REGISTER >>= 6; // three bytes.
  79. char code1 = base64encode[(REGISTER & SixBitMask)]; REGISTER >>= 6;
  80. char code0 = base64encode[(REGISTER & SixBitMask)];
  81. push_back(code0); // Push the first 2 encoded bytes onto
  82. push_back(code1); // the vector in the original order.
  83. switch(EndGame) { // Now handle the end game.
  84. case OneByte: { // If the end contains one valid byte
  85. push_back('='); // push back two = to indicate that
  86. push_back('='); // the last two bytes are padding.
  87. break;
  88. }
  89. case TwoBytes: { // If the end contains two valid bytes
  90. push_back(code2); // push back one more code byte and
  91. push_back('='); // push back only one = indicating one
  92. break; // byte of padding.
  93. }
  94. case ThreeBytes: // If we had the full three bytes to
  95. default: { // work with then we have no padding.
  96. push_back(code2); // Push back the remaining two
  97. push_back(code3); // code bytes to capture the full
  98. break; // encoding. This also works
  99. } // in the middle of the input.
  100. } // That's it for the end game.
  101. } // That's it for this chunk.
  102. BadConversion = false; // If we get here we've done good.
  103. }
  104. to_base64::to_base64(const std::vector<unsigned char>& bfr) : // Converts from a base64buffer.
  105. BadConversion(true) { // No conversion yet ;-)
  106. convert(&bfr[0], bfr.size()); // Recast the pointer and do it.
  107. }
  108. to_base64::to_base64(const std::vector<char>& bfr) : // Converts from a base64codec buffer.
  109. BadConversion(true) { // No conversion yet ;-)
  110. convert(reinterpret_cast<const unsigned char*>(&bfr[0]), bfr.size()); // Do this to get it done.
  111. }
  112. to_base64::to_base64(const unsigned char* bfr, const int len) : // Converts from a uchar buffer.
  113. BadConversion(true) { // No conversion yet ;-)
  114. convert(bfr, len); // Do this to get it done.
  115. }
  116. to_base64::to_base64(const char* bfr, const int len) : // Converts from a char buffer.
  117. BadConversion(true) { // No conversion yet ;-)
  118. convert(reinterpret_cast<const unsigned char*>(bfr), len); // Do this to get it done.
  119. }
  120. to_base64::to_base64(const std::string& s) : // Converts from a c++ string.
  121. BadConversion(true) { // No conversion yet ;-)
  122. convert(reinterpret_cast<const unsigned char*>(s.c_str()), s.length()); // Do this to get it done.
  123. }
  124. to_base64::to_base64(const char* s) : // Converts from a c string.
  125. BadConversion(true) { // No conversion yet ;-)
  126. convert(reinterpret_cast<const unsigned char*>(s), strlen(s)); // Do this to get it done.
  127. }
  128. bool to_base64::Bad() { // Look at the flag.
  129. return BadConversion;
  130. }
  131. //// from_base64 ///////////////////////////////////////////////////////////////
  132. unsigned char from_base64::NextSixBits( // Get the next base64 byte.
  133. int& cursor,
  134. const unsigned char* bfr,
  135. const int len) {
  136. while(len > cursor) { // Prepare to eat IGNR chars.
  137. unsigned char c = base64decode[bfr[cursor]]; // Get the next 6 bits.
  138. ++cursor; // Move the cursor for next time.
  139. if(IGNR == c) continue; // If we should ignore it, eat.
  140. if(XXXX == c) return c; // If it's bad, return it.
  141. return c; // If it's ordinary return it.
  142. } // If we run out of bytes
  143. return STOP; // return STOP
  144. }
  145. //// Since the BadConversion flag is set on construction, if we bail out
  146. //// of the convert() for any reason then the conversion will be bad.
  147. void from_base64::convert(const unsigned char* bfr, const int len) { // Converts bfr from base64 to plaintext.
  148. if(NULL == bfr || 0 >= len) { return; } // If there's nothing to do return bad.
  149. // Estimate our conversion buffer size.
  150. int NewSize = len / 4 * 3; // Four bytes of base64 could be 3 bytes.
  151. reserve(NewSize); // Reserve that much space for speed.
  152. // Start the conversion process.
  153. int cursor = 0;
  154. while(len > cursor) { // Go through the buffer and convert.
  155. int REGISTER = 0; // We will use these to convert as we
  156. unsigned char LOOKUP = 0; // go through the data.
  157. // First two base64 bytes
  158. const int MakeRoomFor6Bits = 6;
  159. LOOKUP = NextSixBits(cursor, bfr, len); // Grab the next six bits.
  160. if(STOP == LOOKUP) { break; } // If we ran out here it's ok.
  161. if(XXXX == LOOKUP) { return; } // If the byte is bad bail out!
  162. REGISTER += LOOKUP; REGISTER <<= MakeRoomFor6Bits; // Shift that one into place.
  163. LOOKUP = NextSixBits(cursor, bfr, len); // Grab the next six bits.
  164. if(XXXX == LOOKUP || STOP == LOOKUP) { return; } // If bad or empty here bail out!
  165. REGISTER += LOOKUP; // Load in the six bits.
  166. // Now we have 12 bits so we can grab our first byte.
  167. const int GetMS8OutOf12Bits = 4;
  168. const int BottomFourBits = 0x0000000F;
  169. push_back(REGISTER >> GetMS8OutOf12Bits); // Push back the converted byte.
  170. REGISTER = (REGISTER & BottomFourBits) << MakeRoomFor6Bits; // Make room for the next 6 bits.
  171. // Grab the next 6 bits.
  172. LOOKUP = NextSixBits(cursor, bfr, len); // Grab the next six bits.
  173. if(XXXX == LOOKUP || STOP == LOOKUP) { return; } // If bad or empty here bail out!
  174. if(PAD0 == LOOKUP) { break; } // If we've come to a pad we're done!
  175. REGISTER += LOOKUP; // Load in the six bits.
  176. // Now we have 10 bits so we can grab our Second byte.
  177. const int GetMS8OutOf10Bits = 2;
  178. const int BottomTwoBits = 0x00000003;
  179. push_back(REGISTER >> GetMS8OutOf10Bits); // Push back the converted byte.
  180. REGISTER = (REGISTER & BottomTwoBits) << MakeRoomFor6Bits; // Make room for the next 6 bits.
  181. LOOKUP = NextSixBits(cursor, bfr, len); // Grab the final six bits.
  182. if(XXXX == LOOKUP || STOP == LOOKUP) { return; } // If bad or empty here bail out!
  183. if(PAD0 == LOOKUP) { break; } // If we've come to a pad we're done!
  184. REGISTER += LOOKUP; // Load in the six bits.
  185. // Now we should have our final 8 bits :-)
  186. push_back(REGISTER); // push back the converted byte.
  187. }
  188. BadConversion = false; // If we get here we did ok.
  189. }
  190. from_base64::from_base64(const std::vector<unsigned char>& bfr) : // Converts from a base64buffer.
  191. BadConversion(true) { // It's bad until we've done it.
  192. convert(&bfr[0], bfr.size()); // Recast the pointer and do it.
  193. }
  194. from_base64::from_base64(const std::vector<char>& bfr) : // Converts from a buffer.
  195. BadConversion(true) { // It's bad until we've done it.
  196. convert(reinterpret_cast<const unsigned char*>(&bfr[0]), bfr.size()); // This is how we do it.
  197. }
  198. from_base64::from_base64(const std::string& s) : // Converts from a c++ string.
  199. BadConversion(true) { // It's bad until we've done it.
  200. convert(reinterpret_cast<const unsigned char*>(s.c_str()), s.length()); // This is how we do it.
  201. }
  202. from_base64::from_base64(const char* s) : // Converts from a c_string.
  203. BadConversion(true) { // It's bad until we've done it.
  204. convert(reinterpret_cast<const unsigned char*>(s), strlen(s)); // This is how we do it.
  205. }
  206. bool from_base64::Bad() { // Look at the flag.
  207. return BadConversion;
  208. }
  209. } // end namespace codedweller