base64codec.cpp 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. // base64codec.cpp
  2. // Copyright (C) 2006 - 2009 MicroNeil Research Corporation
  3. // See base64codec.hpp
  4. //typedef vector<char> base64codec_buffer;
  5. //typedef vector<char>::iterator base64codec_iterator;
  6. #include "base64codec.hpp"
  7. using namespace std;
  8. namespace CodeDweller {
  namespace base64codec {

      // Encoding alphabet. The index of a character in this string is the
      // six-bit value that character stands for (64 symbols + the NUL).
      static const char base64encode[65] =
          "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

      // Sentinel codes. Real six-bit values are 0x00..0x3F, so none of these
      // can be confused with decoded data.
      static const unsigned char XXXX = 0xFF;     // Byte is not legal in base64.
      static const unsigned char PAD0 = 0xFE;     // Byte is the pad character '='.
      static const unsigned char IGNR = 0xFD;     // Byte is ignorable whitespace.
      static const unsigned char STOP = 0xFC;     // Input exhausted (never stored in the table).

      // Decoding table, indexed by the raw input byte. Each entry is either
      // the six-bit value of that byte or one of the sentinels above:
      //   - tab (0x09), line feed (0x0A), carriage return (0x0D) and
      //     space (0x20) map to IGNR,
      //   - '=' (0x3D) maps to PAD0,
      //   - every byte outside the alphabet maps to XXXX.
      // Rows are labelled with the high nibble of the byte, columns with the
      // low nibble.
      static const unsigned char base64decode[256] = {
      //  x0    x1    x2    x3    x4    x5    x6    x7    x8    x9    xA    xB    xC    xD    xE    xF
          XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, IGNR, IGNR, XXXX, XXXX, IGNR, XXXX, XXXX,  // 0x
          XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX,  // 1x
          IGNR, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, 0x3E, XXXX, XXXX, XXXX, 0x3F,  // 2x  ' ' '+' '/'
          0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, XXXX, XXXX, XXXX, PAD0, XXXX, XXXX,  // 3x  '0'-'9' '='
          XXXX, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,  // 4x  'A'-'O'
          0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, XXXX, XXXX, XXXX, XXXX, XXXX,  // 5x  'P'-'Z'
          XXXX, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,  // 6x  'a'-'o'
          0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32, 0x33, XXXX, XXXX, XXXX, XXXX, XXXX,  // 7x  'p'-'z'
          XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX,  // 8x
          XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX,  // 9x
          XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX,  // Ax
          XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX,  // Bx
          XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX,  // Cx
          XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX,  // Dx
          XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX,  // Ex
          XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX, XXXX   // Fx
      };

  } // End namespace base64codec
  42. using namespace base64codec;
  43. //// to_base64 /////////////////////////////////////////////////////////////////
  44. void to_base64::convert(const unsigned char* bfr, const int len) { // Converts from a char buffer.
  45. if(NULL == bfr || 0 >= len) { // If there's NULL or no length
  46. BadConversion = true; // that was a bad conversion.
  47. return; // lets get out of here.
  48. }
  49. int NewSize = (len / 3) * 4; // Base64 takes 4 bytes for every 3;
  50. if(0 < len % 3) NewSize += 4; // If there are more, add an other 4;
  51. reserve(NewSize); // Set aside enough memory for the job.
  52. int cursor = 0; // Starting at zero chunk it off.
  53. while(len > cursor) {
  54. // Chunk off 4 bytes into an unsigned int for conversion.
  55. enum EndGames { // Describe the end game for this
  56. OneByte, // chunk as containing either one,
  57. TwoBytes, // two,
  58. ThreeBytes // or three bytes.
  59. } EndGame; // We use this to code the end.
  60. // Byte 0
  61. unsigned long REGISTER = 0; // Start with a clear register.
  62. REGISTER += bfr[cursor]; REGISTER <<= 8; ++cursor; // Load Byte 0.
  63. EndGame = OneByte; // We've added a byte.
  64. // Byte 1
  65. if(len > cursor) { // If we've got bytes left.
  66. REGISTER += bfr[cursor]; // load the next one and
  67. ++cursor; // move the cursor.
  68. EndGame = TwoBytes; // We're up to 2 bytes.
  69. }
  70. REGISTER <<= 8; // Shift to the next byte.
  71. // Byte 2
  72. if(len > cursor) { // If we've got bytes left.
  73. REGISTER += bfr[cursor]; // load the next one and
  74. ++cursor; // move the cursor.
  75. EndGame = ThreeBytes; // That's a full house.
  76. }
  77. // No shift this time, the register is full ;-)
  78. // Now that we have 3 bytes and a characterization we can encode the
  79. // base64 bytes into our vector.
  80. const int SixBitMask = 0x0000003f; // This is how far to shift.
  81. char code3 = base64encode[(REGISTER & SixBitMask)]; REGISTER >>= 6; // Encode four characters for this
  82. char code2 = base64encode[(REGISTER & SixBitMask)]; REGISTER >>= 6; // three bytes.
  83. char code1 = base64encode[(REGISTER & SixBitMask)]; REGISTER >>= 6;
  84. char code0 = base64encode[(REGISTER & SixBitMask)];
  85. push_back(code0); // Push the first 2 encoded bytes onto
  86. push_back(code1); // the vector in the original order.
  87. switch(EndGame) { // Now handle the end game.
  88. case OneByte: { // If the end contains one valid byte
  89. push_back('='); // push back two = to indicate that
  90. push_back('='); // the last two bytes are padding.
  91. break;
  92. }
  93. case TwoBytes: { // If the end contains two valid bytes
  94. push_back(code2); // push back one more code byte and
  95. push_back('='); // push back only one = indicating one
  96. break; // byte of padding.
  97. }
  98. case ThreeBytes: // If we had the full three bytes to
  99. default: { // work with then we have no padding.
  100. push_back(code2); // Push back the remaining two
  101. push_back(code3); // code bytes to capture the full
  102. break; // encoding. This also works
  103. } // in the middle of the input.
  104. } // That's it for the end game.
  105. } // That's it for this chunk.
  106. BadConversion = false; // If we get here we've done good.
  107. }
  108. to_base64::to_base64(const vector<unsigned char>& bfr) : // Converts from a base64buffer.
  109. BadConversion(true) { // No conversion yet ;-)
  110. convert(&bfr[0], bfr.size()); // Recast the pointer and do it.
  111. }
  112. to_base64::to_base64(const vector<char>& bfr) : // Converts from a base64codec buffer.
  113. BadConversion(true) { // No conversion yet ;-)
  114. convert(reinterpret_cast<const unsigned char*>(&bfr[0]), bfr.size()); // Do this to get it done.
  115. }
  116. to_base64::to_base64(const unsigned char* bfr, const int len) : // Converts from a uchar buffer.
  117. BadConversion(true) { // No conversion yet ;-)
  118. convert(bfr, len); // Do this to get it done.
  119. }
  120. to_base64::to_base64(const char* bfr, const int len) : // Converts from a char buffer.
  121. BadConversion(true) { // No conversion yet ;-)
  122. convert(reinterpret_cast<const unsigned char*>(bfr), len); // Do this to get it done.
  123. }
  124. to_base64::to_base64(const string& s) : // Converts from a c++ string.
  125. BadConversion(true) { // No conversion yet ;-)
  126. convert(reinterpret_cast<const unsigned char*>(s.c_str()), s.length()); // Do this to get it done.
  127. }
  128. to_base64::to_base64(const char* s) : // Converts from a c string.
  129. BadConversion(true) { // No conversion yet ;-)
  130. convert(reinterpret_cast<const unsigned char*>(s), strlen(s)); // Do this to get it done.
  131. }
  132. bool to_base64::Bad() { // Look at the flag.
  133. return BadConversion;
  134. }
  135. //// from_base64 ///////////////////////////////////////////////////////////////
  136. unsigned char from_base64::NextSixBits( // Get the next base64 byte.
  137. int& cursor,
  138. const unsigned char* bfr,
  139. const int len) {
  140. while(len > cursor) { // Prepare to eat IGNR chars.
  141. unsigned char c = base64decode[bfr[cursor]]; // Get the next 6 bits.
  142. ++cursor; // Move the cursor for next time.
  143. if(IGNR == c) continue; // If we should ignore it, eat.
  144. if(XXXX == c) return c; // If it's bad, return it.
  145. return c; // If it's ordinary return it.
  146. } // If we run out of bytes
  147. return STOP; // return STOP
  148. }
  149. //// Since the BadConversion flag is set on construction, if we bail out
  150. //// of the convert() for any reason then the conversion will be bad.
  151. void from_base64::convert(const unsigned char* bfr, const int len) { // Converts bfr from base64 to plaintext.
  152. if(NULL == bfr || 0 >= len) { return; } // If there's nothing to do return bad.
  153. // Estimate our conversion buffer size.
  154. int NewSize = len / 4 * 3; // Four bytes of base64 could be 3 bytes.
  155. reserve(NewSize); // Reserve that much space for speed.
  156. // Start the conversion process.
  157. int cursor = 0;
  158. while(len > cursor) { // Go through the buffer and convert.
  159. int REGISTER = 0; // We will use these to convert as we
  160. unsigned char LOOKUP = 0; // go through the data.
  161. // First two base64 bytes
  162. const int MakeRoomFor6Bits = 6;
  163. LOOKUP = NextSixBits(cursor, bfr, len); // Grab the next six bits.
  164. if(STOP == LOOKUP) { break; } // If we ran out here it's ok.
  165. if(XXXX == LOOKUP) { return; } // If the byte is bad bail out!
  166. REGISTER += LOOKUP; REGISTER <<= MakeRoomFor6Bits; // Shift that one into place.
  167. LOOKUP = NextSixBits(cursor, bfr, len); // Grab the next six bits.
  168. if(XXXX == LOOKUP || STOP == LOOKUP) { return; } // If bad or empty here bail out!
  169. REGISTER += LOOKUP; // Load in the six bits.
  170. // Now we have 12 bits so we can grab our first byte.
  171. const int GetMS8OutOf12Bits = 4;
  172. const int BottomFourBits = 0x0000000F;
  173. push_back(REGISTER >> GetMS8OutOf12Bits); // Push back the converted byte.
  174. REGISTER = (REGISTER & BottomFourBits) << MakeRoomFor6Bits; // Make room for the next 6 bits.
  175. // Grab the next 6 bits.
  176. LOOKUP = NextSixBits(cursor, bfr, len); // Grab the next six bits.
  177. if(XXXX == LOOKUP || STOP == LOOKUP) { return; } // If bad or empty here bail out!
  178. if(PAD0 == LOOKUP) { break; } // If we've come to a pad we're done!
  179. REGISTER += LOOKUP; // Load in the six bits.
  180. // Now we have 10 bits so we can grab our Second byte.
  181. const int GetMS8OutOf10Bits = 2;
  182. const int BottomTwoBits = 0x00000003;
  183. push_back(REGISTER >> GetMS8OutOf10Bits); // Push back the converted byte.
  184. REGISTER = (REGISTER & BottomTwoBits) << MakeRoomFor6Bits; // Make room for the next 6 bits.
  185. LOOKUP = NextSixBits(cursor, bfr, len); // Grab the final six bits.
  186. if(XXXX == LOOKUP || STOP == LOOKUP) { return; } // If bad or empty here bail out!
  187. if(PAD0 == LOOKUP) { break; } // If we've come to a pad we're done!
  188. REGISTER += LOOKUP; // Load in the six bits.
  189. // Now we should have our final 8 bits :-)
  190. push_back(REGISTER); // push back the converted byte.
  191. }
  192. BadConversion = false; // If we get here we did ok.
  193. }
  194. from_base64::from_base64(const vector<unsigned char>& bfr) : // Converts from a base64buffer.
  195. BadConversion(true) { // It's bad until we've done it.
  196. convert(&bfr[0], bfr.size()); // Recast the pointer and do it.
  197. }
  198. from_base64::from_base64(const vector<char>& bfr) : // Converts from a buffer.
  199. BadConversion(true) { // It's bad until we've done it.
  200. convert(reinterpret_cast<const unsigned char*>(&bfr[0]), bfr.size()); // This is how we do it.
  201. }
  202. from_base64::from_base64(const string& s) : // Converts from a c++ string.
  203. BadConversion(true) { // It's bad until we've done it.
  204. convert(reinterpret_cast<const unsigned char*>(s.c_str()), s.length()); // This is how we do it.
  205. }
  206. from_base64::from_base64(const char* s) : // Converts from a c_string.
  207. BadConversion(true) { // It's bad until we've done it.
  208. convert(reinterpret_cast<const unsigned char*>(s), strlen(s)); // This is how we do it.
  209. }
  210. bool from_base64::Bad() { // Look at the flag.
  211. return BadConversion;
  212. }
  213. }