You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

base64codec.cpp 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276
  1. // base64codec.cpp
  2. // Copyright (C) 2006 - 2009 MicroNeil Research Corporation
  3. // See base64codec.hpp
  4. //typedef vector<char> base64codec_buffer;
  5. //typedef vector<char>::iterator base64codec_iterator;
  6. #include "base64codec.hpp"
  namespace base64codec {

  // Encoding alphabet. The index is a 6-bit value (0-63) and the entry is the
  // base64 character that represents it. The 65th slot holds the terminating
  // NUL of the string literal.
  static const char base64encode[65] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

  // Sentinel values used alongside the decode table. All of them lie above
  // 0x3F, so they can never be confused with a real 6-bit value.
  static const unsigned char XXXX = 0xFF;  // Byte is not a base64 character.
  static const unsigned char PAD0 = 0xFE;  // Byte is the pad character '='.
  static const unsigned char IGNR = 0xFD;  // Byte is ignorable whitespace.
  static const unsigned char STOP = 0xFC;  // End of input; never stored in the table.

  // Decoding table, indexed by the incoming byte. Each entry is one of:
  //   0x00-0x3F  the 6-bit value of a valid base64 character,
  //   PAD0       for '=',
  //   IGNR       for tab (0x09), line feed (0x0A), carriage return (0x0D)
  //              and space (0x20),
  //   XXXX       for every other byte.
  // Having an entry for all 256 byte values turns decoding into a single lookup.
  static const unsigned char base64decode[256] = {
  //   0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,IGNR,IGNR,XXXX,XXXX,IGNR,XXXX,XXXX,  // 0x00-0x0F
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // 0x10-0x1F
    IGNR,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,0x3E,XXXX,XXXX,XXXX,0x3F,  // 0x20-0x2F
    0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,XXXX,XXXX,XXXX,PAD0,XXXX,XXXX,  // 0x30-0x3F
    XXXX,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,  // 0x40-0x4F
    0x0F,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,XXXX,XXXX,XXXX,XXXX,XXXX,  // 0x50-0x5F
    XXXX,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,  // 0x60-0x6F
    0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,0x30,0x31,0x32,0x33,XXXX,XXXX,XXXX,XXXX,XXXX,  // 0x70-0x7F
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // 0x80-0x8F
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // 0x90-0x9F
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // 0xA0-0xAF
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // 0xB0-0xBF
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // 0xC0-0xCF
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // 0xD0-0xDF
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,  // 0xE0-0xEF
    XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX,XXXX   // 0xF0-0xFF
  };

  } // End namespace base64codec
  40. using namespace base64codec;
  41. //// to_base64 /////////////////////////////////////////////////////////////////
  42. void to_base64::convert(const unsigned char* bfr, const int len) { // Converts from a char buffer.
  43. if(NULL == bfr || 0 >= len) { // If there's NULL or no length
  44. BadConversion = true; // that was a bad conversion.
  45. return; // lets get out of here.
  46. }
  47. int NewSize = (len / 3) * 4; // Base64 takes 4 bytes for every 3;
  48. if(0 < len % 3) NewSize += 4; // If there are more, add an other 4;
  49. reserve(NewSize); // Set aside enough memory for the job.
  50. int cursor = 0; // Starting at zero chunk it off.
  51. while(len > cursor) {
  52. // Chunk off 4 bytes into an unsigned int for conversion.
  53. enum EndGames { // Describe the end game for this
  54. OneByte, // chunk as containing either one,
  55. TwoBytes, // two,
  56. ThreeBytes // or three bytes.
  57. } EndGame; // We use this to code the end.
  58. // Byte 0
  59. unsigned long REGISTER = 0; // Start with a clear register.
  60. REGISTER += bfr[cursor]; REGISTER <<= 8; ++cursor; // Load Byte 0.
  61. EndGame = OneByte; // We've added a byte.
  62. // Byte 1
  63. if(len > cursor) { // If we've got bytes left.
  64. REGISTER += bfr[cursor]; // load the next one and
  65. ++cursor; // move the cursor.
  66. EndGame = TwoBytes; // We're up to 2 bytes.
  67. }
  68. REGISTER <<= 8; // Shift to the next byte.
  69. // Byte 2
  70. if(len > cursor) { // If we've got bytes left.
  71. REGISTER += bfr[cursor]; // load the next one and
  72. ++cursor; // move the cursor.
  73. EndGame = ThreeBytes; // That's a full house.
  74. }
  75. // No shift this time, the register is full ;-)
  76. // Now that we have 3 bytes and a characterization we can encode the
  77. // base64 bytes into our vector.
  78. const int SixBitMask = 0x0000003f; // This is how far to shift.
  79. char code3 = base64encode[(REGISTER & SixBitMask)]; REGISTER >>= 6; // Encode four characters for this
  80. char code2 = base64encode[(REGISTER & SixBitMask)]; REGISTER >>= 6; // three bytes.
  81. char code1 = base64encode[(REGISTER & SixBitMask)]; REGISTER >>= 6;
  82. char code0 = base64encode[(REGISTER & SixBitMask)];
  83. push_back(code0); // Push the first 2 encoded bytes onto
  84. push_back(code1); // the vector in the original order.
  85. switch(EndGame) { // Now handle the end game.
  86. case OneByte: { // If the end contains one valid byte
  87. push_back('='); // push back two = to indicate that
  88. push_back('='); // the last two bytes are padding.
  89. break;
  90. }
  91. case TwoBytes: { // If the end contains two valid bytes
  92. push_back(code2); // push back one more code byte and
  93. push_back('='); // push back only one = indicating one
  94. break; // byte of padding.
  95. }
  96. case ThreeBytes: // If we had the full three bytes to
  97. default: { // work with then we have no padding.
  98. push_back(code2); // Push back the remaining two
  99. push_back(code3); // code bytes to capture the full
  100. break; // encoding. This also works
  101. } // in the middle of the input.
  102. } // That's it for the end game.
  103. } // That's it for this chunk.
  104. BadConversion = false; // If we get here we've done good.
  105. }
  106. to_base64::to_base64(const vector<unsigned char>& bfr) : // Converts from a base64buffer.
  107. BadConversion(true) { // No conversion yet ;-)
  108. convert(&bfr[0], bfr.size()); // Recast the pointer and do it.
  109. }
  110. to_base64::to_base64(const vector<char>& bfr) : // Converts from a base64codec buffer.
  111. BadConversion(true) { // No conversion yet ;-)
  112. convert(reinterpret_cast<const unsigned char*>(&bfr[0]), bfr.size()); // Do this to get it done.
  113. }
  114. to_base64::to_base64(const unsigned char* bfr, const int len) : // Converts from a uchar buffer.
  115. BadConversion(true) { // No conversion yet ;-)
  116. convert(bfr, len); // Do this to get it done.
  117. }
  118. to_base64::to_base64(const char* bfr, const int len) : // Converts from a char buffer.
  119. BadConversion(true) { // No conversion yet ;-)
  120. convert(reinterpret_cast<const unsigned char*>(bfr), len); // Do this to get it done.
  121. }
  122. to_base64::to_base64(const string& s) : // Converts from a c++ string.
  123. BadConversion(true) { // No conversion yet ;-)
  124. convert(reinterpret_cast<const unsigned char*>(s.c_str()), s.length()); // Do this to get it done.
  125. }
  126. to_base64::to_base64(const char* s) : // Converts from a c string.
  127. BadConversion(true) { // No conversion yet ;-)
  128. convert(reinterpret_cast<const unsigned char*>(s), strlen(s)); // Do this to get it done.
  129. }
  130. bool to_base64::Bad() { // Look at the flag.
  131. return BadConversion;
  132. }
  133. //// from_base64 ///////////////////////////////////////////////////////////////
  134. unsigned char from_base64::NextSixBits( // Get the next base64 byte.
  135. int& cursor,
  136. const unsigned char* bfr,
  137. const int len) {
  138. while(len > cursor) { // Prepare to eat IGNR chars.
  139. unsigned char c = base64decode[bfr[cursor]]; // Get the next 6 bits.
  140. ++cursor; // Move the cursor for next time.
  141. if(IGNR == c) continue; // If we should ignore it, eat.
  142. if(XXXX == c) return c; // If it's bad, return it.
  143. return c; // If it's ordinary return it.
  144. } // If we run out of bytes
  145. return STOP; // return STOP
  146. }
  147. //// Since the BadConversion flag is set on construction, if we bail out
  148. //// of the convert() for any reason then the conversion will be bad.
  149. void from_base64::convert(const unsigned char* bfr, const int len) { // Converts bfr from base64 to plaintext.
  150. if(NULL == bfr || 0 >= len) { return; } // If there's nothing to do return bad.
  151. // Estimate our conversion buffer size.
  152. int NewSize = len / 4 * 3; // Four bytes of base64 could be 3 bytes.
  153. reserve(NewSize); // Reserve that much space for speed.
  154. // Start the conversion process.
  155. int cursor = 0;
  156. while(len > cursor) { // Go through the buffer and convert.
  157. int REGISTER = 0; // We will use these to convert as we
  158. unsigned char LOOKUP = 0; // go through the data.
  159. // First two base64 bytes
  160. const int MakeRoomFor6Bits = 6;
  161. LOOKUP = NextSixBits(cursor, bfr, len); // Grab the next six bits.
  162. if(STOP == LOOKUP) { break; } // If we ran out here it's ok.
  163. if(XXXX == LOOKUP) { return; } // If the byte is bad bail out!
  164. REGISTER += LOOKUP; REGISTER <<= MakeRoomFor6Bits; // Shift that one into place.
  165. LOOKUP = NextSixBits(cursor, bfr, len); // Grab the next six bits.
  166. if(XXXX == LOOKUP || STOP == LOOKUP) { return; } // If bad or empty here bail out!
  167. REGISTER += LOOKUP; // Load in the six bits.
  168. // Now we have 12 bits so we can grab our first byte.
  169. const int GetMS8OutOf12Bits = 4;
  170. const int BottomFourBits = 0x0000000F;
  171. push_back(REGISTER >> GetMS8OutOf12Bits); // Push back the converted byte.
  172. REGISTER = (REGISTER & BottomFourBits) << MakeRoomFor6Bits; // Make room for the next 6 bits.
  173. // Grab the next 6 bits.
  174. LOOKUP = NextSixBits(cursor, bfr, len); // Grab the next six bits.
  175. if(XXXX == LOOKUP || STOP == LOOKUP) { return; } // If bad or empty here bail out!
  176. if(PAD0 == LOOKUP) { break; } // If we've come to a pad we're done!
  177. REGISTER += LOOKUP; // Load in the six bits.
  178. // Now we have 10 bits so we can grab our Second byte.
  179. const int GetMS8OutOf10Bits = 2;
  180. const int BottomTwoBits = 0x00000003;
  181. push_back(REGISTER >> GetMS8OutOf10Bits); // Push back the converted byte.
  182. REGISTER = (REGISTER & BottomTwoBits) << MakeRoomFor6Bits; // Make room for the next 6 bits.
  183. LOOKUP = NextSixBits(cursor, bfr, len); // Grab the final six bits.
  184. if(XXXX == LOOKUP || STOP == LOOKUP) { return; } // If bad or empty here bail out!
  185. if(PAD0 == LOOKUP) { break; } // If we've come to a pad we're done!
  186. REGISTER += LOOKUP; // Load in the six bits.
  187. // Now we should have our final 8 bits :-)
  188. push_back(REGISTER); // push back the converted byte.
  189. }
  190. BadConversion = false; // If we get here we did ok.
  191. }
  192. from_base64::from_base64(const vector<unsigned char>& bfr) : // Converts from a base64buffer.
  193. BadConversion(true) { // It's bad until we've done it.
  194. convert(&bfr[0], bfr.size()); // Recast the pointer and do it.
  195. }
  196. from_base64::from_base64(const vector<char>& bfr) : // Converts from a buffer.
  197. BadConversion(true) { // It's bad until we've done it.
  198. convert(reinterpret_cast<const unsigned char*>(&bfr[0]), bfr.size()); // This is how we do it.
  199. }
  200. from_base64::from_base64(const string& s) : // Converts from a c++ string.
  201. BadConversion(true) { // It's bad until we've done it.
  202. convert(reinterpret_cast<const unsigned char*>(s.c_str()), s.length()); // This is how we do it.
  203. }
  204. from_base64::from_base64(const char* s) : // Converts from a c_string.
  205. BadConversion(true) { // It's bad until we've done it.
  206. convert(reinterpret_cast<const unsigned char*>(s), strlen(s)); // This is how we do it.
  207. }
  208. bool from_base64::Bad() { // Look at the flag.
  209. return BadConversion;
  210. }