// FilterChain.cpp // // (C) 2002-2009 MicroNeil Research Corporation // // Main code file for module FilterChain. // 20041116 _M Added UrlDecode module. The module will repeat a decoded version of // any anchor tag that it sees which contains decodable %xx bytes. Other anchor // tags are not repeated. // 20041116 _M Upgrades to the Defunker module. The module now decodes any HTML // encoded bytes that could have been normal ascii. // 20041114 _M Completed basic defunker engine which strips out all HTML and some // basic   encoding. // 20041113 _M Began heavy upgrades to this module to improve performance and // provide additional obfuscation removal. This modification will include a move // from the use of switch(State) mechanisms to the use of function pointers. This // should save a few cycles on every byte processed. #include "FilterChain.hpp" // FilterChainBase64 Methods. // GetByte() // Returns the next byte from this filter module. unsigned char FilterChainBase64::GetByte() { switch(State) { // What are we doing? case SCANNING:{ // We're scanning to turn on... // In this mode we are hunting for a reason to turn // ourselves on. If we find our startup sequence then // we will go into decoding mode. Until then, we try // to match each incoming character with our startup // sequence. while(true) { // Search for our startup string or get out. try { // Try this... x=FilterChain::GetByte(); // Get the next byte from source. } // If we get the empty signal // here, we've failed to match. catch(Empty) { // If so - and we haven't if(0==ScanIx) throw Empty("FilterChainBase64: No more data"); // started then just throw Empty. x=Base64Start[ScanIx]-1; // If we did start then make } // sure we won't match below. // It's important that no empty's get beyond this point unless // we've got a match started. Otherwise we'll return corruption. if(x!=Base64Start[ScanIx]){ // If the byte doesnt match, // and we've started matching if(0!=ScanIx) { // the sequence then save the Buffer=x; // byte for later, change to State=DEQUEING;DequeIx=0; // DEQUING mode, and return return GetByte(); // the first Dequeued byte. } // If there's no match else return x; // started then shortcut that: } // just send back the byte. // We've handled non matches, now time for the good stuff... else { // This byte matches :-) ScanIx++; // Move forward! if(ScanIx>=sizeof(Base64Start)-1){ // If we've matched it all // then prep for decoding. // At this point we've got our trigger - but we need to // eat up any extra junk before we start decoding. What // we're looking for is a blank line (CRLFCRLF) within // the next couple of lines. While we're at this if we // get an exception we'll just pass it through. ScanIx=DequeIx=0; // Let's reset our indexes. // We're SCANNING now - so if we fail to get to good base64 // stuff then we'll be starting from scratch - and that's ok. // Here we will allow some number of additional header lines // to occur before we give up on this being a base64 segment. // If we give up then we go back to scanning agian. // 20030114 _M Increased limit to 150 - lots of X- headers cause // the engine to stop decoding base64!! 30 was too small. const int LineLimit = 150; // We'll allow this many. for(int LineCount=0; LineCount lines so they will ScanIx=2; // be the first bytes decoded. } // Here we pump into the // workspace. Then we return one return x; // (usually). // The deal is, if we're decoding then we will pump in LF and // return what must be the last LF. If we're not decoding then we // end up returning the last byte we read before giving up which should // be the first byte of the next line. } } } // The above will be tried repeatedly in the first call to // this object's GetByte() until we either return a byte or // throw an exception. The result is that once we start to match // our startup sequence we will either match all of it or we will // grab as much of it as we can until we don't match - then we'll // fail and move into DEQUEING. // You may be asking yourself, why go through all that complex // Turing engine stuff when a simple line load and string comparison // would do nicely. The answer is SPEED. Without getting too deep, // the above code will identify the startup string in roughly 2 // comparisons per byte. If I were to load the entire line first // then that alone would be 2 comparisons before I got started. This // way I cut the number of comparisons down by at least 50%. break; } case DEQUEING:{ // We're recovering from a false start... // When we get here, ScanIx will be one greater than the last // matching byte. The last byte read will be stored in our buffer // so that it can be returned here as the last step. The calling // program will request each byte one at a time... starting with // the first byte coming out of this code. For all positions in our // startup string less than ScanIx, we know we had a matching input. // We start our output at the first byte. The Scanning engine should // have set our DequeIx to 0 before we got here - so that part should // be automatic. Here goes... if(DequeIx < ScanIx) { // If we're still returning a unsigned char x = // partial match, grab the next byte Base64Start[DequeIx]; // from the startup string, Increment DequeIx++; // our Deque index for next time, and return x; // return the byte that's needed. } else { // When we're done with that part, State=SCANNING; // we set our mode back to scanning, ScanIx=DequeIx=0; // reset our indexes to start again, return Buffer; // and return the unmatching byte that } // got us to DEQUEING mode. break; } case DECODING:{ // We're decoding data... // DequeIx will be used here to indicate how many decoded // bytes are ready to be delivered. This is compatible with // the normal startup for other modes. // ScanIx will be used here to indicate which byte position // we should be reading from. This combination helps to handle // pads and simplifies processing. For example, if we've got two // pads then we'll have a single byte to read starting at index // zero. // If we get an exception from up the chain while we're decoding // then we'll just pass it along. if(0==DequeIx) { // If there are no bytes ready then get some! // First Byte: // Eat anything up to the first byte that doesn't look like // a base64 digit. If we hit a '\n-' then we'll assume we've got // a segment boundary and we'll quit. Everything else will be // ignored to get us to the next line. do{ // Empty out any in-between bytes. y=x;x=FilterChain::GetByte(); // Read one byte at a time. if('-'==x && '\n'==y) { // If we get to a segment separator ScanIx=DequeIx=0; // then reset our indexes, set our State=SCANNING; // state to SCANNING... do { // Eat up the rest of this line x=FilterChain::GetByte(); // one byte at a time including } while('\n'!=x); // the at the end, then return '\n'; // return the that byte. // On the next incoming call, the scanner section "should" // return the following byte to complete the end of line. // This ensures that we put a new line at the end of our // decoded segment. Four message scanning purposes this is // desireable. If we wanted a clean segment then we'd probably // eat through the new line rather than the carriage return. } } while(XX64==Base64Table[x]); // Eat all invalid bytes. // At this point x should have the first valid byte for us :-) if('='==x) { // First byte can't be a pad. ScanIx=DequeIx=0; // If it is then we reset ourself, do{ // eat the rest of this line, y=x;x=FilterChain::GetByte(); // and then go on with scanning. }while('\n'!=x); return x; } // At this point we have a clean byte, presumably at the start // of a base64 block which we can decode. x = Base64Table[x]; // Convert the byte. // This first one we assign to clear out the register. The rest // get added to keep things in place. Workspace = // Add it to the workspace in the x << base64_seg0_shift; // correct position. // Byte number 2 of the block... x=FilterChain::GetByte(); // Grab the byte... if('='==x) { // This byte can't be a pad. ScanIx=DequeIx=0; // If it is then we reset ourself, do{ // eat the rest of this line, y=x;x=FilterChain::GetByte(); // and then go on with scanning. }while('\n'!=x); return x; } x=Base64Table[x]; // Convert the byte. if(XX64==x) { // The byte can't be invalid... ScanIx=DequeIx=0; // If it is then we reset ourself, do{ // eat the rest of this line, y=x;x=FilterChain::GetByte(); // and then go on with scanning. }while('\n'!=x); return x; } // At this point we have a clean byte... Workspace += // Add it to the workspace in the x << base64_seg1_shift; // correct position. // Byte number 3 of the block... x=FilterChain::GetByte(); // Grab the byte... // This one and the next one can be pads. Here's where we start // deciding how many bytes we have. If we have a pad in this spot // then our output bytes will only be 1. if('='==x) DequeIx = 1; // If we've got a pad here we'll only else DequeIx = 3; // have one valid output byte. Otherwise // we could have 3. x=Base64Table[x]; // Convert the byte. if(XX64==x) { // The byte can't be invalid... ScanIx=DequeIx=0; // If it is then we reset ourself, do{ // eat the rest of this line, y=x;x=FilterChain::GetByte(); // and then go on with scanning. }while('\n'!=x); return x; } // At this point we have a clean byte... Workspace += // Add it to the workspace in the x << base64_seg2_shift; // correct position. // Byte number 4 of the block... x=FilterChain::GetByte(); // Grab the byte... if('='==x && DequeIx > 2) // If we've got a pad here the most DequeIx=2; // we can have are 2 valid outputs. x=Base64Table[x]; // Convert the byte. if(XX64==x) { // The byte can't be invalid... ScanIx=DequeIx=0; // If it is then we reset ourself, do{ // eat the rest of this line, y=x;x=FilterChain::GetByte(); // and then go on with scanning. }while('\n'!=x); return x; } // At this point we have a clean byte... Workspace += // Add it to the workspace in the x << base64_seg3_shift; // correct position. // At this point we are ready to begin outputting our bytes. ScanIx=2; // Output always starts byte three. return GetByte(); // Return our first decoded byte. } else { // If there are bytes ready then spit them out. x=(Workspace >> (ScanIx * 8)) & 0xFF; // Grab the byte we want. ScanIx--; // Decrement our output index. DequeIx--; // Decrement our output count. return x; // Send back our byte. } break; } } // We should never get to this point. return 0; // Dummy to make the compiler happy. } // FilterChainQuotedPrintable Methods. // isHexDigit() // Returns true if i is a valid hex digit. bool FilterChainQuotedPrintable::isHexDigit(unsigned char i) { if( (i >= '0' && i <= '9') || // Hex digits must be 0-9 or (i >= 'A' && i <= 'F') || // A-F or (i >= 'a' && i <= 'f') // a-f if somebody used lower case. ) { return true; // If i is one of these we are true } else { return false; // IF i is not then we are false } } // convertHexDigit() // Returns an integer value for the hex digit i int FilterChainQuotedPrintable::convertHexDigit(unsigned char i) { if(i >= '0' && i <= '9') { // Digit chars convert directly. return i - '0'; } else if (i >= 'A' && i <= 'F') { // Cap A-F convert to 10 - 15 return i - 'A' + 10; } else if (i >= 'a' && i <= 'f') { // Small A-F convert to 10 - 15 return i - 'a' + 10; } return -1; // Return -1 if i was not a hex digit! } // GetByte() // Returns the next byte from this filter module. unsigned char FilterChainQuotedPrintable::GetByte() { switch(State) { // What are we doing? case SCANNING: // We're scanning to turn on... Buffer[0]=FilterChain::GetByte(); if('='== Buffer[0]) { // If we've found an = then we're on. Buffer[1]=FilterChain::GetByte(); // Fill up the decoding buffer with Buffer[2]=FilterChain::GetByte(); // the next two bytes, BufferIndex = 0; // Setup the buffer index. BufferLength = 3; // Setup the buffer length. State = DECODING; // Set our mode and get the result return GetByte(); // by calling ourselves! } else return Buffer[0]; // Otherwise just pass through. break; case DEQUEING: // We're recovering from a false start... if(BufferIndex < BufferLength) { // If we've got buffered stuff then return Buffer[BufferIndex++]; // return it and move the pointer. } else { // If we've run out of stuff then BufferIndex = 0; // Reset our index and our BufferLength = 0; // buffer length, then set our State = SCANNING; // mode to SCANNING and return return GetByte(); // the next byte from there. } break; case DECODING: // We're decoding data... // Now we are decoding quoted printable data. First we will handle the case // where this is a soft line break. In that case we simply eat the encoded bytes // and set up to dequeue the last byte. if(Buffer[1] == '\n') { // If this is a soft break the BufferIndex = 2; // point our dequeue index at the last byte State = DEQUEING; // establish our DEQUEING state and return GetByte(); // return by letteing DEQUEING do it! } // If it wasn't a soft break then we _may_ need to decode it. We will find // out by looking for hex digits in the next two locations. If they are there // we are decoding. If not then we will simply dequeue the entire buffer. if( isHexDigit(Buffer[1]) && // If the next two bytes are hex isHexDigit(Buffer[2]) // digits then we can convert them. ) { Workspace= // Set our workspace to convert the (convertHexDigit(Buffer[1]) << 4) | // two hex digits into a single (convertHexDigit(Buffer[2])); // byte. Buffer[2] = Workspace & 0xFF; // Store that byte in our buffer. BufferIndex = 2; // Set the index and change our State = DEQUEING; // state to DEQUEING then let that return GetByte(); // code spit it out! } else { // If either byte was not a valid State = DEQUEING; // hex digit DEQUEUE the entire return GetByte(); // buffer. } break; }; return FilterChain::GetByte(); // Dummy } ///////////////////////////////////////////////////////////////////////////////////////// // FilterChainDefunker ///////////////////////////////////////////////////////////////////////////////////////// const char* DefunkerPreamble = "\n----[DEFUNKER]----\n"; // Patterns to match const char* patMatchBR = "
"; const char* patMatchP = "

"; const char* patNBSP = " "; const char* patAMP = "&"; const char* patAPOS = "'"; const char* patLT = "<"; const char* patGT = ">"; const char* patQUOT = """; // SkipHeaders() waits for the headers to go by before launching Store(). unsigned char FilterChainDefunker::SkipHeaders() { // While waiting EOH... unsigned char x = FilterChain::GetByte(); // Get a byte. if(LastRawByte == '\n' && x == '\n') { // If we're at EOH Master = &FilterChainDefunker::Store; // Go to store mode. return x; // and return the byte. } // If we're not at EOH LastRawByte = x; // then remember this byte return x; // and return it. } // Store() puts the original data into the buffer for later. unsigned char FilterChainDefunker::Store() { // While in Store mode, unsigned char x; // we need a byte. try { if(DefunkerSize-10 < InputPosition) { cout << "watch this" << endl; } if(DefunkerSize <= InputPosition) throw Empty("FilterChainDefunker: No more data"); // Careful about the buffer. x = FilterChain::GetByte(); // Try getting the next byte StoreBuffer[InputPosition++] = x; // and storing it. } catch(Empty) { // When we get the Empty Master = &FilterChainDefunker::ReadOut; // signal it is time for us return GetByte(); // to read out our data. } return x; // Otherwis pass on the byte. } // ReadOut() retrieves the stored data through the state engine. unsigned char FilterChainDefunker::ReadOut() { // Read out and dedup spaces. if(LastReadOut == ' ') { // If the last byte was a space while(LastReadOut == ' ') { // then eat all of the spaces LastReadOut = SpaceConvChart[GetInternal()]; // that come next with spaces } // converted. } else { // If it was not a space then LastReadOut = SpaceConvChart[GetInternal()]; // simply read the next byte } // with spaces converted. return LastReadOut; // Output the byte we found. } // GetStore() retrieves the raw store for the state engine. unsigned char FilterChainDefunker::GetStore() { // Read from the Store. if(OutputPosition >= InputPosition) { throw Empty("FilterChainDefunker: No more data"); // If we're out of bytes throw Empty. } return LastGetStore = StoreBuffer[OutputPosition++]; // If we have more, trap and send it. } //// The following functions make up the state engine with the state maintained //// as a function pointer in the (*Internal)() handle. unsigned char FilterChainDefunker::Preamble() { // Emit the preamble. for( int p=0; // Load the preamble into DefunkerPreamble[p]; // the queue. p++) EnQueue(DefunkerPreamble[p]); Internal = &FilterChainDefunker::DeQueue; // Set up the DeQueue mode return GetInternal(); // and return the next byte. } unsigned char FilterChainDefunker::DefunkRoot() { // While in DefunkRoot state... unsigned char x = 0; // One byte at a time via x. do { // Loop through any emptiness. ReturnNothing = false; // Be ready to return a byte. x = GetStore(); // Grab the next byte to process. if(x == '<') { // If it matches < then Internal = &FilterChainDefunker::OpenTag; // go to OpenTag state and x = GetInternal(); // return the converted byte. } else if(x == '&') { // If it matches & then Internal = &FilterChainDefunker::OpenAmp; // go to OpenAnd state and EnQueue(x); // push in the amphersand. x = GetInternal(); // return the converted byte. } // If x is none of the above then x is just x. } while (true == ReturnNothing); // Returning nothing? Go again! return x; // otherwise return a funkless x. } unsigned char FilterChainDefunker::OpenTag() { // While in OpenTag state unsigned char x = GetStore(); // grab the next byte. switch(tolower(x)) { // Check the lower case of x. case 'b': // If we have a 'b' then Internal = &FilterChainDefunker::MatchBR; // our mode is MatchBR. break; case 'p': // If we have a 'p' then Internal = &FilterChainDefunker::MatchP; // our mode is MatchP. break; default: // If we did not match then Internal = &FilterChainDefunker::EatTag; // our mode is EatTag. break; } return GetInternal(); // Return the next byte. } unsigned char FilterChainDefunker::OpenAmp() { // While in OpenAmp state unsigned char x = GetStore(); // grab the next byte. if(tolower(x) == 'n') { // If it matched n then EnQueue(x); // push in the n - Internal = &FilterChainDefunker::MatchNBSP; // we are working on   return GetInternal(); // return the next byte. } else if(tolower(x) == 'a') { // If it matched a then EnQueue(x); // push in the a - Internal = &FilterChainDefunker::SwitchAMPAPOS; // is it AMP or APOS? return GetInternal(); // return the next byte. } else if(tolower(x) == 'l') { // If it matched l then EnQueue(x); // push in the l - Internal = &FilterChainDefunker::MatchLT; // we are working on < return GetInternal(); // return the next byte. } else if(tolower(x) == 'g') { // If it matched g then EnQueue(x); // push in the g - Internal = &FilterChainDefunker::MatchGT; // we are working on > return GetInternal(); // return the next byte. } else if(tolower(x) == 'q') { // If it matched q then EnQueue(x); // push in the q - Internal = &FilterChainDefunker::MatchQUOT; // we are working on " return GetInternal(); // return the next byte. } else if(x == '#') { // If it matched # then EnQueue(x); // push in the # - Internal = &FilterChainDefunker::DecodeNum; // we are working on &#...; return GetInternal(); // return the next byte. } Internal = &FilterChainDefunker::DeQueue; // If nothing matched then return GetInternal(); // punt and dequeue. } unsigned char FilterChainDefunker::MatchBR() { // If our mode is MatchBR if(MatchTagPattern(patMatchBR)) { // If we matched our pattern Internal = &FilterChainDefunker::DefunkRoot; // go to DefunkRoot state return ' '; // and return a space. } // If we did not match then Internal = &FilterChainDefunker::EatTag; // go to EatTag state and return GetInternal(); // return the next byte. } unsigned char FilterChainDefunker::MatchP() { // If our mode is MatchP if(MatchTagPattern(patMatchP)) { // if we matched our pattern Internal = &FilterChainDefunker::DefunkRoot; // go to DefunkRoot state return ' '; // and return a space. } // If we did not match then Internal = &FilterChainDefunker::EatTag; // go to EatTag state and return GetInternal(); // return the next byte. } unsigned char FilterChainDefunker::MatchNBSP() { // If our mode is MatchNBSP int pos = 2; // We've seen &n so far. while(patNBSP[pos]){ // Look through the pattern unsigned char x = GetStore(); // getting one byte at a time. EnQueue(x); // Push each into the queue. if(tolower(x)!=patNBSP[pos]) break; // If we fall off, get out. pos++; // otherwise keep going. } // At this point our pattern[pos] is either 0 (a match) or not. if(patNBSP[pos]) { // If we did not match then Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue return GetInternal(); // and return the next byte. } // If we did match the pattern ClearQueue(); // then clear the queue and Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then return ' '; // return a space. } unsigned char FilterChainDefunker::MatchLT() { // If our mode is MatchLT int pos = 2; // We've seen &l so far. while(patLT[pos]){ // Look through the pattern unsigned char x = GetStore(); // getting one byte at a time. EnQueue(x); // Push each into the queue. if(tolower(x)!=patLT[pos]) break; // If we fall off, get out. pos++; // otherwise keep going. } // At this point our pattern[pos] is either 0 (a match) or not. if(patLT[pos]) { // If we did not match then Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue return GetInternal(); // and return the next byte. } // If we did match the pattern ClearQueue(); // then clear the queue and Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then return '<'; // return a <. } unsigned char FilterChainDefunker::MatchGT() { // If our mode is MatchGT int pos = 2; // We've seen &g so far. while(patGT[pos]){ // Look through the pattern unsigned char x = GetStore(); // getting one byte at a time. EnQueue(x); // Push each into the queue. if(tolower(x)!=patGT[pos]) break; // If we fall off, get out. pos++; // otherwise keep going. } // At this point our pattern[pos] is either 0 (a match) or not. if(patGT[pos]) { // If we did not match then Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue return GetInternal(); // and return the next byte. } // If we did match the pattern ClearQueue(); // then clear the queue and Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then return '>'; // return a >. } unsigned char FilterChainDefunker::MatchQUOT() { // If our mode is MatchQUOT int pos = 2; // We've seen &q so far. while(patQUOT[pos]){ // Look through the pattern unsigned char x = GetStore(); // getting one byte at a time. EnQueue(x); // Push each into the queue. if(tolower(x)!=patQUOT[pos]) break; // If we fall off, get out. pos++; // otherwise keep going. } // At this point our pattern[pos] is either 0 (a match) or not. if(patQUOT[pos]) { // If we did not match then Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue return GetInternal(); // and return the next byte. } // If we did match the pattern ClearQueue(); // then clear the queue and Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then return '\"'; // return a quote. } unsigned char FilterChainDefunker::SwitchAMPAPOS() { // We are chosing AMP or APOS. unsigned char x = GetStore(); // Get the next byte. EnQueue(x); // Put it into the queue. if(tolower(x)=='m') { // If we matched m then we Internal = &FilterChainDefunker::MatchAMP; // are working on MatchAMP. return GetInternal(); // Go get it. } else if(tolower(x)=='p') { // If we matched p then we Internal = &FilterChainDefunker::MatchAPOS; // are working on MatchAPOS. return GetInternal(); // Go get it. } Internal = &FilterChainDefunker::DeQueue; // If we didn't match either return GetInternal(); // we punt and DeQueue. } unsigned char FilterChainDefunker::MatchAPOS() { // If our mode is MatchAPOS int pos = 3; // We've seen &ap so far. while(patAPOS[pos]){ // Look through the pattern unsigned char x = GetStore(); // getting one byte at a time. EnQueue(x); // Push each into the queue. if(tolower(x)!=patAPOS[pos]) break; // If we fall off, get out. pos++; // otherwise keep going. } // At this point our pattern[pos] is either 0 (a match) or not. if(patAMP[pos]) { // If we did not match then Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue return GetInternal(); // and return the next byte. } // If we did match the pattern ClearQueue(); // then clear the queue and Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then return '\''; // return an apostrophie. } unsigned char FilterChainDefunker::MatchAMP() { // If our mode is MatchAMP int pos = 3; // We've seen &am so far. while(patAMP[pos]){ // Look through the pattern unsigned char x = GetStore(); // getting one byte at a time. EnQueue(x); // Push each into the queue. if(tolower(x)!=patAMP[pos]) break; // If we fall off, get out. pos++; // otherwise keep going. } // At this point our pattern[pos] is either 0 (a match) or not. if(patAMP[pos]) { // If we did not match then Internal = &FilterChainDefunker::DeQueue; // set our state to dequeue return GetInternal(); // and return the next byte. } // If we did match the pattern ClearQueue(); // then clear the queue and Internal = &FilterChainDefunker::DefunkRoot; // go back to root mode then return '&'; // return an amphersand. } unsigned char FilterChainDefunker::EatTag() { // If our mode is EatTag if(LastGetStore != '>') { // and our last byte was not while(GetStore()!='>')continue; // endtag then eat through } // the end tag. Then set our ReturnNothing = true; // ReturnNothing flag, set our Internal = &FilterChainDefunker::DefunkRoot; // mode to DefunkRoot and return 0; // return 0 (nothing, really). } unsigned char FilterChainDefunker::DecodeNum() { // If our mode is DecodeNum unsigned char NumBfr[5]; // A buffer for digits. memset(NumBfr,0,sizeof(NumBfr)); // Clear the buffer. for( // Let's read the number... unsigned int i=0; // NumBfr position = 0; i<(sizeof(NumBfr)-1) && // Stay well within the NunBfr. (EnQueue(NumBfr[i]=GetStore()), // Read and EnQueue each byte. isdigit(NumBfr[i])); // Keep going if it's a digit. i++)continue; // Move the buffer pointer. // Check for a proper finish... if(LastGetStore != ';') { // If we didn't end properly Internal = &FilterChainDefunker::DeQueue; // then we will punt and return GetInternal(); // DeQueue. } // At this point, NumBfr contains a c_str of the number to be decoded. // Also, the Qbfr has each byte we read in case we want to punt. int Decoded = atol((const char*)NumBfr); // Read the number. if(Decoded < 32 || Decoded > 255) { // If the number we read is Internal = &FilterChainDefunker::DeQueue; // out of range then we return GetInternal(); // punt and DeQueue. } // If we decoded a character ClearQueue(); // that is in range of normal Internal = &FilterChainDefunker::DefunkRoot; // ascii then clear the queue, return (unsigned char) Decoded; // go back to DefunkRoot, and } // return the decoded byte. ///////////////////////////////////////////////////////////////////////////////////////// // FilterChainUrlDecode ///////////////////////////////////////////////////////////////////////////////////////// unsigned char FilterChainUrlDecode::Bypass() { // In Bypass mode... unsigned char c = FilterChain::GetByte(); // Get the raw byte. if(c == '<') { // If it was '<' we begin. Internal = &FilterChainUrlDecode::Tag; // Go to Tag mode. AddToBfr(c); // Write the byte to our buffer. } return c; // Always return the byte. } unsigned char FilterChainUrlDecode::Tag() { // In Tag mode... unsigned char c = FilterChain::GetByte(); // Get the raw byte. if(tolower(c) == 'a') { // If we're in an anchor tag Internal = &FilterChainUrlDecode::Root; // Go to Decode Root mode. AddToBfr(c); // Write the byte to our buffer. } else if(tolower(c) == 'i') { // If we might be in an img tag Internal = &FilterChainUrlDecode::Img1; // Go to Img1 mode. AddToBfr(c); // Write the byte to our buffer. } else { // If we didn't match DecodeBfr[0] = 0; // we clear out the Decode DecodeBfr[1] = 0; // buffer. (Save some bytes by DecodeLength = 0; // doing it manually) Then we Internal = &FilterChainUrlDecode::Bypass; // Go to Bypass mode again. } return c; // Always return the byte. } unsigned char FilterChainUrlDecode::Img1() { // In Img1 mode... unsigned char c = FilterChain::GetByte(); // Get the raw byte. if(tolower(c)=='m') { // If we're still in an img tag Internal = &FilterChainUrlDecode::Img2; // Go to Img2 mode. AddToBfr(c); // Write the byte to our buffer. } else { // If we didn't match DecodeBfr[0] = 0; // we clear out the Decode DecodeBfr[1] = 0; // buffer and go back to DecodeBfr[2] = 0; // Bypass mode again. DecodeLength = 0; Internal = &FilterChainUrlDecode::Bypass; } return c; // Always return the byte. } unsigned char FilterChainUrlDecode::Img2() { // In Img2 mode... unsigned char c = FilterChain::GetByte(); // Get the raw byte. if(tolower(c)=='g') { // If we're still in an img tag Internal = &FilterChainUrlDecode::Root; // Go to Decode Root mode. AddToBfr(c); // Write the byte to our buffer. } else { // If we didn't match DecodeBfr[0] = 0; // we clear out the Decode DecodeBfr[1] = 0; // buffer and go back to DecodeBfr[2] = 0; // Bypass mode again. DecodeBfr[3] = 0; DecodeLength = 0; Internal = &FilterChainUrlDecode::Bypass; } return c; // Always return the byte. } unsigned char FilterChainUrlDecode::Root() { // While in Decode Root mode... unsigned char c = FilterChain::GetByte(); // Get the raw byte. AddToBfr(c); // Push it into the buffer. // Now we will switch modes based on the byte we get. if(c == '%') { // If we have '%' then it is Internal = &FilterChainUrlDecode::GetD1; // time to start decoding. } else if(c == '>') { // If we have '>' and if(DecodeFlag) { // we did some decoding then Internal = &FilterChainUrlDecode::Inject; // it is time to inject the result. } else { // If there was no decoding then Clear(); // we clear out our buffer and Internal = &FilterChainUrlDecode::Bypass; // it is time to go to sleep. } } // This next bit protects against malformed HTML by watching for any new tag // start. If one occurs, then we throw away our current decoding and assume a state // that starts with the new open "<". if(c == '<') { // If found a new < then we Clear(); // clear the buffer, AddToBfr(c); // Add the '<' back in, and Internal = &FilterChainUrlDecode::Tag; // go back to Tag mode. } return c; // Always return the byte. } unsigned char FilterChainUrlDecode::GetD1() { // Get the first digit. unsigned char c = FilterChain::GetByte(); // Read the raw byte. AddToBfr(c); // Add it to the buffer. Internal = &FilterChainUrlDecode::GetD2; // Move to GetD2 mode. return c; // Always return the byte. } // isHexDigit() // Returns true if i is a valid hex digit. bool FilterChainUrlDecode::isHexDigit(unsigned char i) { if( (i >= '0' && i <= '9') || // Hex digits must be 0-9 or (i >= 'A' && i <= 'F') || // A-F or (i >= 'a' && i <= 'f') // a-f if somebody used lower case. ) { return true; // If i is one of these we are true } else { return false; // IF i is not then we are false } } // convertHexDigit() // Returns an integer value for the hex digit i int FilterChainUrlDecode::convertHexDigit(unsigned char i) { if(i >= '0' && i <= '9') { // Digit chars convert directly. return i - '0'; } else if (i >= 'A' && i <= 'F') { // Cap A-F convert to 10 - 15 return i - 'A' + 10; } else if (i >= 'a' && i <= 'f') { // Small A-F convert to 10 - 15 return i - 'a' + 10; } return -1; // Return -1 if i was not a hex digit! } // convertHexByte() // Returns an integer value for a hex string representing a byte. unsigned char FilterChainUrlDecode::convertHexByte(unsigned char* x) { unsigned char working = convertHexDigit(x[1]); // Convert the low order nybl. working = working + (16 * convertHexDigit(x[0])); // Convert the high order nybl. return working; // Return the result. } unsigned char FilterChainUrlDecode::GetD2() { // Get the second digit. unsigned char c = FilterChain::GetByte(); // Read the raw byte. AddToBfr(c); // Add it to the buffer. // At this point the end of our DecodeBfr has a c_str of a small hex integer (we hope) // that we can decode. If we successfully decode it then we will replace %xx in our // DecodeBfr with the character that is represented by that byte. // Do we really have an encoded byte to decode? int codepos = DecodeLength-3; // Grab the position of the hex. if( DecodeBfr[codepos]=='%' && // If the first char is % isHexDigit(DecodeBfr[codepos+1]) && // and the second is a hex digit isHexDigit(DecodeBfr[codepos+2]) // and the third is a hex digit ){ // then we can decode the string. unsigned char q = convertHexByte(DecodeBfr+codepos+1); // Decode the byte. if(q >= 32) { // If the byte is in range then DecodeBfr[codepos] = q; // Replace the % with the byte DecodeBfr[--DecodeLength] = 0; // backup over and erase the hex DecodeBfr[--DecodeLength] = 0; // digits themselves. DecodeFlag = true; // Set the decode flag. } // If we decided the byte was not decodable for some reason then the original data // remains in the buffer as it was originally read. } Internal = &FilterChainUrlDecode::Root; // Get ready to decode more. return c; // Always return the byte. } unsigned char FilterChainUrlDecode::Inject() { // Inject the decoded result. if( DecodeBfr[DecodePosition] && // If we've got more bytes DecodePosition < sizeof(DecodeBfr)) { // and we're safely in our buffer return DecodeBfr[DecodePosition++]; // then return the byte and move } // ahead. // Once the buffer is empty we Clear(); // clear out the system, and go Internal = &FilterChainUrlDecode::Bypass; // back to bypass mode. Then return GetByte(); // return the next bypassed byte. } //////////////////////////////////////////////////////////////////////////////// // FilterChainHeaderAnalysis //////////////////////////////////////////////////////////////////////////////// int FilterChainHeaderAnalysis::FollowPattern(char c) { // Follow the pattern. c = tolower(c); // Convert c to lower case. if(c != MatchPattern[MatchIndex]) { // If c doesn't match the pattern return -1; // then return -1 indicating we fell off. } else { // If it did match the pattern then MatchIndex++; // move ahead to the next byte and if(0 == MatchPattern[MatchIndex]) { // take a look. If that's all there was return 0; // then we've finished :-) } } // If we matched and there's more to do return 1; // then we return 1. } unsigned char FilterChainHeaderAnalysis::doSeekNL() { // Looking for a new line. unsigned char c = GetCheckedByte(); // Get the next byte (and check for high bits) if('\n' == c) { // If it was a new line then Mode = &FilterChainHeaderAnalysis::doSeekDispatch; // move on to the next mode } // for the next byte and return c; // return the byte we got. } unsigned char FilterChainHeaderAnalysis::doSeekDispatch() { // Looking at the first char after NL. unsigned char c = GetCheckedByte(); // Get the next byte (and check for high bits) switch(tolower(c)) { // Switch modes based on what this byte is. case '\n': { // If it is a New Line then the headers are Mode = &FilterChainHeaderAnalysis::doEndOfHeaders; // finished - so we set up our EndOfHeaders return GetByte(); // mode and return the next byte from there. break; // The extra NL will be emitted at the end. } case 'r': { // If it is an R as in (R)eceived: SetFollowPattern("eceived:"); // establish the follow pattern and Mode = &FilterChainHeaderAnalysis::doReceived; // switch to doReceived mode. break; } case 'f': { // If it is an F as in (F)rom: SetFollowPattern("rom:"); // establish the follow pattern and Mode = &FilterChainHeaderAnalysis::doFrom; // switch to doFrom mode. break; } case 't': { // If it is an T as in (T)o: SetFollowPattern("o:"); // establish the follow pattern and Mode = &FilterChainHeaderAnalysis::doTo; // switch to doTo mode. break; } case 'c': { // If it is a C as in (C)C: SetFollowPattern("c:"); // establish the follow pattern and Mode = &FilterChainHeaderAnalysis::doCC; // switch to doCC mode. break; } case 'm': { // If it is an M as in (M)essage-id: SetFollowPattern("essage-id:"); // establish the follow pattern and Mode = &FilterChainHeaderAnalysis::doMessageID; // switch to doMessageID mode. break; } case 'd': { // If it is a D as in (D)ate: SetFollowPattern("ate:"); // establish the follow pattern and Mode = &FilterChainHeaderAnalysis::doDate; // switch to doDate mode. break; } case 's': { // If it is an S as in (S)ubject: SetFollowPattern("ubject:"); // establish the follow pattern and Mode = &FilterChainHeaderAnalysis::doSubject; // switch to doSubject mode. break; } default: { // If we don't recognize the byte then Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for a new line. break; } } // Once all of our mode switching is handled return c; // we return the byte we got. } unsigned char FilterChainHeaderAnalysis::doReceived() { // Identifying a Received: header. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag. switch(FollowPattern(c)) { // See if we're still on the path. case -1: { // If we're not on the right tag then Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one. break; } case 0: { // If we've found the end of our tag (match!) Mode = &FilterChainHeaderAnalysis::doFindIP; // start looking for the IP. IPToTest = ""; // Clear the IPToTest buffer. break; } default: { // If we're still following along then break; // keep on keepin' on. } } // Once we know what we're doing we return c; // return the character we got. } unsigned char FilterChainHeaderAnalysis::doFindIP() { // Seeking the [IP] in a Received header. unsigned char c = GetCheckedByte(); // Get a checked byte. switch(c) { case '[': { // If we find the [ then Mode = &FilterChainHeaderAnalysis::doTestIP; // set up to grab and test the IP. break; } case '\n': { // If we come across a newline then Mode = &FilterChainHeaderAnalysis::doSeekNL; // we must be lost so go back to basics. break; } default: { // For anything else we keep on going. break; } } return c; // Return the byte. } //// 20070614 _M Improved IP exctaction from received headers so that if the //// apparent IP contains any unusual bytes (not digits or dots) then the //// attempt is abandoned. unsigned char FilterChainHeaderAnalysis::doTestIP() { // Gets and tests the [IP]. unsigned char c = FilterChain::GetByte(); // Get the next byte. switch(c) { case ']': { // If we come to ] we've got it! IPTester.test(IPToTest, IPTestResult); // Do the test with this IP. if(0 == IPTestResult.length()) { // If the IP test wants us to truncate throw Empty("FilterChainHeaderAnalysis: Truncate"); // the message then throw Empty! } // Otherwise, proceed as per normal... SetOutputBuffer(IPTestResult); // Put the result in the output buffer. Mode = &FilterChainHeaderAnalysis::doInjectIPTestResult; // Set the mode to inject the result. break; // That will start on the next byte. } case '0': // IPs are made of digits and dots. case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '.': { // Capture the IP between [ and ] IPToTest += c; // one byte at a time. break; } default: { // If we find anything else we must be Mode = &FilterChainHeaderAnalysis::doSeekNL; // lost so we go back to the basics. break; } } return c; } unsigned char FilterChainHeaderAnalysis::doFrom() { // Identifying a From: header. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag. switch(FollowPattern(c)) { // See if we're still on the path. case -1: { // If we're not on the right tag then Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one. break; } case 0: { // If we've found the end of our tag (match!) Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and FoundFrom = true; // record that this tag was present. break; } default: { // If we're still following along then break; // keep on keepin' on. } } // Once we know what we're doing we return c; // return the character we got. } unsigned char FilterChainHeaderAnalysis::doTo() { // Identifying a To: header. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag. switch(FollowPattern(c)) { // See if we're still on the path. case -1: { // If we're not on the right tag then Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one. break; } case 0: { // If we've found the end of our tag (match!) Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and FoundTo = true; // record that this tag was present. break; } default: { // If we're still following along then break; // keep on keepin' on. } } // Once we know what we're doing we return c; // return the character we got. } unsigned char FilterChainHeaderAnalysis::doCC() { // Identifying a CC: header. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag. switch(FollowPattern(c)) { // See if we're still on the path. case -1: { // If we're not on the right tag then Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one. break; } case 0: { // If we've found the end of our tag (match!) Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and FoundCC = true; // record that this tag was present. break; } default: { // If we're still following along then break; // keep on keepin' on. } } // Once we know what we're doing we return c; // return the character we got. } unsigned char FilterChainHeaderAnalysis::doMessageID() { // Identifying a MessageID header. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag. switch(FollowPattern(c)) { // See if we're still on the path. case -1: { // If we're not on the right tag then Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one. break; } case 0: { // If we've found the end of our tag (match!) Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and FoundMessageID = true; // record that this tag was present. break; } default: { // If we're still following along then break; // keep on keepin' on. } } // Once we know what we're doing we return c; // return the character we got. } unsigned char FilterChainHeaderAnalysis::doDate() { // Identifying a Date: header. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag. switch(FollowPattern(c)) { // See if we're still on the path. case -1: { // If we're not on the right tag then Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one. break; } case 0: { // If we've found the end of our tag (match!) Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and FoundDate = true; // record that this tag was present. break; } default: { // If we're still following along then break; // keep on keepin' on. } } // Once we know what we're doing we return c; // return the character we got. } unsigned char FilterChainHeaderAnalysis::doSubject() { // Identifying a Subject: header. unsigned char c = FilterChain::GetByte(); // Get the next byte of the header tag. switch(FollowPattern(c)) { // See if we're still on the path. case -1: { // If we're not on the right tag then Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to looking for the next one. break; } case 0: { // If we've found the end of our tag (match!) Mode = &FilterChainHeaderAnalysis::doSeekNL; // start looking for the the next tag and FoundSubject = true; // record that this tag was present. break; } default: { // If we're still following along then break; // keep on keepin' on. } } // Once we know what we're doing we return c; // return the character we got. } unsigned char FilterChainHeaderAnalysis::doEndOfHeaders() { // IdentifyEndOfHeaders & Emit Results. // We know we've reached the end of the headers so now // we have to formulate the results we want to inject and // er... inject them. EndOfHeaderResults = "X-SNFHDR: "; // Emit an X header (internal only) if(MissingCC()) { EndOfHeaderResults.append("-CC "); } // Emit -CC if no CC header. if(MissingTo()) { EndOfHeaderResults.append("-TO "); } // Emit -TO if no TO header (together no to) if(MissingFrom()) { EndOfHeaderResults.append("-FROM "); } // Emit -FROM if no FROM header. if(MissingDate()) { EndOfHeaderResults.append("-DATE "); } // Emit -DATE if no DATE header. if(MissingMessageID()) { EndOfHeaderResults.append("-MESSAGEID "); } // Emit -MESSAGEID if no MESSAGE-ID header. if(MissingSubject()) { EndOfHeaderResults.append("-SUBJECT "); } // Emit -SUBJECT if no SUBJECT header. if(HighBitCharacters()) { EndOfHeaderResults.append("+HIGHBIT"); } // Emit +HIGHBIT if non-ascii chars present. EndOfHeaderResults.append("\n\n"); // Emit the double newline - end of headers. SetOutputBuffer(EndOfHeaderResults); // Setup the output string. Mode = &FilterChainHeaderAnalysis::doInjectAnalysis; // Switch to the output injection mode. return GetByte(); // Return the first byte from there :-) } void FilterChainHeaderAnalysis::SetOutputBuffer(string& s) { // Setup the OutputBuffer. OutputBuffer = (char*) s.c_str(); OutputIndex = 0; // Capture the c_str and reset the index. } unsigned char FilterChainHeaderAnalysis::doInjectIPTestResult() { // Inject OutputBuffer and go to doSeekNL. unsigned char c = OutputBuffer[OutputIndex++]; // Get the next byte in the output buffer. if(0 == c) { // If it is the null terminator then we Mode = &FilterChainHeaderAnalysis::doSeekNL; // go back to seeking lines and return that return GetByte(); // byte instead. } // If we have a normal byte then we return c; // return it. } unsigned char FilterChainHeaderAnalysis::doInjectAnalysis() { // Inject OutputBuffer and go to doOff. unsigned char c = OutputBuffer[OutputIndex++]; // Get the next byte in the output buffer. if(0 == c) { // If it is the null terminator then we Mode = &FilterChainHeaderAnalysis::doOff; // go back to seeking lines and return that return GetByte(); // byte instead. } // If we have a normal byte then we return c; // return it. }