// madscientist / SNFMulti
// FilterChain.hpp
//
// (C) 2002-2009 MicroNeil Research Corporation
//
// This is the base class header for FilterChain objects.
// FilterChain objects can be chained together to filter
// a byte stream. Each object produces a single character
// per call. It will also call its source object for the
// next character as required.

// History...

// 20060822 _M
// Adding FilterChainHeaderAnalysis to identify missing headers and header
// anomalies, and to extract and test IP data.

// 20060127 _M
// Added FilterChainCBFR to accept a buffer of a specific
// length.

// 20041116 _M Added UrlDecode module. The module will repeat a decoded version of
// any anchor tag that it sees which contains decodable %xx bytes. Other anchor
// tags are not repeated.

// 20041116 _M Upgrades to the Defunker module. The module now decodes any HTML
// encoded bytes that could have been normal ascii.

// 20041114 _M Completed basic defunker engine which strips out all HTML and some
// basic &nbsp; encoding.

// 20041113 _M Began heavy upgrades to this module to improve performance and
// provide additional obfuscation removal. This modification will include a move
// from the use of switch(State) mechanisms to the use of function pointers. This
// should save a few cycles on every byte processed.

// 20021025 _M
// Added FilterChainCString to accept a Null Terminated
// String (CString). Except for the input form it operates
// exactly like the FilterChainInput form as modified below.
// This allows WebClay to deliver the message using a buffer
// rather than a file.

// 20021015 _M
// Modified FilterChainInput to eat control characters and
// <CR> bytes so that the input stream "appears" always to
// be terminated in the *nix standard \n. Tabs are also passed
// but all other low bytes are eaten.

// 20020721 _M File Created.

// This is the base class - nothing special happens here
// except defining the basic format of a FilterChain object.
// If this object is instantiated, then it will simply return
// its source's data, or a stream of zero (0x00) bytes if no
// source has been defined.

#ifndef _MN_FilterChain
#define _MN_FilterChain

#include <stdexcept>
#include <iostream>
#include <sstream>
#include <string>
#include <cstring>
#include <cstdlib>
#include <cctype>


using namespace std;


// Define parameters for this module.

static const int ScanBufferSize = 128;   // Scan buffer capacity, in bytes.

// Define the base class.

class FilterChain {

  private:

    FilterChain* Source;                 // Upstream link; NULL when there is none.

  public:

    // BadSource: raised by constructors that are handed an unusable source.

    class BadSource : public std::invalid_argument {
      public: BadSource(const std::string& w) : std::invalid_argument(w) {}
    };

    // Empty: exception type available to the chain for signalling that
    // no more data can be produced.

    class Empty : public std::underflow_error {
      public: Empty(const std::string& w) : std::underflow_error(w) {}
    };

    // GetByte() hands back the next byte of the chain. The base version
    // simply forwards to the upstream link, or yields 0 when unsourced.

    virtual unsigned char GetByte() {
      return (NULL != Source) ? Source->GetByte() : 0;
    }

    // Unsourced construction: GetByte() will produce zeros.

    FilterChain() : Source(NULL) {}

    // Sourced construction: a NULL source is rejected with BadSource.

    FilterChain(FilterChain* S) : Source(S) {
      if(NULL == S) throw BadSource("FilterChain: NULL source not valid");
    }

    // Virtual so that derived filters can be destroyed through a base pointer.

    virtual ~FilterChain() {}
};

// FilterChainInput
// This version of FilterChain accepts an istream as a source and
// gets a single character from it at each GetByte();

class FilterChainInput : public FilterChain {

  private:

    std::istream* SourceIstream;         // Stream the chain reads from.

  public:

    // GetByte() is overridden to pull a byte from the source stream, and
    // this is where filtering begins. Because the chain feeds text into a
    // pattern matcher, control characters (including <CR>) are swallowed
    // here so that every line end looks like a *nix \n. Only \n and \t
    // survive among the bytes below a space.
    //
    // The comparison is made on an unsigned char. Where plain char is
    // signed, bytes 0x80-0xFF are negative and would otherwise be
    // mistaken for control characters and silently dropped; the buffer
    // based sources in this file pass those bytes through.
    //
    // Throws Empty when the stream cannot deliver another byte.

    unsigned char GetByte() {            // Get the next byte.
      char Raw;                          // What the stream gave us.
      unsigned char Byte;                // The same byte, unsigned.

      do{                                // Loop to eat junk.

        SourceIstream->get(Raw);                                                // Read the next byte...
        if(!SourceIstream->good())                                              // If something went wrong then
          throw Empty("FilterChain: No more data");                             // throw the empty exception.

        Byte = static_cast<unsigned char>(Raw);                                 // Classify it as 0..255.

        if(Byte >= ' ') break;                 // Send all good bytes right away.
        if(Byte=='\n' || Byte=='\t') break;    // If we hit a \n or \t send it.
                                               // Otherwise quietly eat anything
      } while(true);                           // less than a space.

      return Byte;                       // Return the latest byte...
    }

    // Sourced constructor: requires a non-NULL stream that is good().
    // Throws BadSource otherwise.

    FilterChainInput(std::istream* S){                                          // Build me with a stream.
      if(NULL==S) throw BadSource("FilterChainInput: Null source not valid" );  // If it's NULL that's bad.
      if(!S->good()) throw BadSource("FilterChainInput: Bad istream");          // Not good is bad.
      SourceIstream = S;                                                        // If it's good we keep it.
    }

    // Default construction is not allowed; it always throws BadSource.

    FilterChainInput() {                                                        // If we don't have a source then
      throw BadSource("FilterChainInput: Source required");                     // we're no good.
    }
};

// FilterChainCString
// This version sources the data for the chain from a message buffer, or
// more precisely a null terminated string. The basic operation is identical
// to that of FilterChainInput above except that we're not working with
// a filestream as an input.

class FilterChainCString : public FilterChain {

  private:

    unsigned char* InputBuffer;          // Null terminated source text (not owned).
    int BufferIndex;                     // Next position to read.

  public:

    // GetByte() is overridden the same way as in FilterChainInput, except
    // that the data comes from a null terminated string instead of a
    // stream. Bytes below a space are eaten unless they are \n or \t.
    //
    // Throws Empty on reaching the terminator. The index is only advanced
    // after a non-null byte has been read, so it never moves past the
    // terminator: further calls keep throwing Empty rather than reading
    // beyond the end of the string.

    unsigned char GetByte() {                                                   // Get the next byte.
      unsigned char i;                                                          // Keep it here.

      do{                                                                       // Loop to eat junk.

        i = InputBuffer[BufferIndex];                                           // Look at the next byte...
        if(0 == i)                                                              // If there's nothing left then
          throw Empty("FilterChainCString: No more data");                      // throw the empty exception.

        ++BufferIndex;                                                          // Consume it now that it's real.

        if(i >= ' ') break;                                                     // Send all good bytes right away.
        if(i=='\n' || i=='\t') break;                                           // If we hit a \n or \t send it.
                                                                                // Otherwise quietly eat anything
      } while(true);                                                            // less than a space.

      return i;                                                                 // Return the latest byte...
    }

    // Sourced constructor: accepts a null terminated char buffer.
    // Throws BadSource if the pointer is NULL or the string is empty.

    FilterChainCString(unsigned char* S){                                         // Build me with a char buffer.
      if(NULL==S) throw BadSource("FilterChainCString: NULL source not valid");   // If it's NULL that's bad.
      if(0==S[0]) throw BadSource("FilterChainCString: Empty source not valid");  // Empty is bad.
      InputBuffer = S;                                                          // If it's good we keep it.
      BufferIndex = 0;                                                          // Always start at index 0.
    }

    // Default construction is not allowed; it always throws BadSource.

    FilterChainCString() {                                                      // If we don't have a source then
      throw BadSource("FilterChainCString: Source required");                   // we're no good.
    }
};

// FilterChainCBFR
// This version sources the data for the chain from a message buffer, NOT
// a null terminated string. The basic operation is identical to FilterChainCString
// except that this version requires the length of the buffer and stops when that
// number of characters have been read.

class FilterChainCBFR : public FilterChain {

  private:

    unsigned char* InputBuffer;          // Message buffer (not owned).
    unsigned int BufferLength;           // Number of bytes in InputBuffer.
    unsigned int BufferIndex;            // Read position in the current mode.

    std::stringstream& PrependedHeaders; // Text emitted before the buffer.

    bool PrependNotBuffer;               // True while emitting PrependedHeaders.

  public:

    // GetByte() first delivers the contents of PrependedHeaders byte for
    // byte (no filtering), then switches to the message buffer. In buffer
    // mode bytes below a space are eaten unless they are \n or \t.
    //
    // Throws Empty once BufferLength bytes of the buffer have been consumed.

    unsigned char GetByte() {                                                   // Get the next byte.
      unsigned char i;                                                          // Keep it here.

      if(PrependNotBuffer) {                                                    // While in prepend mode:

        // stringstream::str() returns a copy of the text, so take a single
        // snapshot for this call and use it for both the test and the read.

        const std::string Headers = PrependedHeaders.str();

        if(BufferIndex < Headers.length()) {                                    // If there is more to get
          i = Headers[BufferIndex];                                             // then get it and move
          ++BufferIndex;                                                        // the index.
        } else {                                                                // As soon as we run out
          PrependNotBuffer = false;                                             // of prepended headers switch
          BufferIndex = 0;                                                      // to the CBFR and reset the index.
          return GetByte();                                                     // Recurse to get the next byte.
        }

      } else {                                                                  // While in buffer mode:

        do{                                                                     // Loop to eat junk.
          if(BufferLength <= BufferIndex)                                       // If there's nothing left then
            throw Empty("FilterChainCBFR: No more data");                       // throw the empty exception.

          i = InputBuffer[BufferIndex++];                                       // Read the next byte...

          if(i >= ' ') break;                                                   // Send all good bytes right away.
          if(i=='\n' || i=='\t') break;                                         // If we hit a \n or \t send it.
                                                                                // Otherwise quietly eat anything
        } while(true);                                                          // less than a space.
      }

      return i;                                                                 // Return the latest byte...
    }

    // Sourced constructor: accepts a buffer, its length, and a stringstream
    // whose contents are emitted ahead of the buffer.
    //
    // Throws BadSource if the buffer pointer is NULL, if the length is
    // negative, or if both the buffer and the prepended headers are empty.
    // A negative length is rejected because storing it in the unsigned
    // BufferLength would turn it into a very large value and let GetByte()
    // read far beyond the end of the buffer.

    FilterChainCBFR(unsigned char* S, int l, std::stringstream& P) :            // Give me a bfr and a stringstream.
      InputBuffer(S),                                                           // Grab the buffer,
      BufferLength(l < 0 ? 0u : static_cast<unsigned int>(l)),                  // Grab the buffer length,
      BufferIndex(0),                                                           // Initialize the index to 0,
      PrependedHeaders(P),                                                      // Grab the PrependedHeaders reference.
      PrependNotBuffer(true) {                                                  // Do PrependedHeaders first.

        if(NULL==S) throw BadSource("FilterChainCBFR: NULL source not valid");  // If it's NULL that's bad.
        if(l < 0)
          throw BadSource("FilterChainCBFR: Negative length not valid");        // Negative is bad.
        if(0==l && 0==P.str().length())
          throw BadSource("FilterChainCBFR: Empty source not valid");           // Empty is bad.
    }

};

// FilterChainBase64
// This version decodes base64 content in email messages. It begins
// to decode this as soon as it sees the following message and two
// blank lines indicating the coding has started.
//
// Content-Transfer-Encoding: base64
//
// Once it sees a bad character or what appears to be the start of
// a new MIME segment, the filter turns off and passes through its
// source data.

// The startup string for this filter is below. In this case we keep the
// <LF> part of the string to ensure we will be looking at the start
// of a line when we match.

const static unsigned char Base64Start[] = "\nContent-Transfer-Encoding: base64";

// Conversion is done entirely by table lookup. XX64 is the marker stored in
// every table slot whose input byte is not part of the base64 alphabet.

const static unsigned char XX64 = 0xFF;

// The pad character '=' is a special case: it is accepted by the table and
// is given the value 0x00 (the same value as 'A').

// Base64Table is indexed by the incoming byte (0x00-0xFF). Each entry is
// either XX64 or the six bit value (0x00-0x3F) that the byte represents.
// Rows are the high nibble of the input byte, columns the low nibble.

const static unsigned char Base64Table[256] = {

  // 0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F

  XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,  // 0
  XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,  // 1
  XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,0x3E,XX64,XX64,XX64,0x3F,  // 2
  0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,XX64,XX64,XX64,0x00,XX64,XX64,  // 3
  XX64,0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,  // 4
  0x0F,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,XX64,XX64,XX64,XX64,XX64,  // 5
  XX64,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,  // 6
  0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,0x30,0x31,0x32,0x33,XX64,XX64,XX64,XX64,XX64,  // 7
  XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,  // 8
  XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,  // 9
  XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,  // A
  XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,  // B
  XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,  // C
  XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,  // D
  XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,  // E
  XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64,XX64   // F
};

// The following constants give the bit position of each of the 4 six bit
// values within the 24 bit group that becomes 3 octets. Segment 0 is the
// most significant (shift 18) and segment 3 the least significant (shift 0).

const static unsigned char base64_seg0_shift = 18;
const static unsigned char base64_seg1_shift = 12;
const static unsigned char base64_seg2_shift =  6;
const static unsigned char base64_seg3_shift =  0;

class FilterChainBase64 : public FilterChain {

  private:

    unsigned char x;                     // Holding bin.
    unsigned char y;                     // Holding bin.
    unsigned int Workspace;              // Numeric scratch area for conversion.

    // The operating states of the filter.

    enum FilterState {
      SCANNING,                          // One-in = One-out, watching for startup.
      DEQUEING,                          // Handing out buffered data.
      DECODING                           // Handing out filtered data.
    };

    FilterState State;                   // Which state we are in now.

    unsigned int ScanIx;                 // Index used while scanning.
    unsigned int DequeIx;                // Index used while dequeing.
    unsigned char Buffer;                // Single byte buffer.

    bool ValidByte(unsigned char y);     // True when y can be decoded.

  public:

    unsigned char GetByte();             // Replaces the base GetByte().

    // Sourced constructor: hands S to the base class (which rejects NULL),
    // starts the filter in the SCANNING state and zeroes both indexes.

    FilterChainBase64(FilterChain* S)
      : FilterChain(S),
        State(SCANNING),
        ScanIx(0),
        DequeIx(0) {
    }

    // Default construction is not allowed; it always throws BadSource.

    FilterChainBase64() {
      throw BadSource("FilterChainBase64: Source required");
    }

};

// FilterChainQuotedPrintable
// This version decodes quoted-printable content in email messages.
//
// For simplicity this one is always on. That is, whenever it sees a
// convertible quoted-printable byte it will exchange it for the byte
// that is represented. This is only intended for operation preceding the
// spam filter engine so it is safe to make these conversions.

class FilterChainQuotedPrintable : public FilterChain {

  private:

    long int Workspace;                    // Plain text scratch area.

    // The operating states of the filter.

    enum FilterState {
      SCANNING,                            // One-in = One-out, watching for start-up.
      DEQUEING,                            // Handing out buffered data.
      DECODING                             // Handing out filtered data.
    };

    FilterState State;                     // Which state we are in now.

    int BufferLength;                      // Number of bytes held in Buffer.
    int BufferIndex;                       // Current byte within Buffer.
    unsigned char Buffer[ScanBufferSize];  // The buffer itself.

    bool isHexDigit(unsigned char i);      // True when i is a hex digit byte.
    int convertHexDigit(unsigned char i);  // Integer value of hex digit i.

  public:

    unsigned char GetByte();               // Replaces the base GetByte().

    // Sourced constructor: hands S to the base class (which rejects NULL),
    // starts in the SCANNING state, and zeroes the workspace, the buffer
    // length and the buffer index.

    FilterChainQuotedPrintable(FilterChain* S)
      : FilterChain(S),
        Workspace(0),
        State(SCANNING),
        BufferLength(0),
        BufferIndex(0) {
    }

    // Default construction is not allowed; it always throws BadSource.

    FilterChainQuotedPrintable() {
      throw BadSource("FilterChainQuotedPrintable: Source required");
    }

};


// FilterChainDefunker
// This module stores a copy of the stream containing HTML and then emits it
// at the end of the stream with all of the html elements removed and/or decoded
// to eliminate html based obfuscation.

class FilterChainDefunker;

static const int DefunkerSize = 32768;                         // Store size.
static const int DefunkerQueueSize = 24;                       // Size of defunker queue.

class FilterChainDefunker : public FilterChain {               // Class definition.

  private:

    // Occasionally when parsing a chunk of data we must return nothing and
    // instead try again for the next character. Instead of recursing we can
    // set this flag and the root state will simply try again in a loop.

    bool ReturnNothing;                         // Set true to skip this round;

    // Storage

    unsigned char StoreBuffer[DefunkerSize];
    int InputPosition;
    int OutputPosition;

    // Nodes in the state change model are represented by functions.
    // These modes represent the state prior to getting the Empty exception.
    // During this mode, the Defunker simply stores a portion of the message
    // to be scanned later.

    unsigned char LastRawByte;                   // Last Raw Byte (for SkipHeaders);
    unsigned char SkipHeaders();                 // Skips the headers before Store();
    unsigned char Store();                       // Stores the message content for later.

    // Here is a handy Queue mechanism for recovering failed patterns.

    int QueueLength;                             // Queue Length (write position).
    int QueuePosition;                           // Queue Read Position.
    unsigned char Qbfr[DefunkerQueueSize];       // Queue Buffer.

    // ClearQueue() zeroes the queue buffer and resets both queue positions.

    void ClearQueue() {                          // Clear the queue.
      memset(Qbfr,0,sizeof(Qbfr));               // Reset the buffer.
      QueueLength = 0;                           // Zero the length.
      QueuePosition = 0;                         // Zero the position.
    }

    // DeQueue() returns the next queued byte. Once the queue has been
    // drained it clears the queue, points Internal back at DefunkRoot and
    // returns whatever the internal state machine produces next.

    unsigned char DeQueue() {                        // Empty the queue then back to DefunkRoot.
      if(QueuePosition >= QueueLength) {             // If the queue is empty then
        ClearQueue();                                // clear the queue,
        Internal = &FilterChainDefunker::DefunkRoot; // go back to DefunkRoot mode,
        return GetInternal();                        // and return the next byte.
      }                                              // If the queue is not empty then
      return Qbfr[QueuePosition++];                  // return the next byte from the queue.
    }

    // EnQueue() appends a byte to the queue. When the queue is already
    // full the byte is dropped without any error.

    void EnQueue(unsigned char x) {              // Add a byte to the queue.
      if(QueueLength<DefunkerQueueSize)          // If we are safely within the buffer
        Qbfr[QueueLength++] = x;                 // then add this byte to the queue.
    }

    // These modes represent the Defunker pulling data out of its
    // stored copy so that it can be filtered and delivered to the scanner.
    // These modes get turned on once the Empty exception is read from
    // the underlying source.

    unsigned char Preamble();                    // Preamble - separates Defunked text.
    unsigned char DefunkRoot();                  // Root in Defunk mode.
    unsigned char OpenTag();                     // Open tag detected.
    unsigned char OpenAmp();                     // Open & tag.
    unsigned char MatchBR();                     // Matching <br>
    unsigned char MatchP();                      // Matching <p>
    unsigned char MatchNBSP();                   // Matching &nbsp;
    unsigned char SwitchAMPAPOS();               // Looking for AMP or APOS.
    unsigned char MatchAMP();                    // Matching &amp;
    unsigned char MatchAPOS();                   // Matching &apos;
    unsigned char MatchLT();                     // Matching &lt;
    unsigned char MatchGT();                     // Matching &gt;
    unsigned char MatchQUOT();                   // Matching &quot;
    unsigned char EatTag();                      // Eating an unknown tag.
    unsigned char DecodeNum();                   // Decoding &#...number...;

    // Part of defunking is to convert all runs of whitespace into a single space.
    // It also doubles as the master output function once we're out of Store() mode.

    unsigned char SpaceConvChart[256];           // Space conversion chart.
    unsigned char LastReadOut;                   // Last ReadOut byte (for deduping spaces).
    unsigned char ReadOut();                     // Read out the store through the filter.

    unsigned char LastGetStore;                  // Last GetStore byte (for EatTag).
    unsigned char GetStore();                    // Read a byte from the store.

    // Here is a handy pattern match function for eliminating some tags.
    //
    // MatchTagPattern() compares bytes pulled from GetStore() against
    // pattern, starting at pattern index 2 (the first two pattern bytes
    // are not examined here). Store bytes are lower-cased before they are
    // compared. If a space arrives where the pattern expects '>', the tag
    // is treated as having parameters: everything up to and including the
    // next '>' is consumed and the match succeeds.
    //
    // Returns true if the whole pattern was matched, false otherwise.
    // Bytes consumed from the store during a failed match are not put back
    // by this function.
    //
    // NOTE(review): pattern must be at least two bytes long before its
    // terminator, since pattern[2] is read unconditionally - confirm callers.

    bool MatchTagPattern(const char* pattern) {  // Matches pattern. True if matched.
      int pos = 2;                               // Now on the third byte (index 2).
      while(pattern[pos]){                       // While we have more bytes to match
        unsigned char x = GetStore();            // grab the next byte.

        // Special case - HTML tag with a space as in <p stuff>

        if(x==' ' && pattern[pos]=='>') {        // If we have a tag with parameters.
          pos++;                                 // Move pos forward to its null.
          while(GetStore()!='>')continue;        // Eat up to the > and then
          break;                                 // we are done.
        }

        // In the normal case follow the pattern.

        if(tolower(x)!=pattern[pos]) break;      // If we fell off then stop.
        pos++;                                   // If we didn't break move ahead.
      }

      // At this point we are either at the null in our pattern or we did not match.

      if(pattern[pos]) { return false; }         // If we're not at the end then no match.

      return true;                               // Otherwise we do have a match :-)
    }

    // These are the function pointers that map the current state of this object.

    unsigned char (FilterChainDefunker::*Master)();   // Master function for GetByte()
    unsigned char (FilterChainDefunker::*Internal)(); // Internal function for GetByte()

  public:

    // GetByte() dispatches to whichever member function Master points at.

    unsigned char GetByte() {                      // Overload the main fn().
      return (*this.*Master)();                    // Call the master function.
    }

    // GetInternal() dispatches to whichever member function Internal points at.

    unsigned char GetInternal() {                  // Internal state machine get.
      return (*this.*Internal)();                  // Call the internal function.
    }

    // Sourced constructor: hands S to the base class (which rejects NULL),
    // starts with Master = SkipHeaders and Internal = Preamble, clears the
    // queue and the store, and builds the space conversion chart so that
    // <CR>, <LF> and Tab map to a space while every other byte maps to itself.
    //
    // ReturnNothing is explicitly started as false so that the flag never
    // holds an indeterminate value when the state functions first look at it.

    FilterChainDefunker(FilterChain* S)            // Sourced constructor...
      :FilterChain(S),                             // Call the base constructor.
       ReturnNothing(false),                       // Nothing to skip yet.
       InputPosition(0),                           // Reset both position pointers.
       OutputPosition(0),
       LastRawByte(0),
       LastReadOut(0),
       LastGetStore(0),
       Master(&FilterChainDefunker::SkipHeaders),  // Set the initial external and
       Internal(&FilterChainDefunker::Preamble) {  // internal states.

       ClearQueue();                               // Clear the queue;

       memset(StoreBuffer,0,sizeof(StoreBuffer));  // Clear the store buffer.

       for(int i=0;i<256;i++) SpaceConvChart[i]=i; // Initialize the chart.
       SpaceConvChart[(int)'\r']=' ';              // Convert <CR> to space.
       SpaceConvChart[(int)'\n']=' ';              // Convert <LF> to space.
       SpaceConvChart[(int)'\t']=' ';              // Convert Tab to space.
    }

    // Default construction is not allowed; it always throws BadSource.

    FilterChainDefunker() {                                                     // Don't allow any
      throw BadSource("FilterChainDefunker: Source required");                  // null constructors.
    }

};

// FilterChainUrlDecode
// This module removes any unnecessary URL encoding within an <a...> tag. The
// cleaned up version (if different) is emitted immediately after the original
// <a...> tag so that both versions can be interpreted by the pattern scanner.
// This is designed to eliminate common obfuscation techniques.

const int UrlDecodeBfrSize = 256;                  // Capacity of the decode buffer.

class FilterChainUrlDecode : public FilterChain {

  private:

    unsigned char DecodeBfr[UrlDecodeBfrSize];     // Holds the decoded anchor.
    unsigned int DecodeLength;                     // Bytes written to DecodeBfr.
    unsigned int DecodePosition;                   // Read (Inject) position.
    bool DecodeFlag;                               // True once the URL was decoded.

    // Clear() puts the decode state back to empty: the flag is dropped,
    // both counters go to zero and the buffer is filled with nulls.

    void Clear() {
      DecodeFlag = false;
      DecodePosition = 0;
      DecodeLength = 0;
      memset(DecodeBfr,0,sizeof(DecodeBfr));
    }

    // AddToBfr() appends c to the buffer. The last slot of the buffer is
    // never written, and bytes that do not fit are dropped without error.

    void AddToBfr(unsigned char c) {
      if(DecodeLength >= sizeof(DecodeBfr)-1) return;  // No room left.
      DecodeBfr[DecodeLength++] = c;
    }

    unsigned char (FilterChainUrlDecode::*Internal)(); // Current state function.

    bool isHexDigit(unsigned char i);                  // Is i a hex digit?
    int convertHexDigit(unsigned char i);              // Value of a single hex digit.
    unsigned char convertHexByte(unsigned char* x);    // Value of a hex byte.

    // The states of the UrlDecode module...

    unsigned char Bypass();                        // Bypass - waiting for '<'
    unsigned char Tag();                           // Looks for an 'a' or 'i' after '<'
    unsigned char Img1();                          // Looks for 'm' in <img
    unsigned char Img2();                          // Looks for 'g' in <img
    unsigned char Root();                          // Root state of the decode FSM.
    unsigned char GetD1();                         // Decoding step one.
    unsigned char GetD2();                         // Decoding step two.
    unsigned char Inject();                        // Injects the bfr into the stream.

  public:

    // GetByte() dispatches to whichever state function Internal points at.

    unsigned char GetByte() {
      return (this->*Internal)();
    }

    // Sourced constructor: hands S to the base class (which rejects NULL),
    // starts in the Bypass state and clears the decode buffer.

    FilterChainUrlDecode(FilterChain* S)
      : FilterChain(S),
        Internal(&FilterChainUrlDecode::Bypass) {
      Clear();
    }

    // Default construction is not allowed; it always throws BadSource.

    FilterChainUrlDecode() {
      throw BadSource("FilterChainUrlDecode: Source required");
    }

};

// FilterChainHeaderAnalysis (and friends)
// Performs header anomaly analysis and IP extraction and analysis.
// IP Analysis is performed via a provided class that implements the IPTester
// interface. An IP is provided to the IPTester as a [#.#.#.#] string. The
// IPTester may respond with information to be emitted into the headers for
// the pattern matching engine based on those results --- or not ;-)

// Abstract interface for the IP tester handed to FilterChainHeaderAnalysis.
// Implementations override test(), which receives the input string and an
// output string, and returns a reference to a string.

class FilterChainIPTester {
  public:
    virtual ~FilterChainIPTester() {}                       // Virtual so implementations can be destroyed through this interface.
    virtual string& test(string& input, string& output) = 0;
};

// The supplied test() function accepts the input string and returns the
// output string. If desired, the output string can be modified to include
// data from the tests that will be emitted into the data stream for the
// pattern analysis engine to see. Otherwise, the output string should
// remain blank. The test() function _should_ be thread safe -- that is why
// we pass it both input and output ;-)
//
// The provided tester may have any side-effects that are desired.

// FilterChainHeaderAnalysis
// A FilterChain stage built as a state machine: Mode points at the member
// function that handles the current state and GetByte() dispatches through it.
// The do...() handlers, SetOutputBuffer() and FollowPattern() are only
// declared here; their bodies live outside this class body.

class FilterChainHeaderAnalysis : public FilterChain {

  private:

    unsigned char (FilterChainHeaderAnalysis::*Mode)();                         // Internal State Fn Pointer (What Mode)
    FilterChainIPTester& IPTester;                                              // This is the IP tester we use (held by reference).
    string IPToTest;                                                            // String to capture IPs for testing.
    string IPTestResult;                                                        // String to receive IPtest results.

    // Header analysis output state...

    string EndOfHeaderResults;                                                  // String to capture EndOfHeaderResults.

    // OutputBuffer and OutputIndex are used to inject string data.
    // These are used to inject IPTestResult data and Header Analysis data.

    char* OutputBuffer;                                                         // Pointer to output injection string.
    int OutputIndex;                                                            // End of header output results index.
    void SetOutputBuffer(string& s);                                            // Setup the OutputBuffer.
    unsigned char doInjectIPTestResult();                                       // Inject OutputBuffer and go to doSeekNL.
    unsigned char doInjectAnalysis();                                           // Inject OutputBuffer and go to doOff.

    // Header seek pattern state...
    // These tools work to follow patterns for header tags.
    // SetFollowPattern resets the engine and establishes the pattern to follow.
    // FollowPattern checks c against the next byte in the pattern.
    // -1 = The pattern failed.
    //  1 = The pattern was followed.
    //  0 = The pattern is complete.

    const char* MatchPattern;                                                   // Current pattern to match.
    int MatchIndex;                                                             // Pattern match following index.
    void SetFollowPattern(const char* p) { MatchPattern = p; MatchIndex = 0; }  // Set the pattern to follow.
    int FollowPattern(char c);                                                  // Follow the pattern.

    //// Internal modes for this module...
    //// Each one matches the signature of Mode so it can be installed as the
    //// current state handler.

    unsigned char doSeekNL();                                                   // Looking for a new line.
    unsigned char doSeekDispatch();                                             // Looking at the first char after NL.
    unsigned char doReceived();                                                 // Identifying a Received: header.
    unsigned char doFindIP();                                                   // Seeking the [IP] in a Received header.
    unsigned char doTestIP();                                                   // Gets and tests the [IP].
    unsigned char doFrom();                                                     // Identifying a From: header.
    unsigned char doTo();                                                       // Identifying a To: header.
    unsigned char doCC();                                                       // Identifying a CC: header.
    unsigned char doMessageID();                                                // Identifying a MessageID header.
    unsigned char doDate();                                                     // Identifying a Date: header.
    unsigned char doSubject();                                                  // Identifying a Subject: header.
    unsigned char doEndOfHeaders();                                             // IdentifyEndOfHeaders & Emit Results.

    unsigned char doOff() { return FilterChain::GetByte(); }                    // Bypass mode: pass the base class byte straight through.

    bool FoundFrom;                                                             // True if From: was found.
    bool FoundTo;                                                               // True if To: was found.
    bool FoundCC;                                                               // True if CC: was found.
    bool FoundMessageID;                                                        // True if Message-ID: was found.
    bool FoundDate;                                                             // True if Date: was found.
    bool FoundSubject;                                                          // True if Subject: was found.
    bool FoundHighBitCharacters;                                                // True if high bit characters were found.

    // Fetches one byte via FilterChain::GetByte() and latches
    // FoundHighBitCharacters when bit 7 of that byte is set. The flag is
    // never cleared here, so it stays true once any such byte has been seen.

    unsigned char GetCheckedByte() {                                            // Internal GetByte & check for high bits.
        unsigned char x = FilterChain::GetByte();                               // Get the byte from up the chain.
        if(0 < (x & 0x80)) {                                                    // Check for a high bit byte (non-ascii).
            FoundHighBitCharacters = true;                                      // If it is found then set the flag.
        }                                                                       // If not then at least we checked ;-)
        return x;                                                               // Return the byte.
    }

  public:

    // Returns the next byte of this stage by calling the handler that Mode
    // currently points at.

    unsigned char GetByte() {                                                   // Overload the main fn().
        return (*this.*Mode)();                                                 // Call the Internal function for this mode.
    }

    // S is handed to the FilterChain base constructor; T is stored by
    // reference as the IP tester. Every data member is given a defined
    // starting value, in declaration order, so that nothing is read while
    // still indeterminate.

    FilterChainHeaderAnalysis(FilterChain* S, FilterChainIPTester& T) :         // Construct with the chain and a tester.
      FilterChain(S),                                                           // Capture the chain.
      Mode(&FilterChainHeaderAnalysis::doSeekDispatch),                         // Start in doSeekDispatch: the first byte read is the first byte of a new line.
      IPTester(T),                                                              // Capture the tester.
      IPToTest(""),                                                             // IPToTest,
      IPTestResult(""),                                                         // IPTestResult, and
      EndOfHeaderResults(""),                                                   // EndOfHeaderResults are all empty to start.
      OutputBuffer(0),                                                          // Nothing to inject yet.
      OutputIndex(0),
      MatchPattern(0),                                                          // No pattern until SetFollowPattern() is called.
      MatchIndex(0),
      FoundFrom(false),                                                         // Set all of the "found" bits to false.
      FoundTo(false),
      FoundCC(false),
      FoundMessageID(false),
      FoundDate(false),
      FoundSubject(false),
      FoundHighBitCharacters(false) {
    }

    // Each Missing...() accessor is the negation of the matching Found... flag.

    bool MissingFrom() { return (!FoundFrom); }                                 // True if missing From header.
    bool MissingTo() { return (!FoundTo); }                                     // True if missing To header.
    bool MissingCC() { return (!FoundCC); }                                     // True if missing CC header.
    bool MissingSubject() { return (!FoundSubject); }                           // True if missing Subject header.
    bool MissingDate() { return (!FoundDate); }                                 // True if missing Date header.
    bool MissingMessageID() { return (!FoundMessageID); }                       // True if missing MessageID header.
    bool HighBitCharacters() { return (FoundHighBitCharacters); }               // True if High bit characters were found.

};

#endif