madscientist
/
SNFMulti

// snf_engine.hpp
//
// (C) 1985-2004 MicroNeil Research Corporation
// (C) 2005-2009 ARM Research Labs, LLC.
//
// Derived from original work on cellular automation for complex pattern
// reflex engine 1985 Pete McNeil (Madscientist)
//
// Derived from rapid scripting engine (token matrix) implementation 1987
//

// This is the header file for the sniffer pattern matching engine.

// 20080305 _M - Added FlipEndian() function to convert rulebases from their
// native little-endian format to big-endian format for CPUs that need it. See
// additional work in SNFMulti to call the FlipEndian() function AFTER the
// rulebase has been authenticated but before it is put into use.

// 20070606 _M - Refactored exceptions to use base std::exception and improved
// the evaluator code to reduce the strength of safety testing from 3 compares
// per byte to 1.

// 20060531 _M - Added evaluator caching to save a few cycles by not allocating
// new memory and performing a complete initialization of an evaluator if there
// is already one handy from a previous use.

// 20021030 _M - Created.

#ifndef _MN_SNF_ENGINE
#define _MN_SNF_ENGINE

#include <cassert>
#include <stdexcept>
#include <unistd.h>
#include <cstdio>
#include <cctype>
#include <ctime>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <exception>
#include "../CodeDweller/mangler.hpp"
//#include "../nvwa-0.6/nvwa/debug_new.h"

using namespace std;

// 20030929 _M SYMBOL_RANGE moved to snf_engine.hpp as part of augmenting the
// capability of a match record. Match records now can decode themselves.

const int SYMBOL_RANGE = 256;       // Symbol result coding modulator.

// Let's create our utility classes and structures.

// The Token class.
// This class represents the structure of a token. The rule file is, in fact,
// a token matrix. Tokens within the matrix allow the sniffer to navigate through
// a state change matrix attempting to locate special positions that indicate the
// termination of a path, or more specifically, the recognition of a string that
// has been evaluated along that path.
//
// IT IS IMPORTANT TO NOTE THAT AS THESE PROGRAMS ARE WRITTEN IT ASSUMES WE ARE IN
// A 32 BIT INTEL ENVIRONMENT SO THAT THE TOKEN MATRIX CAN BE LOADED IN A SINGLE PASS
// USING A BINARY INPUT STREAM.

////////////////////////////////////////////////////////////////////////////////////////
// Token Declaration ///////////////////////////////////////////////////////////////////

class Token { // Token class for defining and interpreting nodes within the matrix.

  public: // Beginning of Public stuff.

    int Check;                                                                  // The first int is a check character.
    int Vector;                                                                 // The second int is a vector.

    // isUnused() Returns true if the token is in an unused state.

    int isUnused() {
      return (Check==-1 && Vector==0) ? true : false;
    }

    // isTermination() Returns true if the token is in a termination state.

    int isTermination() {
      if(Check==0 && Vector > 0)
        return true;
      else
        return false;
    }

    // Symbol() Returns the symbol value for the token.

    int Symbol() { return Vector; }

    // Character() Returns the check character for this token.

    int Character() { return Check; }

  // End of Public stuff.
  // Note that no constructor is needed because the default constructor will do nicely.

};

////////////////////////////////////////////////////////////////////////////////////////
// Token Matrix Declaration ////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////
//
// The Token Matrix loads, verifies, and maintains an array of tokens for the evaluators
// to live in. This class provides safe access to the token matrix.
//
////////////////////////////////////////////////////////////////////////////////////////

class TokenMatrix {

  private:

    Token* Matrix;                                                              // Where we hold the token matrix.
    int MatrixSize;                                                             // What size is the matrix.

  public:

    // Exceptions...

    class BadAllocation : public runtime_error {                                // Exception for a bad memory allocation.
        public: BadAllocation(const string& w):runtime_error(w) {}
    };
    class BadMatrix : public runtime_error {                                    // Exception for invalid matrix loads.
        public: BadMatrix(const string& w):runtime_error(w) {}
    };
    class BadFile : public runtime_error {                                      // Exception for missing rulebase files.
        public: BadFile(const string& w):runtime_error(w) {}
    };
    class OutOfRange : public runtime_error {                                   // Exception for indexes out of range.
        public: OutOfRange(const string& w):runtime_error(w) {}
    };

    // Standards...

    static const int SecuritySegmentSize = 1024;                                // File Authentication Segment
    static const int SecurityKeyBufferSize = 32;                                // Security Key Pad Block Size
    static const int RulebaseDigestSize = 64;                                   // Number of bytes in digest.

    static const int MinimumValidMatrix =                                       // Establish the smallest valid
      SecuritySegmentSize * 2 / SecurityKeyBufferSize;                          // matrix size

    // The first interface component checks the range and gives up the token.

    Token at(int x) {                                                           // Get the token at x
      if(x<0 || x>=MatrixSize)                                                  // Check to see if we're in bounds.
        throw OutOfRange("(x<0 || x>=MatrixSize)");                             // If we're not then throw an exception.
      return Matrix[x];                                                         // If we are then give it to them.
    }

    // The second interface component delivers the Matrix if it's valid so that other
    // code can manipulate it more efficiently (without constantly checking bounds.

    Token* getMatrix() {                                                        // Return the matrix.
      if(MatrixSize==0 || Matrix==NULL)                                         // If the matrix isn't ready then
        throw BadMatrix("(MatrixSize==0 || Matrix==NULL)");                     // throw an exception. If it is
      return Matrix;                                                            // ready then send it out.
    }

    // For simplicity we simply extend the underlying Token functions by taking a
    // position reference, checking it's range, and returning the result.

    int isUnused(int x) {                                                       // Extend Token.isUnused()
      return at(x).isUnused();
    }

    int isTermination(int x) {                                                  // Extend Token.isTermination()
      return at(x).isTermination();
    }

    int Symbol(int x) {                                                         // Exetend Token.Symbol()
      return at(x).Symbol();
    }

    int Character(int x) {                                                      // Extend Token.Character()
      return at(x).Character();
    }

    // Utility functions...

    int Size() { return MatrixSize; }                                           // Returns the size of the matrix.

    void Load(const char* FileName);                                            // Loads the matrix from a file name.

    void Load(string& FileName);                                                // Loads the matrix from a file name string.

    void Load(ifstream& F);                                                     // Loads the token matrix from the file.

    void Validate(string& SecurityKey);                                         // Validates the matrix with a key string.

    void Verify(string& SecurityKey);                                           // Verifies the matrix digest.

    void FlipEndian();                                                          // Converts big/little endian tokens.

    // Constructors...

    TokenMatrix() :
      Matrix(NULL),
      MatrixSize(0) { }

    TokenMatrix(ifstream& F) :
      Matrix(NULL),
      MatrixSize(0) {
      Load(F);
    }

    ~TokenMatrix() {                                                            // The Distructor...
      MatrixSize = 0;                                                           // Set the size to zero.
      if(Matrix) { delete [] Matrix; Matrix = NULL; }                           // If we have a matrix, remove it.
    }

};

/////////////////////////////////////////////////////////////////////////////////////////
// End Token Work ///////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////

// Having defined the token matrix, I now define the Evaluator class which
// be used to follow any matching rule threads as the program scans a a file.
// A new evaluator is started at each position in the input stream making all
// of the rules in the token matrix global.

// The following two values are returned by the Evaluator at every step.

const int WILD_WHITESPACE = 1; // Token code for whitespace wildcards.
const int WILD_DIGIT = 2;      // Token code for digit wildcards.
const int WILD_LETTER = 3;     // Token code for letter wildcards.
const int WILD_NONWHITE = 4;   // Token code for non-whitespace wildcards.
const int WILD_ANYTHING = 5;   // Token code for any character.
const int WILD_INLINE = 6;     // Token code for any character except new line.

const int RUN_GATEWAY = 8;     // Token code for run-loop gateways.

// Here are some tuning parameters

const int MaxWildRunLength = 4096;  // Maximum span of "any number" wildcards.
const int MAX_EVALS = 2048;         // Maximum number of evaluators.

//////////////////////////////////////////////////////////////////////////////////////////
// Evaluators and the Evaluation Matrix
//////////////////////////////////////////////////////////////////////////////////////////

class EvaluationMatrix;         // We've got to pre-declare this for some compilers.

class Evaluator { // Evaluator class for following threads through the matrix.

  private:

    EvaluationMatrix* myEvaluationMatrix;                                       // The evaluation matrix I live in.
    Token* Matrix;                                                              // The raw token matrix I walk in.
    int MatrixSize;                                                             // Size of raw token matrix.

    // 20070606 _M Optimized Evaluator code by reducing the strength of the
    // safety check from 3 comparisons to 1.

    unsigned int PositionLimit;                                                 // Largest CurrentPosition.

    // 20030216 _M Optimization conversions

    inline int i_lower();                                                       // { return myEvaluationMatrix->i_lower; }
    inline bool i_isDigit();                                                    // { return myEvaluationMatrix->i_isDigit; }
    inline bool i_isSpace();                                                    // { return myEvaluationMatrix->i_isSpace; }
    inline bool i_isAlpha();                                                    // { return myEvaluationMatrix->i_isAphpa; }

  public:

    // Standard Values...

    enum States {                                                               // These are the posible coditions.
      OUT_OF_RANGE,                                                             // We're outside the matrix - very bad.
      FALLEN_OFF,                                                               // We've fallen off the path and are lost.
      DOING_OK,                                                                 // We're doing ok and following along.
      TERMINATED                                                                // We've reached the end of our path.
    };

    // Attributes...

    States Condition;                                                           // What state am I in? How's my health?

    Evaluator* NextEvaluator;                                                   // Linked List Pointer.
    unsigned int StreamStartPosition;                                           // Indexes the position where we started.
    unsigned int CurrentPosition;                                               // Indexes the node we are surfing.

    int WildRunLength;                                                          // Wildcard run length so far.

    // EvaluateThis() assumes it is being given the next character along the
    // path of a thread in the token matrix. It follows that thread and evaluates
    // it's condition.

    States EvaluateThis(unsigned short int i);                                  // Follow the next byte.

    // isNoDuplicate() is used to keep us from allocating identical evaluators. This is
    // key to creating buddies when working with wildcards. It prevents us from recursively
    // proliferating evaluators at each new character when running in a wildcard loop.

    bool isNoDuplicate(unsigned int Position) {                                 // Returns false if there is a duplicate.
      if(CurrentPosition == Position)                                           // Obviously, if I match, then there's a dup.
        return false;
                                                                                // If I don't match and I'm the last one then
      if(NextEvaluator==NULL)                                                   // it must be true there are no dups. If there
        return true;                                                            // are more to ask then I'll let them answer.
      else
        return NextEvaluator->isNoDuplicate(Position);
    }

    Evaluator(unsigned int s, EvaluationMatrix* m);                             // Constructor...

    ~Evaluator(){
      if(NextEvaluator!=NULL){                                                  // If there's more to this list then
        delete NextEvaluator;                                                   // delete it.
      }
      NextEvaluator = NULL;                                                     // Always null on exit.
    }

};

// A MatchRecord is created each time a new rule match occurrs. These records form a
// linked list within the Evaluation Matrix that can be spit out after the process is
// over for reporting purposes.

class MatchRecord {
  public:
    int MatchStartPosition;     // Where in the data stream did the match start?
    int MatchEndPosition;       // Where in the data stream did the match end?
    int MatchSymbol;            // What symbol was attached to the match rule?

    inline int RuleId(){return (MatchSymbol/SYMBOL_RANGE);}                     // Decode RuleID
    inline int RuleGroup(){return (MatchSymbol%SYMBOL_RANGE);}                  // Decode GroupID

    MatchRecord* NextMatchRecord;

  MatchRecord(int sp, int ep, int sym) {                                        // When constructing a MatchRecord,
    MatchStartPosition = sp;                                                    // you must provide all of it's data.
    MatchEndPosition = ep;
    MatchSymbol = sym;
                                                                                // Since match records are always added to
    NextMatchRecord = NULL;                                                     // the end our next pointer is always NULL.
  }

  ~MatchRecord(){
    if(NextMatchRecord != NULL)                                                 // If there's more list, then delete it.
      delete NextMatchRecord;
    NextMatchRecord = NULL;                                                     // Clean up our pointer before leaving.
  }
};

// Now that we've created our utility classes, we'll create another class (with an instance)
// that builds a matrix to evaluate all incoming characters, manage the list, and keeps
// statistics and results from the execution process.

class EvaluationMatrix {

  private:

    TokenMatrix* myTokenMatrix;                                                 // Token Matrix that I evaluate with.

    Evaluator* EvaluatorList;                                                   // Linked list of Evaluators.

    Evaluator* CurrentEvaluator;                                                // Current Evaluator (when checking)
    Evaluator* PreviousEvaluator;                                               // Previous Evaluator (when checking)

    // Evaluator Caching Mechanism.

    Evaluator* EvaluatorCache;                                                  // List of cached, ready evaluators.
    Evaluator* SourceEvaluator(int s, EvaluationMatrix* m);                     // Get a cached or new evaluator.
    void CacheEvaluator(Evaluator* e);                                          // Cache a used evaluator.

    int CountOfEvaluators;                                                      // Current count of evaluators.

    int PassResult;                                                             // Result of the latest evaluation pass.

    MatchRecord* LastResultInList;                                              // Keeps track of the end of the result list.

    MatchRecord* AddMatchRecord(int sp, int ep, int sym);                       // Add a match result.

    // DropEvaluator() is called by the EvaluateThis() method whenever an evaluator
    // reports the FALLEN_OFF result. The EvaluateThis() method keeps two values up
    // to date - one is the current evaluator (which will be dropped) and the other is
    // the previous evaluator (which will be updated to heal the list).

    // When we've finished this function, the CurrentEvaluator will be on the next
    // evaluator node if it exists. Therefore, the caller should skip it's normal
    // list itteration code when this function has been called.

    void DropEvaluator();

  public:

    // Exception classes...

    class BadAllocation : public runtime_error {                                // Allocation failed exception.
        public: BadAllocation(const string& w):runtime_error(w) {}
    };
    class MaxEvalsExceeded : public runtime_error {                             // Too many evaluators exception.
        public: MaxEvalsExceeded(const string& w):runtime_error(w) {}
    };
    class OutOfRange : public runtime_error {                                   // Out of range exception.
        public: OutOfRange(const string& w):runtime_error(w) {}
    };

    // Attributes...

    int CountOfCharacters;                                                      // How many characters have been evaluated.
    int MaximumCountOfEvaluators;                                               // Largest matrix size reached.

    MatchRecord* ResultList;                                                    // List of match results.

    int DeepSwitch;                                                             // true if we're doing a deep scans.

    // 20030216 _M High Level Conversion Optimizers...

    int i_lower;                                                                // Lower case version of byte under test.
    bool i_isDigit;                                                             // true if i is a digit.
    bool i_isSpace;                                                             // true if i is whitespace.
    bool i_isAlpha;                                                             // true if i is alpha.

    // AddEvaluator() is made public because the Evaluator object must have access
    // to it in order to handle the creation of buddies as it evaluates it's rules.
    // Similarly the getTokens is public because evaluators must use this when they
    // initialize. In a later version we will clean this up so that all of this stuff
    // can be handled somewhat more privately.

    Token* getTokens() {                                                        // Deliver the raw token matrix
      return myTokenMatrix->getMatrix();                                        // for use when creating evaluators.
    }

    int getMatrixSize() {                                                       // Deliver the raw matrix size
      return myTokenMatrix->Size();                                             // for use when creating evaluators.
    }

    Evaluator* AddEvaluator(int s, unsigned int m);                             // Adds a new evaluator to the top.

    Evaluator* InsEvaluator(int s, unsigned int m);                             // Inserts a new evaluator after the
                                                                                // current evaluator. (Only called by
                                                                                // an existing evaluator in process...)


    // isNoDuplicate(int p) checks for duplicate evaulators

    bool isNoDuplicate(unsigned int p) {                                        // If there's no list there can be no
      if(EvaluatorList == NULL)                                                 // duplicates so we're true. If there is
        return true;                                                            // a list then we'll let the list answer.
      else
        return EvaluatorList->isNoDuplicate(p);
    }

    // EvaluateThis() Moves each evaluator with the current character and creates a new
    // evaluator for the current spot in the input file to make all rules global.

    int EvaluateThis(unsigned short int i);

    EvaluationMatrix(TokenMatrix* m) { // Constructor w/ pointer to Token Matrix...

      myTokenMatrix = m;                                                        // Grab my TokenMatrix.

      EvaluatorList = NULL;                                                     // Start off with no evaluators.
      EvaluatorCache = NULL;                                                    // Start off with no evaluator cache.

      CurrentEvaluator = NULL;                                                  // NULL means starting at the top.
      PreviousEvaluator = NULL;                                                 // NULL means previous is the top.

      ResultList = NULL;                                                        // Start off with no results in our list.
      LastResultInList = NULL;

      CountOfCharacters = 0;                                                    // The count of characters will be zero and
      MaximumCountOfEvaluators = 0;                                             // the maximum Evaluator count will be zero
      CountOfEvaluators = 0;                                                    // and the current count will also be zero.

      PassResult = 0;                                                           // Initialize expecting no matches.

    }

    ~EvaluationMatrix(){ // Destructor to clean up memory allocations.

      myTokenMatrix = NULL;                                                     // Stop pointing at the TokenMatrix

      // Both of these lists konw how to delete themselves.
      // 20060531_M Fixed possible crash by checking for NULL before
      // deleting these lists. Also added cleanup for the EvaluatorCache.

      if(NULL!=EvaluatorCache) {
        delete EvaluatorCache;                                                  // Delete the evaluator cache.
        EvaluatorCache = NULL;                                                  // Then clear it's pointer.
      }

      if(NULL!=EvaluatorList) {
        delete EvaluatorList;                                                   // Delete the evaluator list.
        EvaluatorList = NULL;                                                   // Then clear it's pointer.
      }

      if(NULL!=ResultList) {
        delete ResultList;                                                      // Delete the result list.
        ResultList = NULL;                                                      // Then clear it's pointer.
      }
    }

};

// 20060531_M Implementation of the evaluator cache is all inline.
// In place of new Evaluator() we now can use SourceEvaluator()
// In place of delete Evaluator() we now can use CacheEvaluator()
// The effect is to store previously allocaed evaluators in the EvaluatorCache
// list so that they can be reused. This avoids the frequen use of
// new and delete and allows us to skip a few extra cycles for initialization
// because much of the constructor work for a new evaluator is already done
// in any cached evaluator.
//
// In practice, at least one evaluator is likely to be created and destroyed
// for each byte that is scanned. This new mechanism significantly reduces the
// number of cycles that would normally be associated with those operations by
// eliminating them most of the time. Instead of returning used memory to the
// heap during delete, the evaulator is simply added to the cache list. Instead
// of allocating new space from the heap and initializing the object, a chached
// evaluator is simply moved from the cache into production. Moving into and
// out of the cache is roughly as simple as changing a couple of pointers.

// In place of new Evaluator, we do this...

inline Evaluator* EvaluationMatrix::SourceEvaluator(int s, EvaluationMatrix* m) {   // Get a cached or new evaluator.
    if(NULL==EvaluatorCache) return new Evaluator(s,m);                             // If we have no cache, use new!
    Evaluator* reuse = EvaluatorCache;                                              // Otherwise grab a reusable one.
    EvaluatorCache = reuse->NextEvaluator;                                          // Collaps the cache by one.
    reuse->NextEvaluator = NULL;                                                    // Clean it up a bit.
    reuse->StreamStartPosition = s;                                                 // Record our starting point.
    reuse->CurrentPosition = 0;                                                     // Reset the Current Position.
    reuse->WildRunLength = 0;                                                       // Reset the run length.
    reuse->Condition = Evaluator::DOING_OK;                                         // Reset the condition.
    return reuse;                                                                   // Return the reusable unit.
}

// In place of delete Evaluator, we do this...

inline void EvaluationMatrix::CacheEvaluator(Evaluator* e) {                        // Cache a used evaluator.
    e->NextEvaluator = EvaluatorCache;                                              // Link the used evaluator
    EvaluatorCache = e;                                                             // into the cache;
}

// In the above, the first evaluator added will get NULL as it's NextEvaluator.
// When that first evaulator is used, the NULL pointer will return to the root
// of the EvaluatorCache list. In this regard the cache acts like a stack.

#endif