// snf_engine.hpp
//
// (C) 1985-2004 MicroNeil Research Corporation
// (C) 2005-2009 ARM Research Labs, LLC.
//
// Derived from original work on cellular automation for complex pattern
// reflex engine 1985 Pete McNeil (Madscientist)
//
// Derived from rapid scripting engine (token matrix) implementation 1987
//
// This is the header file for the sniffer pattern matching engine.

// 20080305 _M - Added FlipEndian() function to convert rulebases from their
// native little-endian format to big-endian format for CPUs that need it. See
// additional work in SNFMulti to call the FlipEndian() function AFTER the
// rulebase has been authenticated but before it is put into use.

// 20070606 _M - Refactored exceptions to use base std::exception and improved
// the evaluator code to reduce the strength of safety testing from 3 compares
// per byte to 1.

// 20060531 _M - Added evaluator caching to save a few cycles by not allocating
// new memory and performing a complete initialization of an evaluator if there
// is already one handy from a previous use.

// 20021030 _M - Created.

#ifndef _MN_SNF_ENGINE
#define _MN_SNF_ENGINE

// NOTE(review): the header names on the twelve system #include lines were
// missing from the copy this file was restored from. The list below covers
// what the declarations in this header need (<cstddef>, <exception>,
// <fstream>, <stdexcept>, <string>) plus commonly used standard headers that
// including files may have been picking up transitively. Any platform-specific
// headers the original listed could not be recovered - confirm against the
// original sources.
#include <cassert>
#include <cctype>
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <exception>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
#include "../CodeDweller/mangler.hpp"
//#include "../nvwa-0.6/nvwa/debug_new.h"

// Kept as in the original; files that include this header presumably rely on
// it, so it is not removed here.
using namespace std;

// 20030929 _M SYMBOL_RANGE moved to snf_engine.hpp as part of augmenting the
// capability of a match record. Match records now can decode themselves.

const int SYMBOL_RANGE = 256;                   // Symbol result coding modulator.

// Let's create our utility classes and structures.

// The Token class.
// This class represents the structure of a token. The rule file is, in fact,
// a token matrix. Tokens within the matrix allow the sniffer to navigate through
// a state change matrix attempting to locate special positions that indicate the
// termination of a path, or more specifically, the recognition of a string that
// has been evaluated along that path.
//
// IT IS IMPORTANT TO NOTE THAT AS THESE PROGRAMS ARE WRITTEN IT ASSUMES WE ARE IN
// A 32 BIT INTEL ENVIRONMENT SO THAT THE TOKEN MATRIX CAN BE LOADED IN A SINGLE PASS
// USING A BINARY INPUT STREAM.

////////////////////////////////////////////////////////////////////////////////////////
// Token Declaration ///////////////////////////////////////////////////////////////////

// Token class for defining and interpreting nodes within the matrix.
// It is deliberately left as two plain ints with no constructor; per the note
// above the matrix is loaded from a binary stream, so do not add virtual
// functions or extra data members here.
class Token {
  public:                                       // Beginning of Public stuff.

    int Check;                                  // The first int is a check character.
    int Vector;                                 // The second int is a vector.

    // isUnused() Returns true if the token is in an unused state.
    int isUnused() const {
        return (Check == -1 && Vector == 0) ? true : false;
    }

    // isTermination() Returns true if the token is in a termination state.
    int isTermination() const {
        return (Check == 0 && Vector > 0) ? true : false;
    }

    // Symbol() Returns the symbol value for the token.
    int Symbol() const { return Vector; }

    // Character() Returns the check character for this token.
    int Character() const { return Check; }

    // End of Public stuff.
    // Note that no constructor is needed because the default constructor will do nicely.
};

////////////////////////////////////////////////////////////////////////////////////////
// Token Matrix Declaration ////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////
//
// The Token Matrix loads, verifies, and maintains an array of tokens for the evaluators
// to live in. This class provides safe access to the token matrix.
//
////////////////////////////////////////////////////////////////////////////////////////

class TokenMatrix {
  private:

    Token* Matrix;                              // Where we hold the token matrix.
    int MatrixSize;                             // What size is the matrix.

    // Copying is disabled (declared, never defined). The destructor releases
    // Matrix with delete[], so a member-wise copy would free the same array twice.
    TokenMatrix(const TokenMatrix&);
    TokenMatrix& operator=(const TokenMatrix&);

  public:

    // Exceptions...

    class BadAllocation : public std::runtime_error {   // Exception for a bad memory allocation.
      public: BadAllocation(const std::string& w) : std::runtime_error(w) {}
    };
    class BadMatrix : public std::runtime_error {       // Exception for invalid matrix loads.
      public: BadMatrix(const std::string& w) : std::runtime_error(w) {}
    };
    class BadFile : public std::runtime_error {         // Exception for missing rulebase files.
      public: BadFile(const std::string& w) : std::runtime_error(w) {}
    };
    class OutOfRange : public std::runtime_error {      // Exception for indexes out of range.
      public: OutOfRange(const std::string& w) : std::runtime_error(w) {}
    };

    // Standards...

    static const int SecuritySegmentSize = 1024;    // File Authentication Segment
    static const int SecurityKeyBufferSize = 32;    // Security Key Pad Block Size
    static const int RulebaseDigestSize = 64;       // Number of bytes in digest.
    static const int MinimumValidMatrix =           // Establish the smallest valid
      SecuritySegmentSize * 2 / SecurityKeyBufferSize;  // matrix size

    // The first interface component checks the range and gives up the token.
    // Returns a copy of the token at x; throws OutOfRange when x is not a
    // valid index (which is always the case while no matrix is loaded).
    Token at(int x) const {
        if (x < 0 || x >= MatrixSize) {
            throw OutOfRange("(x<0 || x>=MatrixSize)");
        }
        return Matrix[x];
    }

    // The second interface component delivers the Matrix if it's valid so that other
    // code can manipulate it more efficiently (without constantly checking bounds).
    // Throws BadMatrix when no matrix is loaded.
    Token* getMatrix() {
        if (MatrixSize == 0 || Matrix == NULL) {
            throw BadMatrix("(MatrixSize==0 || Matrix==NULL)");
        }
        return Matrix;
    }

    // For simplicity we simply extend the underlying Token functions by taking a
    // position reference, checking its range, and returning the result.
    // Each of these throws OutOfRange through at() for an invalid position.

    int isUnused(int x) const {                 // Extend Token.isUnused()
        return at(x).isUnused();
    }

    int isTermination(int x) const {            // Extend Token.isTermination()
        return at(x).isTermination();
    }

    int Symbol(int x) const {                   // Extend Token.Symbol()
        return at(x).Symbol();
    }

    int Character(int x) const {                // Extend Token.Character()
        return at(x).Character();
    }

    // Utility functions...

    int Size() const { return MatrixSize; }     // Returns the size of the matrix.
    void Load(const char* FileName);            // Loads the matrix from a file name.
    void Load(std::string& FileName);           // Loads the matrix from a file name string.
    void Load(std::ifstream& F);                // Loads the token matrix from the file.
    void Validate(std::string& SecurityKey);    // Validates the matrix with a key string.
    void Verify(std::string& SecurityKey);      // Verifies the matrix digest.
    void FlipEndian();                          // Converts big/little endian tokens.

    // Constructors...

    TokenMatrix() :
      Matrix(NULL),
      MatrixSize(0) { }

    TokenMatrix(std::ifstream& F) :
      Matrix(NULL),
      MatrixSize(0) {
        Load(F);
    }

    ~TokenMatrix() {                            // The Destructor...
        MatrixSize = 0;                         // Set the size to zero.
        if (Matrix) {                           // If we have a matrix, remove it.
            delete [] Matrix;
            Matrix = NULL;
        }
    }

};

/////////////////////////////////////////////////////////////////////////////////////////
// End Token Work ///////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////////////////

// Having defined the token matrix, I now define the Evaluator class which will
// be used to follow any matching rule threads as the program scans a file.
// A new evaluator is started at each position in the input stream making all
// of the rules in the token matrix global.

// The following values are token codes for the wildcard nodes in the matrix.

const int WILD_WHITESPACE = 1;                  // Token code for whitespace wildcards.
const int WILD_DIGIT = 2;                       // Token code for digit wildcards.
const int WILD_LETTER = 3;                      // Token code for letter wildcards.
const int WILD_NONWHITE = 4; // Token code for non-whitespace wildcards. const int WILD_ANYTHING = 5; // Token code for any character. const int WILD_INLINE = 6; // Token code for any character except new line. const int RUN_GATEWAY = 8; // Token code for run-loop gateways. // Here are some tuning parameters const int MaxWildRunLength = 4096; // Maximum span of "any number" wildcards. const int MAX_EVALS = 2048; // Maximum number of evaluators. ////////////////////////////////////////////////////////////////////////////////////////// // Evaluators and the Evaluation Matrix ////////////////////////////////////////////////////////////////////////////////////////// class EvaluationMatrix; // We've got to pre-declare this for some compilers. class Evaluator { // Evaluator class for following threads through the matrix. private: EvaluationMatrix* myEvaluationMatrix; // The evaluation matrix I live in. Token* Matrix; // The raw token matrix I walk in. int MatrixSize; // Size of raw token matrix. // 20070606 _M Optimized Evaluator code by reducing the strength of the // safety check from 3 comparisons to 1. unsigned int PositionLimit; // Largest CurrentPosition. // 20030216 _M Optimization conversions inline int i_lower(); // { return myEvaluationMatrix->i_lower; } inline bool i_isDigit(); // { return myEvaluationMatrix->i_isDigit; } inline bool i_isSpace(); // { return myEvaluationMatrix->i_isSpace; } inline bool i_isAlpha(); // { return myEvaluationMatrix->i_isAphpa; } public: // Standard Values... enum States { // These are the posible coditions. OUT_OF_RANGE, // We're outside the matrix - very bad. FALLEN_OFF, // We've fallen off the path and are lost. DOING_OK, // We're doing ok and following along. TERMINATED // We've reached the end of our path. }; // Attributes... States Condition; // What state am I in? How's my health? Evaluator* NextEvaluator; // Linked List Pointer. unsigned int StreamStartPosition; // Indexes the position where we started. 
unsigned int CurrentPosition; // Indexes the node we are surfing. int WildRunLength; // Wildcard run length so far. // EvaluateThis() assumes it is being given the next character along the // path of a thread in the token matrix. It follows that thread and evaluates // it's condition. States EvaluateThis(unsigned short int i); // Follow the next byte. // isNoDuplicate() is used to keep us from allocating identical evaluators. This is // key to creating buddies when working with wildcards. It prevents us from recursively // proliferating evaluators at each new character when running in a wildcard loop. bool isNoDuplicate(unsigned int Position) { // Returns false if there is a duplicate. if(CurrentPosition == Position) // Obviously, if I match, then there's a dup. return false; // If I don't match and I'm the last one then if(NextEvaluator==NULL) // it must be true there are no dups. If there return true; // are more to ask then I'll let them answer. else return NextEvaluator->isNoDuplicate(Position); } Evaluator(unsigned int s, EvaluationMatrix* m); // Constructor... ~Evaluator(){ if(NextEvaluator!=NULL){ // If there's more to this list then delete NextEvaluator; // delete it. } NextEvaluator = NULL; // Always null on exit. } }; // A MatchRecord is created each time a new rule match occurrs. These records form a // linked list within the Evaluation Matrix that can be spit out after the process is // over for reporting purposes. class MatchRecord { public: int MatchStartPosition; // Where in the data stream did the match start? int MatchEndPosition; // Where in the data stream did the match end? int MatchSymbol; // What symbol was attached to the match rule? 
inline int RuleId(){return (MatchSymbol/SYMBOL_RANGE);} // Decode RuleID inline int RuleGroup(){return (MatchSymbol%SYMBOL_RANGE);} // Decode GroupID MatchRecord* NextMatchRecord; MatchRecord(int sp, int ep, int sym) { // When constructing a MatchRecord, MatchStartPosition = sp; // you must provide all of it's data. MatchEndPosition = ep; MatchSymbol = sym; // Since match records are always added to NextMatchRecord = NULL; // the end our next pointer is always NULL. } ~MatchRecord(){ if(NextMatchRecord != NULL) // If there's more list, then delete it. delete NextMatchRecord; NextMatchRecord = NULL; // Clean up our pointer before leaving. } }; // Now that we've created our utility classes, we'll create another class (with an instance) // that builds a matrix to evaluate all incoming characters, manage the list, and keeps // statistics and results from the execution process. class EvaluationMatrix { private: TokenMatrix* myTokenMatrix; // Token Matrix that I evaluate with. Evaluator* EvaluatorList; // Linked list of Evaluators. Evaluator* CurrentEvaluator; // Current Evaluator (when checking) Evaluator* PreviousEvaluator; // Previous Evaluator (when checking) // Evaluator Caching Mechanism. Evaluator* EvaluatorCache; // List of cached, ready evaluators. Evaluator* SourceEvaluator(int s, EvaluationMatrix* m); // Get a cached or new evaluator. void CacheEvaluator(Evaluator* e); // Cache a used evaluator. int CountOfEvaluators; // Current count of evaluators. int PassResult; // Result of the latest evaluation pass. MatchRecord* LastResultInList; // Keeps track of the end of the result list. MatchRecord* AddMatchRecord(int sp, int ep, int sym); // Add a match result. // DropEvaluator() is called by the EvaluateThis() method whenever an evaluator // reports the FALLEN_OFF result. 
The EvaluateThis() method keeps two values up // to date - one is the current evaluator (which will be dropped) and the other is // the previous evaluator (which will be updated to heal the list). // When we've finished this function, the CurrentEvaluator will be on the next // evaluator node if it exists. Therefore, the caller should skip it's normal // list itteration code when this function has been called. void DropEvaluator(); public: // Exception classes... class BadAllocation : public runtime_error { // Allocation failed exception. public: BadAllocation(const string& w):runtime_error(w) {} }; class MaxEvalsExceeded : public runtime_error { // Too many evaluators exception. public: MaxEvalsExceeded(const string& w):runtime_error(w) {} }; class OutOfRange : public runtime_error { // Out of range exception. public: OutOfRange(const string& w):runtime_error(w) {} }; // Attributes... int CountOfCharacters; // How many characters have been evaluated. int MaximumCountOfEvaluators; // Largest matrix size reached. MatchRecord* ResultList; // List of match results. int DeepSwitch; // true if we're doing a deep scans. // 20030216 _M High Level Conversion Optimizers... int i_lower; // Lower case version of byte under test. bool i_isDigit; // true if i is a digit. bool i_isSpace; // true if i is whitespace. bool i_isAlpha; // true if i is alpha. // AddEvaluator() is made public because the Evaluator object must have access // to it in order to handle the creation of buddies as it evaluates it's rules. // Similarly the getTokens is public because evaluators must use this when they // initialize. In a later version we will clean this up so that all of this stuff // can be handled somewhat more privately. Token* getTokens() { // Deliver the raw token matrix return myTokenMatrix->getMatrix(); // for use when creating evaluators. } int getMatrixSize() { // Deliver the raw matrix size return myTokenMatrix->Size(); // for use when creating evaluators. 
} Evaluator* AddEvaluator(int s, unsigned int m); // Adds a new evaluator to the top. Evaluator* InsEvaluator(int s, unsigned int m); // Inserts a new evaluator after the // current evaluator. (Only called by // an existing evaluator in process...) // isNoDuplicate(int p) checks for duplicate evaulators bool isNoDuplicate(unsigned int p) { // If there's no list there can be no if(EvaluatorList == NULL) // duplicates so we're true. If there is return true; // a list then we'll let the list answer. else return EvaluatorList->isNoDuplicate(p); } // EvaluateThis() Moves each evaluator with the current character and creates a new // evaluator for the current spot in the input file to make all rules global. int EvaluateThis(unsigned short int i); EvaluationMatrix(TokenMatrix* m) { // Constructor w/ pointer to Token Matrix... myTokenMatrix = m; // Grab my TokenMatrix. EvaluatorList = NULL; // Start off with no evaluators. EvaluatorCache = NULL; // Start off with no evaluator cache. CurrentEvaluator = NULL; // NULL means starting at the top. PreviousEvaluator = NULL; // NULL means previous is the top. ResultList = NULL; // Start off with no results in our list. LastResultInList = NULL; CountOfCharacters = 0; // The count of characters will be zero and MaximumCountOfEvaluators = 0; // the maximum Evaluator count will be zero CountOfEvaluators = 0; // and the current count will also be zero. PassResult = 0; // Initialize expecting no matches. } ~EvaluationMatrix(){ // Destructor to clean up memory allocations. myTokenMatrix = NULL; // Stop pointing at the TokenMatrix // Both of these lists konw how to delete themselves. // 20060531_M Fixed possible crash by checking for NULL before // deleting these lists. Also added cleanup for the EvaluatorCache. if(NULL!=EvaluatorCache) { delete EvaluatorCache; // Delete the evaluator cache. EvaluatorCache = NULL; // Then clear it's pointer. } if(NULL!=EvaluatorList) { delete EvaluatorList; // Delete the evaluator list. 
EvaluatorList = NULL; // Then clear it's pointer. } if(NULL!=ResultList) { delete ResultList; // Delete the result list. ResultList = NULL; // Then clear it's pointer. } } }; // 20060531_M Implementation of the evaluator cache is all inline. // In place of new Evaluator() we now can use SourceEvaluator() // In place of delete Evaluator() we now can use CacheEvaluator() // The effect is to store previously allocaed evaluators in the EvaluatorCache // list so that they can be reused. This avoids the frequen use of // new and delete and allows us to skip a few extra cycles for initialization // because much of the constructor work for a new evaluator is already done // in any cached evaluator. // // In practice, at least one evaluator is likely to be created and destroyed // for each byte that is scanned. This new mechanism significantly reduces the // number of cycles that would normally be associated with those operations by // eliminating them most of the time. Instead of returning used memory to the // heap during delete, the evaulator is simply added to the cache list. Instead // of allocating new space from the heap and initializing the object, a chached // evaluator is simply moved from the cache into production. Moving into and // out of the cache is roughly as simple as changing a couple of pointers. // In place of new Evaluator, we do this... inline Evaluator* EvaluationMatrix::SourceEvaluator(int s, EvaluationMatrix* m) { // Get a cached or new evaluator. if(NULL==EvaluatorCache) return new Evaluator(s,m); // If we have no cache, use new! Evaluator* reuse = EvaluatorCache; // Otherwise grab a reusable one. EvaluatorCache = reuse->NextEvaluator; // Collaps the cache by one. reuse->NextEvaluator = NULL; // Clean it up a bit. reuse->StreamStartPosition = s; // Record our starting point. reuse->CurrentPosition = 0; // Reset the Current Position. reuse->WildRunLength = 0; // Reset the run length. reuse->Condition = Evaluator::DOING_OK; // Reset the condition. 
return reuse; // Return the reusable unit. } // In place of delete Evaluator, we do this... inline void EvaluationMatrix::CacheEvaluator(Evaluator* e) { // Cache a used evaluator. e->NextEvaluator = EvaluatorCache; // Link the used evaluator EvaluatorCache = e; // into the cache; } // In the above, the first evaluator added will get NULL as it's NextEvaluator. // When that first evaulator is used, the NULL pointer will return to the root // of the EvaluatorCache list. In this regard the cache acts like a stack. #endif