// madscientist / SNFMulti

// snf_engine.cpp
//
// (C) 1985-2004 MicroNeil Research Corporation
// (C) 2005-2009 ARM Research Labs, LLC
// See www.armresearch.com for the copyright terms.
//
// Derived from original work on cellular automation for complex pattern
// reflex engine 1985 Pete McNeil (Madscientist)
//
// Derived from rapid scripting engine (token matrix) implementation 1987
//

// 20040419 _M Adding Verify() method. Beginning with version 2-3 of Message Sniffer
// we are embedding a Mangler digest of the rulebase file. The Verify() method reconstructs
// the digest and compares it. This ensures that no part of the rulebase file can be
// corrupted without the snf2check utility detecting the problem. Prior to this version
// it was possible to have undetected corruption in the middle of the rulebase file. The
// Mangler digest will prevent that.

// 20030130 _M Added testing section in TokenMatrix to throw an exception if the file
// is too small to be a valid matrix. The value is calculated based on the idea that a
// valid matrix will have been encrypted in two segments so the file must be at least
// as large as these two segments. This is intended to solve the zero-length-rulebase
// bug where an access violation would occur if the file was of zero length.

// 20021030 _M Creation of snf_engine module by dragging the sniffer pattern matching engine out
// of the sniffer.cpp file.

#include <unistd.h>
#include <cstdio>
#include <cctype>
#include <ctime>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include "mangler.hpp"
#include "snf_engine.hpp"

using namespace std;

///////////////////////////////////////////////////////////////////////////////////////////
// BEGIN IMPLEMENTATIONS //////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////////////////////////////////////////////////////

// Token Matrix Implementations ///////////////////////////////////////////////////////////

// TokenMatrix::Load(filename)

// Convenience overload: takes the rulebase path as a string and forwards it
// to the const char* overload of Load().
void TokenMatrix::Load(string& FileName) {
  const char* PathAsCString = FileName.c_str();                                 // Borrow the null terminated path text
  Load(PathAsCString);                                                          // and hand it to the char* loader.
}

// Opens the named file in binary mode and loads the token matrix from it by
// calling Load(ifstream&).
//
// Parameters:
//   FileName - null terminated path of the matrix file to open.
//
// Throws:
//   BadFile - if the stream is in a failed state after the open attempt.
//   Anything thrown by Load(ifstream&) propagates unchanged.

void TokenMatrix::Load(const char* FileName) {                                  // Initializes the token matrix by file name.

  ifstream MatrixFile(FileName,ios::binary);                                    // Open the file.

  // fail() is true when either failbit (set when the open does not succeed)
  // or badbit is set. The previous test compared the stream object itself
  // against NULL, which relied on the pre-C++11 conversion to void* and is
  // rejected by C++11 and later compilers. The exception text is kept as it
  // was so that existing logs and diagnostics do not change.

  if(MatrixFile.fail())                                                         // If anything is wrong with the file
    throw BadFile("TokenMatrix::Load()(MatrixFile==NULL || MatrixFile.bad())"); // then throw a bad file exception.

  Load(MatrixFile);                                                             // Load the matrix from the file.
  MatrixFile.close();                                                           // Be nice and clean up our file.
}

// TokenMatrix::Load(stream)

// Replaces the current token matrix with the complete contents of an open
// stream.
//
// The number of tokens is derived from the stream length divided by
// sizeof(Token); the whole stream is then read into a freshly allocated
// Token array.
//
// Parameters:
//   F - an open input stream positioned anywhere; it is rewound to the start.
//
// Throws:
//   BadMatrix     - if the stream holds fewer than MinimumValidMatrix tokens,
//                   or if the stream reports badbit after the read.
//   BadAllocation - if the allocation yields a null pointer.
//
// When the size check or the allocation check fails the object is left
// empty (Matrix == NULL, MatrixSize == 0).

void TokenMatrix::Load(ifstream& F) {                                           // Initializes the token matrix from a file.

  // Release any previous matrix. The array is created with new[] below, so
  // it must be released with delete[] (scalar delete on an array is undefined
  // behaviour). delete[] on a null pointer is a no-op, so no test is needed.
  // The pointer is cleared so that an exception thrown further down cannot
  // leave a dangling pointer behind to be deleted a second time.

  MatrixSize = 0;                                                               // Clear out the old Matrix Size and array.
  delete[] Matrix;
  Matrix = NULL;

  F.seekg(0,ios::end);                                                          // Find the end of the file.
  MatrixSize = F.tellg() / sizeof(Token);                                       // Calculate how many tokens.
  F.seekg(0);                                                                   // Go back to the beginning.

  if(MatrixSize < MinimumValidMatrix) {                                         // If the matrix file is too small then
    MatrixSize = 0;                                                             // forget the rejected size and
    throw BadMatrix("TokenMatrix::Load() (MatrixSize < MinimumValidMatrix)");   // we must reject it.
  }

  Matrix = new Token[MatrixSize];                                               // Allocate an array of tokens.

  // A standard-conforming new[] reports failure by throwing std::bad_alloc
  // rather than returning null; this test is retained for toolchains that
  // return null instead.

  if(Matrix == NULL) {                                                          // Check for an allocation error.
    MatrixSize = 0;                                                             // Nothing was loaded.
    throw BadAllocation("TokenMatrix::Load() Matrix == NULL)");                 // and throw an exception if it happens.
  }

  F.read(                                                                       // Now read the file into the allocated
    reinterpret_cast<char*>(Matrix),                                            // matrix by recasting it as a character
    (MatrixSize * sizeof(Token)));                                              // buffer of the correct size.

  if(F.bad())                                                                   // If there were any problems reading the
    throw BadMatrix("TokenMatrix::Load() (F.bad())");                           // matrix then report the bad matrix.
}

// TokenMatrix::Validate(key)

// Decrypts the first and last security segments of the loaded matrix in place
// and checks that the security key stored at the end of the matrix matches
// the key supplied by the caller.
//
// Parameters:
//   SecurityKey - the key used to prime the mangler. Per the rulebase format
//                 notes the key is 24 bytes (8 byte license id + 16 byte
//                 authentication code); the length is not checked here
//                 because a wrong key simply fails to decrypt.
//
// Throws:
//   BadMatrix - if the key recovered from the matrix differs from SecurityKey.

void TokenMatrix::Validate(string& SecurityKey) {

  MANGLER KeyedCipher;                                                          // The mangler that does the decrypting.

  // Treat the token matrix as a flat run of bytes for the rest of this work.

  unsigned char* RawBytes = reinterpret_cast<unsigned char*>(Matrix);
  int RawByteCount = (MatrixSize * sizeof(Token));

  // Prime the mangler: every byte of the security key is pushed through
  // Encrypt() so that the cipher state evolves from the key.

  int KeyLength = SecurityKey.length();
  for(int k = 0; k < KeyLength; k++) {
    KeyedCipher.Encrypt(SecurityKey.at(k));
  }

  // Decrypt the leading security segment in place.

  for(int Head = 0; Head < SecuritySegmentSize; Head++) {
    RawBytes[Head] = KeyedCipher.Decrypt(RawBytes[Head]);
  }

  // Decrypt the trailing security segment in place, continuing with the same
  // cipher state.

  int TailStart = RawByteCount - SecuritySegmentSize;
  for(int Tail = TailStart; Tail < RawByteCount; Tail++) {
    RawBytes[Tail] = KeyedCipher.Decrypt(RawBytes[Tail]);
  }

  // The final SecurityKeyBufferSize bytes of the matrix hold the security key
  // padded with nulls (the buffer lines up with token boundaries, so keys of
  // up to SecurityKeyBufferSize-1 bytes fit). Force a terminating null into
  // the last byte so the buffer can always be read as a C string.

  unsigned char* EmbeddedKey = RawBytes + (RawByteCount - SecurityKeyBufferSize);
  EmbeddedKey[SecurityKeyBufferSize-1] = 0;

  string RecoveredKey(reinterpret_cast<char*>(EmbeddedKey));

  // Matching keys show that both segments decrypted properly with the key we
  // were given. Only the two segments are decrypted here, not the whole file.

  const bool KeysAgree = (SecurityKey == RecoveredKey);
  if(!KeysAgree) {
    throw BadMatrix("TokenMatrix::Validate() (SecurityKey != SecurityCheck)");
  }
}

// TokenMatrix::Verify(key)

// Rebuilds the mangler digest of the loaded matrix and compares it with the
// digest stored in the matrix just ahead of the security key buffer.
//
// Parameters:
//   SecurityKey - the key used to prime the digest mangler.
//
// Throws:
//   BadMatrix - if any byte of the rebuilt digest differs from the stored one.

void TokenMatrix::Verify(string& SecurityKey) {

  MANGLER DigestEngine;                                                         // The mangler that builds the digest.

  // Treat the token matrix as a flat run of bytes.

  unsigned char* RawBytes = reinterpret_cast<unsigned char*>(Matrix);
  int RawByteCount = (MatrixSize * sizeof(Token));

  // Prime the digest engine with the security key, one byte at a time.

  int KeyLength = SecurityKey.length();
  for(int k = 0; k < KeyLength; k++) {
    DigestEngine.Encrypt(SecurityKey.at(k));
  }

  // The stored digest sits immediately before the security key buffer at the
  // end of the matrix: back up past the key buffer, then past the digest.

  int DigestStart = RawByteCount - SecurityKeyBufferSize - RulebaseDigestSize;

  // Pump every byte that precedes the stored digest through the mangler.

  int Consumed = 0;                                                             // How many bytes have been digested.
  while(Consumed < DigestStart) {
    DigestEngine.Encrypt(RawBytes[Consumed]);
    Consumed++;
  }

  // The stored digest was emitted by encrypting zeros, so doing the same here
  // must reproduce it byte for byte.

  for(int d = 0; d < RulebaseDigestSize; d++) {
    if(DigestEngine.Encrypt(0) != RawBytes[Consumed + d]) {
      throw BadMatrix("TokenMatrix::Verify() Bad Digest");
    }
  }

  // Falling out of the loop means every digest byte matched.
}

// Reverses the byte order of every unsigned int sized word of the token
// matrix in place, converting between big and little endian layouts.
void TokenMatrix::FlipEndian() {
    assert(sizeof(unsigned int)==4);                                            // The masks below assume 4 byte words.

    unsigned int* Word = reinterpret_cast<unsigned int*>(Matrix);               // Walk the matrix one word at a time.
    int WordCount = ((MatrixSize * sizeof(Token)) / sizeof(unsigned int));      // How many words there are to flip.

    for(int Remaining = WordCount; Remaining > 0; --Remaining, ++Word) {
        const unsigned int Before = *Word;

        // Isolate each byte and move it to the mirrored position.
        const unsigned int TopToBottom  = (Before & 0xff000000) >> 24;
        const unsigned int UpperToLower = (Before & 0x00ff0000) >> 8;
        const unsigned int LowerToUpper = (Before & 0x0000ff00) << 8;
        const unsigned int BottomToTop  = (Before & 0x000000ff) << 24;

        *Word = BottomToTop | LowerToUpper | UpperToLower | TopToBottom;        // Store the flipped word.
    }
}

// Evaluator Implementations //////////////////////////////////////////////////////////////

// 20030216 _M Optimization conversions

// Accessors for values held by the parent EvaluationMatrix, so that
// EvaluateThis() can read them without recomputing them per evaluator.

// The i_lower value held by the parent matrix.
inline int Evaluator::i_lower() {
    return myEvaluationMatrix->i_lower;
}

// The i_isDigit flag held by the parent matrix.
inline bool Evaluator::i_isDigit() {
    return myEvaluationMatrix->i_isDigit;
}

// The i_isSpace flag held by the parent matrix.
inline bool Evaluator::i_isSpace() {
    return myEvaluationMatrix->i_isSpace;
}

// The i_isAlpha flag held by the parent matrix.
inline bool Evaluator::i_isAlpha() {
    return myEvaluationMatrix->i_isAlpha;
}


// Evaluator::Evaluator(position,evalmatrix) Constructor

// Builds an evaluator that starts at the root of the token matrix.
//
// Parameters:
//   s - recorded as StreamStartPosition.
//   m - the EvaluationMatrix this evaluator belongs to; its token array and
//       matrix size are captured here.
Evaluator::Evaluator(int s, EvaluationMatrix* m) {

    // Walker state: a new evaluator is healthy, sits at the root of the
    // matrix, has no wildcard run in progress and no evaluator chained to it.

    Condition = DOING_OK;
    CurrentPosition = 0;
    WildRunLength = 0;
    NextEvaluator = NULL;
    StreamStartPosition = s;

    // Matrix bindings: remember the owning evaluation matrix, then capture
    // the tokens and size it exposes.

    myEvaluationMatrix = m;
    Matrix = myEvaluationMatrix->getTokens();
    MatrixSize = myEvaluationMatrix->getMatrixSize();

    // Positions at or beyond this limit are reported as OUT_OF_RANGE by
    // EvaluateThis(); the limit keeps 256 tokens of headroom below the end.

    PositionLimit = MatrixSize - 256;
}

// Evaluator::EvaluateThis()

// Advances this evaluator by one input byte and reports its resulting state.
//
// Parameters:
//   i - the byte being evaluated. Its lower-case form and its digit / space /
//       alpha classifications are read from the parent EvaluationMatrix via
//       i_lower(), i_isDigit(), i_isSpace() and i_isAlpha().
//
// Returns (the same value is stored in Condition):
//   OUT_OF_RANGE - CurrentPosition is at or beyond PositionLimit.
//   TERMINATED   - the token at CurrentPosition is a termination token.
//   DOING_OK     - at least one candidate token matched and this evaluator
//                  moved along the first match found.
//   FALLEN_OFF   - nothing matched, or neither the precise nor the case
//                  insensitive token matched and WildRunLength reached
//                  MaxWildRunLength.
//
// Side effects:
//   - The first match (in most-to-least specific order) moves CurrentPosition.
//     Every additional match calls AddEvaluator() on the parent matrix so the
//     alternative path is explored too.
//   - A run-loop gateway token at this node calls InsEvaluator() on the
//     parent matrix.
//   - WildRunLength is reset on a precise or case insensitive match and
//     incremented otherwise.

Evaluator::States Evaluator::EvaluateThis(unsigned short int i) {               // Follow this byte.

  Condition = FALLEN_OFF;                                                       // Start off guessing we'll fall off.

  // First upgrade will be to DOING_OK, after that we launch buddies.

  // In order to handle wildcard characters, this evaluation function must actually
  // compare the character to a number of possibilities in most-specific to least-
  // specific order to see if any match. In order to support overlapping rule sets,
  // if more than one wildcard matches at this node, an additional evaluator will be
  // placed in line already _AT THIS PATH POINT_ so that both possibilities will be
  // explored. New evaluators are always added at the TOP of the list so we are always
  // guaranteed not to overdrive an evaluator and end up in a recursive race condition.

  // 20030216 _M Optimizations. In order to reduce the number of instructions per byte
  // the parent Evaluation Matrix will now translate the byte i into boolean flags
  // indicating if they are digits, white, letters, etc... and converting to lower
  // case etc... This conversion is then done only once so that thereafter only a simple
  // comparison need be made. This should eliminate many function calls and a collection
  // of numeric comparisons.
  //
  // I am also moving the simple comparisons to the front of each logical section so
  // that failures there can short-circuit subsequent logic to view the state of the
  // matrix regarding that character. The matrix lookup is likely to be more expensive
  // than a single binary comparison.

  // For safety, we check our evaluation position here - If it is out of range
  // then we will return OUT_OF_RANGE to indicate the problem rather than accessing
  // data beyond our token matrix's limits.

  /*** 20070606 _M Reduced the strength of this check from 3 comparisons to 1.
  **** CurrentPosition is now an unsigned int so it cannot be negative. The limit
  **** is now calculated once in the constructor as PositionLimit.

  if(
    CurrentPosition < 0 ||                              // Position should never be < 0
    xPrecise >= MatrixSize ||                           // nor xPrecise over the top.
    xNoCase >= MatrixSize                               // nor NoCase over the top.
    )                                                   // If either occur we have a
    return Condition = OUT_OF_RANGE;                    // bad matrix.
  ***/

  if(CurrentPosition >= PositionLimit) return Condition = OUT_OF_RANGE;

  // All of the positions calculated below are intended to be within the range checked
  // above (PositionLimit is MatrixSize - 256), so we're safe if we get to this point.
  // NOTE(review): this assumes i and the WILD_* / RUN_GATEWAY offsets are all
  // below 256 - TODO confirm against the header and the callers.

  // So, at this point it's safe to check and see if I'm terminated. Note that if I
  // am at a termination point, my path has terminated and I have a symbol so I don't
  // need to resolve any more characters - even the current one.

  if(Matrix[CurrentPosition].isTermination()) return Condition = TERMINATED;

  // NOTE: The above is written for sudden-death termination. Eventually we will want
  // to support deep - filters which will show every rule match and this will need to
  // be rewritten.

  // Evaluation order, most-to-least specific. Every candidate index is computed
  // from the position we hold on entry, before any match below moves us.

  int xPrecise = CurrentPosition + i;                                           // Match Precise Character
  int xNoCase = CurrentPosition + i_lower();                                    // Match Case insensitive

  // Of course I may need to resolve some of the following
  // wildcard characters.

  int xLetter = CurrentPosition + WILD_LETTER;                                  // Match Any letter.
  int xDigit = CurrentPosition + WILD_DIGIT;                                    // Match Any digit.
  int xNonWhite = CurrentPosition + WILD_NONWHITE;                              // Match Any non-whitespace.
  int xWhiteSpace = CurrentPosition + WILD_WHITESPACE;                          // Match Any whitespace.
  int xAnyInline = CurrentPosition + WILD_INLINE;                               // Match Any byte but new line.
  int xAnything = CurrentPosition + WILD_ANYTHING;                              // Match Any character at all.
  int xRunGateway = CurrentPosition + RUN_GATEWAY;                              // Match the run-loop gateway.

  // Try to match the precise character.

  if(Matrix[xPrecise].Character() == i) {                                       // If we've matched our path
    Condition = DOING_OK;                                                       // upgrade to doing ok.
    CurrentPosition = xPrecise +
      Matrix[xPrecise].Vector;                                                  // Move myself along this path.
  }

  // Try to match the case insensitive character. This is skipped when the
  // lower-case form equals i because the precise test above already covered it.

  if(i_lower()!=i && Matrix[xNoCase].Character()==i_lower()){

                                                                                // If we've matched our path
                                                                                // with a compromised case then
    if(Condition==FALLEN_OFF) {                                                 // check: if no matches yet,
      Condition = DOING_OK;                                                     // upgrade to doing ok.
      CurrentPosition = xNoCase +
        Matrix[xNoCase].Vector;                                                 // Move myself along this path.
    }
                                                                                // If we have more than one match then
    else {                                                                      // let's try to make a buddy...

      // To create a buddy, add an evaluator at the top of the list (behind us) and
      // set its position as if it had been here all along and had matched the current
      // character. Next time we evaluate it will be just like all the others.

      myEvaluationMatrix->
        AddEvaluator(StreamStartPosition,Matrix[xNoCase].Vector+xNoCase);

    }
  }

  // Start looking at wildcards... Here's where we must limit run length.

  if(Condition == DOING_OK)                                                     // If we matched above we'll
    WildRunLength = 0;                                                          // reset our wild run count.
                                                                                // If not then we need to keep
  else {                                                                        // track of our run length.

     ++WildRunLength;                                                           // Count up the run length.
     if(WildRunLength >= MaxWildRunLength)                                      // If we reach the max then
       return Condition = FALLEN_OFF;                                           // we've fallen off the path
  }                                                                             // and we do it immediately.

  // WILD_LETTER
  // If that didn't do it for us...
  // Try to match any letter character.

  // The way this next one works (and the rest of the wildcards) is we look into
  // the token matrix to see if the wildcard is part of the current path... If it
  // is then we compare the incoming character to that wildcard evaluation function
  // and if it is true, then we've got a match.

  if(i_isAlpha() && Matrix[xLetter].Character()==WILD_LETTER){

                                                                                // If we've matched our path
                                                                                // with any letter then
    if(Condition==FALLEN_OFF) {                                                 // check: if no matches yet,
      Condition = DOING_OK;                                                     // upgrade to doing ok.
      CurrentPosition = xLetter +
        Matrix[xLetter].Vector;                                                 // Move myself along this path.
    }

    else {                                                                      // Otherwise make a buddy...

      // To create a buddy, add an evaluator at the top of the list (behind us) and
      // set its position as if it had been here all along and had matched the current
      // character. Next time we evaluate it will be just like all the others.

      myEvaluationMatrix->
        AddEvaluator(StreamStartPosition,Matrix[xLetter].Vector+xLetter);

    }
  }

  // WILD_DIGIT
  // If that didn't do it for us...
  // Try to match any digit character.

  if(i_isDigit() && Matrix[xDigit].Character()==WILD_DIGIT){

                                                                                // If we've matched our path
                                                                                // with any digit then
    if(Condition==FALLEN_OFF) {                                                 // check: if no matches yet,
      Condition = DOING_OK;                                                     // upgrade to doing ok.
      CurrentPosition = xDigit +
        Matrix[xDigit].Vector;                                                  // Move myself along this path.
    }

    else {                                                                      // Otherwise make a buddy...

      // To create a buddy, add an evaluator at the top of the list (behind us) and
      // set its position as if it had been here all along and had matched the current
      // character. Next time we evaluate it will be just like all the others.

      myEvaluationMatrix->
        AddEvaluator(StreamStartPosition,Matrix[xDigit].Vector+xDigit);

    }
  }

  // WILD_NONWHITE
  // If that didn't do it for us...
  // Try to match any non-whitespace character.

  if(!i_isSpace() && Matrix[xNonWhite].Character()==WILD_NONWHITE){

                                                                                // If we've matched our path
                                                                                // with any non-whitespace then
    if(Condition==FALLEN_OFF) {                                                 // check: if no matches yet,
      Condition = DOING_OK;                                                     // upgrade to doing ok.
      CurrentPosition = xNonWhite +
        Matrix[xNonWhite].Vector;                                               // Move myself along this path.
    }

    else {                                                                      // Otherwise make a buddy...

      // To create a buddy, add an evaluator at the top of the list (behind us) and
      // set its position as if it had been here all along and had matched the current
      // character. Next time we evaluate it will be just like all the others.

      myEvaluationMatrix->
        AddEvaluator(StreamStartPosition,Matrix[xNonWhite].Vector+xNonWhite);

    }
  }

  // WILD_WHITESPACE
  // If that didn't do it for us...
  // Try to match any whitespace character.

  if(i_isSpace() && Matrix[xWhiteSpace].Character()==WILD_WHITESPACE){

                                                                                // If we've matched our path
                                                                                // with any whitespace then
    if(Condition==FALLEN_OFF) {                                                 // check: if no matches yet,
      Condition = DOING_OK;                                                     // upgrade to doing ok.
      CurrentPosition = xWhiteSpace +
        Matrix[xWhiteSpace].Vector;                                             // Move myself along this path.
    }

    else {                                                                      // Otherwise make a buddy...

      // To create a buddy, add an evaluator at the top of the list (behind us) and
      // set its position as if it had been here all along and had matched the current
      // character. Next time we evaluate it will be just like all the others.

      myEvaluationMatrix->
        AddEvaluator(StreamStartPosition,Matrix[xWhiteSpace].Vector+xWhiteSpace);

    }
  }

  // WILD_INLINE
  // If that didn't do it for us...
  // Try to match any character EXCEPT a new line.

  if(i != '\n' && Matrix[xAnyInline].Character()==WILD_INLINE){

                                                                                // If we've matched our path
                                                                                // with any byte but \n then
    if(Condition==FALLEN_OFF) {                                                 // check: if no matches yet,
      Condition = DOING_OK;                                                     // upgrade to doing ok.
      CurrentPosition = xAnyInline +
        Matrix[xAnyInline].Vector;                                              // Move myself along this path.
    }

    else {                                                                      // Otherwise make a buddy...

      // To create a buddy, add an evaluator at the top of the list (behind us) and
      // set its position as if it had been here all along and had matched the current
      // character. Next time we evaluate it will be just like all the others.

      myEvaluationMatrix->
        AddEvaluator(StreamStartPosition,Matrix[xAnyInline].Vector+xAnyInline);

    }
  }

  // WILD_ANYTHING
  // If that didn't do it for us...
  // Try to match any character.

  if(Matrix[xAnything].Character()==WILD_ANYTHING){

                                                                                // If we've matched our path
                                                                                // with any character at all then
    if(Condition==FALLEN_OFF) {                                                 // check: if no matches yet,
      Condition = DOING_OK;                                                     // upgrade to doing ok.
      CurrentPosition = xAnything +
        Matrix[xAnything].Vector;                                               // Move myself along this path.
    }

    else {                                                                      // Otherwise make a buddy...

      // To create a buddy, add an evaluator at the top of the list (behind us) and
      // set its position as if it had been here all along and had matched the current
      // character. Next time we evaluate it will be just like all the others.

      myEvaluationMatrix->
        AddEvaluator(StreamStartPosition,Matrix[xAnything].Vector+xAnything);

    }
  }

  // 20021112 _M
  // Beginning with version 2 of Message Sniffer we've implemented a new construct
  // for run-loops that prevents any interference between rules where run-loops might
  // appear in locations coinciding with standard match bytes. The new methodology
  // uses a special run-loop-gateway character to isolate any run loops from standard
  // nodes in the matrix. Whenever a run-loop gateway is present at a node a buddy is
  // inserted AFTER the current evaluator so that it will evaluate the current character
  // from the position of the run-loop gateway. This allows run loops to occupy the same
  // positional space as standard matches while maintaining isolation between their paths
  // in the matrix.

  // We don't want to launch any run loop buddies unless we matched this far. If we did
  // match up to this point and the next character in a pattern includes a run loop then
  // we will find a gateway byte at this point representing the path to any run loops.

  // If we made it this far launch a buddy for any run-loop gateway that's present.
  // Of course, the buddy must be evaluated after this evaluator during this pass because
  // he will have shown up late... That is, we don't detect a run gateway until we're
  // sitting on a new node looking for a result... The very result we may be looking for
  // could be behind the gateway - so we launch the buddy behind us and he will be able
  // to match anything in this pass that we missed when looking for a non-run match.

  // NOTE(review): the gateway test below does not look at Condition, so it runs
  // whether or not the current byte matched anything above - confirm that is intended.

  if(Matrix[xRunGateway].Character() == RUN_GATEWAY)
    myEvaluationMatrix->
      InsEvaluator(StreamStartPosition,Matrix[xRunGateway].Vector+xRunGateway);

  // At this point, we've tried all of our rules, and created any buddies we needed.
  // If we were sitting on a termination we returned long ago. Otherwise we either stayed
  // on the path or we fell off. Either way, the flag is in Condition so we can send
  // it on.

  return Condition;

}

///////////////////////////////////////////////////////////////////////////////////////////
// EvaluationMatrix Implementations ///////////////////////////////////////////////////////

// EvaluationMatrix::AddMatchRecord(int sp, int ep, int sym)

// Most of this functionality is about deep scans - which have been put on hold for now
// due to the complexity and the scope of the current application. For now, although
// we will use this reporting mechanism, it will generally record only one event.

MatchRecord* EvaluationMatrix::AddMatchRecord(int sp, int ep, int sym) {

  // Builds a MatchRecord from (sp, ep, sym), appends it to the tail of the
  // result list and hands the new record back to the caller.
  //
  //   sp, ep - the two stream positions stored in the record; they must differ.
  //   sym    - the symbol stored in the record.
  //
  // Throws OutOfRange when sp equals ep and BadAllocation when the new record
  // pointer is NULL.

  // 20030216 _M Range check. Some systems have reported matches with equal
  // positions, which indicates an undetected corruption - so such a match is
  // reported here instead of being recorded.

  if(ep == sp) {
    throw OutOfRange("sp==ep");
  }

  // NOTE(review): a plain new-expression reports failure by throwing
  // std::bad_alloc on a conforming compiler, so the NULL test below is
  // presumably only reachable on builds where new does not throw - confirm.

  MatchRecord* const Fresh = new MatchRecord(sp, ep, sym);

  if(NULL == Fresh) {
    throw BadAllocation("NewMatchRecord==NULL");
  }

  // Hook the record onto the list. With an empty list the record becomes the
  // head; otherwise it is chained behind the current tail. Either way it is
  // the new tail afterwards.

  if(ResultList != NULL) {
    LastResultInList->NextMatchRecord = Fresh;
  } else {
    ResultList = Fresh;
  }
  LastResultInList = Fresh;

  return Fresh;
}


// EvaluationMatrix::AddEvaluator()

// 20021112 _M
// This function has been modified to include a check for duplicates as well as setting
// the mount point for the new evaluator. This eliminates a good deal of code elsewhere
// and encapsulates the complete operation. If a duplicate evaluator is found then the
// function returns NULL indicating that nothing was done. In practice, no check is made
// of the return value since any serious error conditions cause errors to be thrown from
// within this function call. These notes apply to some extent to InsEvaluator, which is
// copied from this function and whose only difference is that it puts the new evaluator
// after the current one in the chain in order to support branch-out operations for loop
// sequences in the matrix.

Evaluator* EvaluationMatrix::AddEvaluator(int s, int m) {

  // Puts a new evaluator at the head of the evaluator list.
  //
  //   s - handed to SourceEvaluator() when the evaluator is obtained.
  //   m - mount point; becomes the evaluator's CurrentPosition.
  //
  // Returns the new evaluator, or NULL (and changes nothing) when
  // isNoDuplicate(m) reports that m is already taken.
  // Throws MaxEvalsExceeded when the population is already at MAX_EVALS and
  // BadAllocation when no evaluator could be obtained.

  const bool AlreadyMounted = !isNoDuplicate(m);
  if(AlreadyMounted) {
    return NULL;
  }

  if(MAX_EVALS <= CountOfEvaluators) {
    throw MaxEvalsExceeded("Add:CountOfEvaluators >= MAX_EVALS");
  }

  Evaluator* const Fresh = SourceEvaluator(s,this);

  if(NULL == Fresh) {
    throw BadAllocation("Add:NewEvaluator == NULL");
  }

  // Establish the mount point, then push the evaluator onto the front of the
  // list: it points at the old head and the list head points at it.

  Fresh->CurrentPosition = m;
  Fresh->NextEvaluator = EvaluatorList;
  EvaluatorList = Fresh;

  // Population bookkeeping - also remember the largest population seen.

  if(++CountOfEvaluators > MaximumCountOfEvaluators) {
    MaximumCountOfEvaluators = CountOfEvaluators;
  }

  return Fresh;
}

// EvaluationMatrix::InsEvaluator()

Evaluator* EvaluationMatrix::InsEvaluator(int s, int m) {

  // Inserts a new evaluator into the list directly AFTER CurrentEvaluator.
  //
  //   s - handed to SourceEvaluator() when the evaluator is obtained.
  //   m - mount point; becomes the evaluator's CurrentPosition.
  //
  // Returns the new evaluator, or NULL (and changes nothing) when
  // isNoDuplicate(m) reports that m is already taken.
  // Throws MaxEvalsExceeded when the population is already at MAX_EVALS and
  // BadAllocation when no evaluator could be obtained.
  //
  // CurrentEvaluator is dereferenced without a check, so this must only be
  // called while CurrentEvaluator points at a live evaluator.

  const bool AlreadyMounted = !isNoDuplicate(m);
  if(AlreadyMounted) {
    return NULL;
  }

  if(MAX_EVALS <= CountOfEvaluators) {
    throw MaxEvalsExceeded("Ins:CountOfEvaluators >= MAX_EVALS");
  }

  Evaluator* const Fresh = SourceEvaluator(s,this);

  if(NULL == Fresh) {
    throw BadAllocation("Ins:NewEvaluator == NULL");
  }

  // Establish the mount point, then splice the evaluator in behind the current
  // one: it takes over the current evaluator's successor and the current
  // evaluator is pointed at it.

  Fresh->CurrentPosition = m;
  Fresh->NextEvaluator = CurrentEvaluator->NextEvaluator;
  CurrentEvaluator->NextEvaluator = Fresh;

  // Population bookkeeping - also remember the largest population seen.

  if(++CountOfEvaluators > MaximumCountOfEvaluators) {
    MaximumCountOfEvaluators = CountOfEvaluators;
  }

  return Fresh;
}

// EvaluationMatrix::DropEvaluator()

void EvaluationMatrix::DropEvaluator() { // Drops the current evaluator from the matrix.

  Evaluator* WhereTo = CurrentEvaluator->NextEvaluator;                         // Where do we go from here?

  // First step is to heal the list as if the current evaluator were not present.
  // If there is no previous evaluator - meaning this should be the first one in the
  // list - then we point the list head to the next evaluator on the list (WhereTo)

  if(PreviousEvaluator != NULL)                                                 // If we have a Previous then
    PreviousEvaluator->NextEvaluator = WhereTo;                                 // our next is it's next.
  else                                                                          // If we don't then our next
    EvaluatorList = WhereTo;                                                    // is the first in the list.

  // Now that our list is properly healed, it's time to drop the dead evaluator and
  // get on with our lives...

  CurrentEvaluator->NextEvaluator = NULL;                                       // Disconnect from any list.
  CacheEvaluator(CurrentEvaluator);                                             // Drop the current eval.

  CurrentEvaluator = WhereTo;                                                   // Move on.

  --CountOfEvaluators;                                                          // Reduce our evaluator count.

}


// EvaluationMatrix::EvaluateThis()
//
// This function returns the number of matches that were found. It is possible for more
// than one evaluator to match on a single character.
//
// 0 indicates no matches were found.
// >0 indicates some matches were found.
// If there is a problem then an exception will be thrown.

int EvaluationMatrix::EvaluateThis(unsigned short int i) {

  // Feeds one input value to every evaluator in the list.
  //
  //   i - the current value from the scan stream.
  //
  // Returns the number of evaluators that terminated (matched) on this pass;
  // 0 means no match. Throws OutOfRange when an evaluator reports
  // OUT_OF_RANGE; exceptions from AddEvaluator() and AddMatchRecord() are
  // not caught here.

  AddEvaluator(CountOfCharacters,0);    // First, add a new Evaluator at the root of the
                                        // matrix for the current position in the scan
                                        // stream.

  // The new evaluator is now at the top of our list.
  // If there was a problem then an exception will have been thrown.
  // If our allocation worked ok, then we'll be here and ready to start scanning
  // the rule set with our current character.

  PassResult = 0;                                                               // Start by assuming we won't match.
  CurrentEvaluator = EvaluatorList;                                             // Start at the top of the list.
  PreviousEvaluator = NULL;                                                     // NULL means previous is the top.

  // 20030216 _M
  // Next do some basic conversions and evaluations so they don't need to be done
  // again within the evaluators. From now on the evaluators will look here for basic
  // conversions and boolean check values rather than performing the checks themselves.
  //
  // The <cctype> functions are only defined for EOF and for values that fit in
  // an unsigned char. i is an unsigned short, so anything above 0xFF (assuming
  // 8-bit chars) must not be passed to them. Such a value is treated as having
  // no lower-case form and as being neither a digit, whitespace nor a letter.

  if(i <= 0xFF) {
    i_lower = tolower(i);                                                       // Convert i to lower case.
    i_isDigit = isdigit(i);                                                     // Check for a digit.
    i_isSpace = isspace(i);                                                     // Check for whitespace.
    i_isAlpha = isalpha(i);                                                     // Check for letters.
  } else {
    i_lower = i;                                                                // Outside the character range:
    i_isDigit = 0;                                                              // no case conversion and no
    i_isSpace = 0;                                                              // character class applies.
    i_isAlpha = 0;
  }

  // Next, loop through the list and pass the incoming character to
  // each evaluator. Drop those that fall off, and record those that terminate. The
  // rest of them stick around to walk their paths until they meet their fate.
  //
  // DropEvaluator() moves CurrentEvaluator on to the next evaluator itself, so
  // only the DOING_OK case advances through the list explicitly.

  while(CurrentEvaluator != NULL) {                                             // While there are more evaluators...
                                                                                // go through the list and evaluate
    switch(CurrentEvaluator->EvaluateThis(i)) {                                 // the current character against each.

      case Evaluator::FALLEN_OFF: {                                             // If we've fallen off the path
        DropEvaluator();                                                        // drop the current evaluator and
        break;                                                                  // move on with our lives.
      }

      case Evaluator::DOING_OK: {                                               // If we're still going then...
        PreviousEvaluator = CurrentEvaluator;                                   // keep track of where we've been and
        CurrentEvaluator =                                                      // move forward to the next evaluator
          CurrentEvaluator->NextEvaluator;                                      // in the list.
        break;
      }

      case Evaluator::TERMINATED: {                                             // If we've terminated a path...
        ++PassResult;                                                           // Record our PassResult.

        // Create a new match result using the data in the current evaluator.
        // If there is a problem adding the match an exception will be thrown.
        // CountOfCharacters has not been incremented for this pass yet, so
        // the end position recorded is one less than the current count.

        AddMatchRecord(
           CurrentEvaluator->StreamStartPosition,
           CountOfCharacters - 1,
           myTokenMatrix->Symbol(CurrentEvaluator->CurrentPosition)
           );

        // From Version 2 onward we're always doing deep scans...
        // Having successfully recorded the result of this critter we can kill them off.

        DropEvaluator();                                                        // He's dead.
        break;                                                                  // Now let's keep looking.
      }

      case Evaluator::OUT_OF_RANGE: {                                           // This result is really bad and
                                                                                // probably means we have a bad matrix.

        // The reason we don't throw OutOfRange from within the evaluator is that we
        // may want to take some other action in the future... So, we allow the evaluator
        // to tell us we sent it out of range and then we decide what to do about it.

        throw OutOfRange("case Evaluator::OUT_OF_RANGE:");
      }
    }
  }

  // At the end of this function our PassResult is either an error (which is
  // reported immediately), or it is a match condition. We start out by assuming
  // there will be no match. If we find one, then we reset that result... so at
  // this point, all we need do is report our findings.

  ++CountOfCharacters; // Add one to our Character Count statistic.

  // Note that from this point on, the index in the stream is one less than the
  // CountOfCharacters... for example, if I've evaluated (am evaluating) one character
  // then its index is 0. This will be important when we create any match records.

  return PassResult;  // When we're finished, return the last known result.
}