// snf_engine.cpp
//
// (C) 1985-2004 MicroNeil Research Corporation
// (C) 2005-2009 ARM Research Labs, LLC
// See www.armresearch.com for the copyright terms.
//
// Derived from original work on cellular automation for complex pattern
// reflex engine 1985 Pete McNeil (Madscientist)
//
// Derived from rapid scripting engine (token matrix) implementation 1987
//
// 20040419 _M Adding Verify() method. Beginning with version 2-3 of Message Sniffer
// we are embedding a Mangler digest of the rulebase file. The Verify() method reconstructs
// the digest and compares it. This ensures that no part of the rulebase file can be
// corrupted without the snf2check utility detecting the problem. Prior to this version
// it was possible to have undetected corruption in the middle of the rulebase file. The
// Mangler digest will prevent that.

// 20030130 _M Added testing section in TokenMatrix to throw an exception if the file
// is too small to be a valid matrix. The value is calculated based on the idea that a
// valid matrix will have been encrypted in two segments so the file must be at least
// as large as these two segments. This is intended to solve the zero-length-rulebase
// bug where an access violation would occur if the file was of zero length.

// 20021030 _M Creation of snf_engine module by dragging the sniffer pattern matching engine out
// of the sniffer.cpp file.

#include #include #include #include #include #include #include #include #include #include "../CodeDweller/mangler.hpp" #include "snf_engine.hpp" namespace cd = codedweller; /////////////////////////////////////////////////////////////////////////////////////////// // BEGIN IMPLEMENTATIONS ////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////// // Token Matrix Implementations /////////////////////////////////////////////////////////// // TokenMatrix::Load(filename) void TokenMatrix::Load(string& FileName) { // Initialize using a string for file name. Load(FileName.c_str()); // Convert the string to a null terminated } // char* and call the function below. void TokenMatrix::Load(const char* FileName) { // Initializes the token matrix by file name. ifstream MatrixFile(FileName,ios::binary); // Open the file. if(MatrixFile.bad()) // If anything is wrong with the file throw BadFile("TokenMatrix::Load() finds MatrixFile.bad()"); // then throw a bad file exception. Load(MatrixFile); // Load the matrix from the file. MatrixFile.close(); // Be nice and clean up our file. } // TokenMatrix::Load(stream) const cd::AbortCheck CompatibleIntSizeCheck("TokenMatrix::Load():CompatibleIntSizeCheck(sizeof(unsigned int)==4)"); void TokenMatrix::Load(ifstream& F) { // Initializes the token matrix from a file. CompatibleIntSizeCheck(sizeof(unsigned int)==4); // Check our assumptions. MatrixSize = 0; // Clear out the old Matrix Size and array. if(Matrix) delete Matrix; // that is, if there is an array. F.seekg(0,ios::end); // Find the end of the file. MatrixSize = F.tellg() / sizeof(Token); // Calculate how many tokens. F.seekg(0); // Go back to the beginning. 
if(MatrixSize < MinimumValidMatrix) // If the matrix file is too small then throw BadMatrix("TokenMatrix::Load() (MatrixSize < MinimumValidMatrix)"); // we must reject it. Matrix = new Token[MatrixSize]; // Allocate an array of tokens. if(Matrix == NULL) // Check for an allocation error. throw BadAllocation("TokenMatrix::Load() Matrix == NULL)"); // and throw an exception if it happens. F.read( // Now read the file into the allocated reinterpret_cast(Matrix), // matrix by recasting it as a character (MatrixSize * sizeof(Token))); // buffer of the correct size. if(F.bad()) // If there were any problems reading the throw BadMatrix("TokenMatrix::Load() (F.bad())"); // matrix then report the bad matrix. } // TokenMatrix::Validate(key) void TokenMatrix::Validate(string& SecurityKey) { // Decrypts and validates the matrix. MANGLER ValidationChecker; // Create a mangler engine for validation. // In order to do the validation we must look at the token matrix as a sequence of bytes. // We will be decrypting the first and last SecurtySegmentSize of this sequence and then // detecting wether the appropriate security key has been properly encrypted in the end. // If we find everything as it should be then we can be sure that the two segments have // not been tampered with and that we have the correct security key. unsigned char* TokensAsBytes = reinterpret_cast(Matrix); int BytesInTokenMatrix = (MatrixSize * sizeof(Token)); // Now that we have all of that stuff let's initialize our ValidationChecker. // Note that the length of our security key is always 24 bytes. The license // id is 8 bytes, the authentication code is 16 bytes. We don't bother to check // here because if it's wrong then nothing will decrypt and we'll have essentially // the same result. Note also that on the end of the rule file we pad this // encrypted security id with nulls so that we can create a string from it easily // and so that we have precisely 32 bytes which is the same size as 4 tokens. 
// // Note: The 32 byte value is in SecurityKeyBufferSize. This means that we can // accept security keys up to 31 bytes in length. We need the ending null to // assure our null terminated string is as expected. The security key block must // match up with the edges of tokens in the matrix so we pad the end with nulls // when encoding the security key in the encoded file. int SecurityKeyLength = SecurityKey.length(); // For the length of our key for(int a=0;a(Matrix); int BytesInTokenMatrix = (MatrixSize * sizeof(Token)); // Initialize our digest engine with the security key. int SecurityKeyLength = SecurityKey.length(); // For the length of our key for(int a=0;a(Matrix); // Grab the matrix as uints. int Length = ((MatrixSize * sizeof(Token)) / sizeof(unsigned int)); // Calculate it's size. for(int i = 0; i < Length; i++) { // Loop through the array of u ints unsigned int x = UInts[i]; // and re-order the bytes in each x = ((x & 0xff000000) >> 24) | // one to swap from big/little endian ((x & 0x00ff0000) >> 8) | // to little/big endian. ((x & 0x0000ff00) << 8) | ((x & 0x000000ff) << 24); UInts[i] = x; // Put the flipped int back. } } // Evaluator Implementations ////////////////////////////////////////////////////////////// // 20030216 _M Optimization conversions // 20140119 _M Deprecated by jump table in evaluator // inline int Evaluator::i_lower() { return myEvaluationMatrix->i_lower; } // inline bool Evaluator::i_isDigit() { return myEvaluationMatrix->i_isDigit; } // inline bool Evaluator::i_isSpace() { return myEvaluationMatrix->i_isSpace; } // inline bool Evaluator::i_isAlpha() { return myEvaluationMatrix->i_isAlpha; } // Evaluator::Evaluator(position,evalmatrix) Constructor Evaluator::Evaluator(unsigned int s, EvaluationMatrix* m) : myEvaluationMatrix(m), JumpPoint(0), Condition(DOING_OK), NextEvaluator(NULL), StreamStartPosition(s), CurrentPosition(0), WildRunLength(0) { // Constructor... 
Matrix = myEvaluationMatrix->getTokens(); // Capture the token matrix I walk in. MatrixSize = myEvaluationMatrix->getMatrixSize(); // And get it's size. PositionLimit = MatrixSize - 256; } // Of course I may need to resolve some of the following // wildcard characters. int Evaluator::xLetter() { return (JumpPoint + WILD_LETTER); } // Match Any letter. int Evaluator::xDigit() { return (JumpPoint + WILD_DIGIT); } // Match Any digit. int Evaluator::xNonWhite() { return (JumpPoint + WILD_NONWHITE); } // Match Any non-whitespace. int Evaluator::xWhiteSpace() { return (JumpPoint + WILD_WHITESPACE); } // Match Any whitespace. int Evaluator::xAnyInline() { return (JumpPoint + WILD_INLINE); } // Match Any byte but new line. int Evaluator::xAnything() { return (JumpPoint + WILD_ANYTHING); } // Match Any character at all. int Evaluator::xRunGateway() { return (JumpPoint + RUN_GATEWAY); } // Match the run-loop gateway. // void Evaluator::doFollowOrMakeBuddy() void Evaluator::doFollowOrMakeBuddy(int xKey) { bool shouldFollow = (FALLEN_OFF == Condition); // What should we do? 
if(shouldFollow) { // This is how we follow Condition = DOING_OK; CurrentPosition = xKey + Matrix[xKey].Vector; } else { // This is how we make a buddy myEvaluationMatrix-> AddEvaluator(StreamStartPosition,Matrix[xKey].Vector+xKey); } } void Evaluator::tryFollowingPrecisePath(unsigned short int i) { int xPrecise = JumpPoint + i; // Match Precise Character if(Matrix[xPrecise].Character() == i) { // If we've matched our path doFollowOrMakeBuddy(xPrecise); } if(DOING_OK == Condition) WildRunLength = 0; } void Evaluator::tryFollowingNoCasePath(unsigned short int i) { i = tolower(i); int xNoCase = JumpPoint + i; // Match caps to lower (case insensitive) if(Matrix[xNoCase].Character()==i){ doFollowOrMakeBuddy(xNoCase); } if(DOING_OK == Condition) WildRunLength = 0; } void Evaluator::tryFollowingWildAlphaPath() { if(Matrix[xLetter()].Character()==WILD_LETTER){ doFollowOrMakeBuddy(xLetter()); } } void Evaluator::tryFollowingWildDigitPath() { if(Matrix[xDigit()].Character()==WILD_DIGIT){ doFollowOrMakeBuddy(xDigit()); } } void Evaluator::tryFollowingWildNonWhitePath() { if(Matrix[xNonWhite()].Character()==WILD_NONWHITE){ doFollowOrMakeBuddy(xNonWhite()); } } void Evaluator::tryFollowingWildWhitePath() { if(Matrix[xWhiteSpace()].Character()==WILD_WHITESPACE){ doFollowOrMakeBuddy(xWhiteSpace()); } } void Evaluator::tryFollowingWildInlinePath() { if(Matrix[xAnyInline()].Character()==WILD_INLINE){ doFollowOrMakeBuddy(xAnyInline()); } } void Evaluator::tryFollowingWildAnythingPath() { if(Matrix[xAnything()].Character()==WILD_ANYTHING){ doFollowOrMakeBuddy(xAnything()); } } void Evaluator::doFollowerJumpTable(unsigned short int i) { // tryFollowingPrecisePath(i); // tryFollowingUppercasePath(); 0x41 - 0x5A // tryFollowingWildAlphaPath(); 0x61 - 0x7A // tryFollowingWildDigitPath(); 0x30 - 0x39 // tryFollowingWildWhitePath(); 0x09 - 0x0D, 0x20 // tryFollowingWildNonWhitePath(); > 0x20 // tryFollowingWildInlinePath(); Not 0x0A, or 0x0D switch(i) { // These nnly match WildAnything 
because they conflict with special check values... // NUL, SOH, STX, ETX, EOT, ENQ, ACK, BEL, BS, TAB, LF, VT, FF, CR, SO, SI case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07: case 0x08: { break; } // tab case 0x09: { tryFollowingPrecisePath(i); tryFollowingWildWhitePath(); tryFollowingWildInlinePath(); break; } // LF, VT, FF, CR, SO, SI case 0x0A: case 0x0B: case 0x0C: case 0x0D: case 0x0E: case 0x0F: // DLE, DC1, DC2, DC3, DC4, NAK, SYN, ETB, CAN, EM, SUB, ESC, FS, GS, RS, US case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17: case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F: { tryFollowingPrecisePath(i); tryFollowingWildWhitePath(); break; } // the final fronteer case 0x20: { tryFollowingPrecisePath(i); tryFollowingWildWhitePath(); tryFollowingWildInlinePath(); break; } // ! " # $ % & ' ( ) * + , - . / case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x26: case 0x27: case 0x28: case 0x29: case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E: case 0x2F: { tryFollowingPrecisePath(i); tryFollowingWildNonWhitePath(); tryFollowingWildInlinePath(); break; } // 0 - 9 case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37: case 0x38: case 0x39: { tryFollowingPrecisePath(i); tryFollowingWildDigitPath(); tryFollowingWildNonWhitePath(); tryFollowingWildInlinePath(); break; } // : ; < = > ? 
@ case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F: case 0x40: { tryFollowingPrecisePath(i); tryFollowingWildNonWhitePath(); tryFollowingWildInlinePath(); break; } // A - Z case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F: case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: case 0x58: case 0x59: case 0x5A: { tryFollowingPrecisePath(i); tryFollowingNoCasePath(i); tryFollowingWildAlphaPath(); tryFollowingWildNonWhitePath(); tryFollowingWildInlinePath(); break; } // [ \ ] ^ _ ` case 0x5B: case 0x5C: case 0x5D: case 0x5E: case 0x5F: case 0x60: { tryFollowingPrecisePath(i); tryFollowingWildNonWhitePath(); tryFollowingWildInlinePath(); break; } // a - z case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67: case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F: case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: case 0x78: case 0x79: case 0x7A: { tryFollowingPrecisePath(i); tryFollowingWildAlphaPath(); tryFollowingWildNonWhitePath(); tryFollowingWildInlinePath(); break; } // { | } ~ case 0x7B: case 0x7C: case 0x7D: case 0x7E: case 0x7F: { tryFollowingPrecisePath(i); tryFollowingWildNonWhitePath(); tryFollowingWildInlinePath(); } // high ascii case 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: case 0x87: case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: case 0x8E: case 0x8F: case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: case 0x95: case 0x96: case 0x97: case 0x98: case 0x99: case 0x9A: case 0x9B: case 0x9C: case 0x9D: case 0x9E: case 0x9F: case 0xA0: case 0xA1: case 0xA2: case 0xA3: case 0xA4: case 0xA5: case 0xA6: case 0xA7: case 0xA8: case 0xA9: case 0xAA: case 0xAB: case 0xAC: case 0xAD: case 0xAE: case 0xAF: case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4: 
case 0xB5: case 0xB6: case 0xB7: case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF: case 0xC0: case 0xC1: case 0xC2: case 0xC3: case 0xC4: case 0xC5: case 0xC6: case 0xC7: case 0xC8: case 0xC9: case 0xCA: case 0xCB: case 0xCC: case 0xCD: case 0xCE: case 0xCF: case 0xD0: case 0xD1: case 0xD2: case 0xD3: case 0xD4: case 0xD5: case 0xD6: case 0xD7: case 0xD8: case 0xD9: case 0xDA: case 0xDB: case 0xDC: case 0xDD: case 0xDE: case 0xDF: case 0xE0: case 0xE1: case 0xE2: case 0xE3: case 0xE4: case 0xE5: case 0xE6: case 0xE7: case 0xE8: case 0xE9: case 0xEA: case 0xEB: case 0xEC: case 0xED: case 0xEE: case 0xEF: case 0xF0: case 0xF1: case 0xF2: case 0xF3: case 0xF4: case 0xF5: case 0xF6: case 0xF7: case 0xF8: case 0xF9: case 0xFA: case 0xFB: case 0xFC: case 0xFD: case 0xFE: case 0xFF: { tryFollowingPrecisePath(i); tryFollowingWildNonWhitePath(); tryFollowingWildInlinePath(); break; } } tryFollowingWildAnythingPath(); } // Evaluator::EvaluateThis() Evaluator::States Evaluator::EvaluateThis(unsigned short int i) { // Follow the this byte. // First upgrade will be to DOING_OK, after that we launch buddies. Condition = FALLEN_OFF; // Start off guessing we'll fall off. // In order to handle wildcard characters, this evaluation function must actually // compare the character to a number of possibilities in most-specific to least- // specific order to see if any match. In order to support overlapping rule sets, // if more than one wildcard matches at this node, an additional evaluator will be // placed in line already _AT THIS PATH POINT_ so that both possibilities will be // explored. New evaluators are always added at the TOP of the list so we are always // guaranteed not to overdrive an evaluator and end up in a recursive race condition. // 20140121_M The previous optimization with binary flags has been replaced with // a jump table implementation. 
Now, each byte only excites behaviors that are // possible for the current byte so only those paths will be tested. if(CurrentPosition >= PositionLimit) return Condition = OUT_OF_RANGE; // All of the positions calculated below are guaranteed to be within the ranges checked // above so we're safe if we get to this point. // So, at this point it's safe to check and see if I'm terminated. Note that if I // am at a termination point, my path has terminated and I have a symbol so I don't // need to resolve any more characters - even the current one. if(Matrix[CurrentPosition].isTermination()) return Condition = TERMINATED; // NOTE: The above is written for sudden-death termination. Eventually we will want // to support deep - filters which will show every rule match and this will need to // be rewritten. // Evaluation order, most-to-least specific with what is possible for that byte. JumpPoint = CurrentPosition; doFollowerJumpTable(i); // Excite followers based on this byte. { // Precise matches reset the wild run counter. ++WildRunLength; // Count up the run length. if(WildRunLength >= MaxWildRunLength) // If we exceed the max then return Condition = FALLEN_OFF; // we've fallen off the path } // and we do it immediately. // 20021112 _M // Beginning with version 2 of Message Sniffer we've implemented a new construct // for run-loops that prevents any interference between rules where run-loops might // appear in locations coinciding with standard match bytes. The new methodology // uses a special run-loop-gateway character to isolate any run loops from standard // nodes in the matrix. Whenever a run-loop gateway is present at a node a buddy is // inserted AFTER the current evaluator so that it will evaluate the current character // from the position of the run-loop gateway. This allows run loops to occupy the same // positional space as standard matches while maintaining isolation between their paths // in the matrix. 
// We don't want to launch any run loop buddies unless we matched this far. If we did // match up to this point and the next character in a pattern includes a run loop then // we will find a gateway byte at this point representing the path to any run loops. // If we made it this far launch a buddy for any run-loop gateway that's present. // Of course, the buddy must be evaluated after this evaluator during this pass because // he will have shown up late... That is, we don't detect a run gateway until we're // sitting on a new node looking for a result... The very result we may be looking for // could be behind the gateway - so we launch the buddy behind us and he will be able // to match anything in this pass that we missed when looking for a non-run match. if(Matrix[xRunGateway()].Character() == RUN_GATEWAY) myEvaluationMatrix-> InsEvaluator(StreamStartPosition,Matrix[xRunGateway()].Vector+xRunGateway()); // At this point, we've tried all of our rules, and created any buddies we needed. // If we got a match, we terminated long ago. If we didn't, then we either stayed // on the path or we fell off. Either way, the flag is in Condition so we can send // it on. return Condition; } /////////////////////////////////////////////////////////////////////////////////////////// // EvaluationMatrix Implementations /////////////////////////////////////////////////////// // EvaluationMatrix::AddMatchRecord(int sp, int ep, int sym) // Most of this functionality is about deep scans - which have been put on hold for now // due to the complexity and the scope of the current application. For now, although // we will use this reporting mechanism, it will generally record only one event. MatchRecord* EvaluationMatrix::AddMatchRecord(int sp, int ep, int sym) { // 20030216 _M Added range check code to watch for corruption. Some systems have // reported matches with zero length indicating an undetected corruption. This // range check will detect and report it. 
if(sp==ep) // Check that we're in range - no zero throw OutOfRange("sp==ep"); // length pattern matches allowed! MatchRecord* NewMatchRecord = // Then, create the new result object new MatchRecord(sp,ep,sym); // by passing it the important parts. if(NewMatchRecord==NULL) // Check for a bad allocation and throw throw BadAllocation("NewMatchRecord==NULL"); // an exception if that happens. if(ResultList == NULL) { // If this is our first result we simply ResultList = NewMatchRecord; // add the result to our list, and of course LastResultInList = NewMatchRecord; // it is the end of the list as well. } else { // If we already have some results, then LastResultInList->NextMatchRecord = // we add the new record to the result list NewMatchRecord; // and record that the new record is now the LastResultInList = NewMatchRecord; // last result in the list. } return NewMatchRecord; // Return our new match record. } // EvaluationMatrix::AddEvaluator() // 20021112 _M // This function has be modified to include a check for duplicates as well as setting // the mount point for the new evaluator. This eliminates a good deal of code elsewhere // and encapsulates the complete operation. If a duplicate evaluator is found then the // function returns NULL indicating that nothing was done. In practic, no check is made // since any serious error conditions cause errors to be thrown from within this function // call. These notes apply to some extent to InsEvaluator which is copied from this function // and which has the only difference of putting the new evaluator after the current one // in the chain in order to support branch-out operations for loop sequences in the matrix. Evaluator* EvaluationMatrix::AddEvaluator(int s, unsigned int m) { // Adds a new evaluator at top. if(!isNoDuplicate(m)) return NULL; // If there is a duplicate do nothing. 
if(CountOfEvaluators >= MAX_EVALS) // If we've exceeded our population size throw MaxEvalsExceeded("Add:CountOfEvaluators >= MAX_EVALS"); // then throw an exception. Evaluator* NewEvaluator = SourceEvaluator(s,this); // Make up a new evaluator. if(NewEvaluator == NULL) // Check for a bad allocation and throw throw BadAllocation("Add:NewEvaluator == NULL"); // an exception if it happens. NewEvaluator->NextEvaluator = EvaluatorList; // Point the new evaluator to the list. EvaluatorList = NewEvaluator; // Then point the list head to // the new evaluator. NewEvaluator->CurrentPosition = m; // Esablish the mount point. ++CountOfEvaluators; // Add one to our evaluator count. if(CountOfEvaluators > MaximumCountOfEvaluators) // If the count is the biggest we MaximumCountOfEvaluators = CountOfEvaluators; // have seen then keep track of it. return NewEvaluator; // Return the new evaluator. } // EvaluationMatrix::InsEvaluator() Evaluator* EvaluationMatrix::InsEvaluator(int s, unsigned int m) { // Inserts a new evaluator. if(!isNoDuplicate(m)) return NULL; // If there is a duplicate do nothing. if(CountOfEvaluators >= MAX_EVALS) // If we've exceeded our population size throw MaxEvalsExceeded("Ins:CountOfEvaluators >= MAX_EVALS"); // then throw an exception. Evaluator* NewEvaluator = SourceEvaluator(s,this); // Make up a new evaluator. if(NewEvaluator == NULL) // Check for a bad allocation and throw throw BadAllocation("Ins:NewEvaluator == NULL"); // an exception if it happens. NewEvaluator->NextEvaluator = // Point the new evaluator where the CurrentEvaluator->NextEvaluator; // current evalautor points... then point CurrentEvaluator->NextEvaluator = // the current evaluator to this one. This NewEvaluator; // accomplishes the insert operation. NewEvaluator->CurrentPosition = m; // Esablish the mount point. ++CountOfEvaluators; // Add one to our evaluator count. 
if(CountOfEvaluators > MaximumCountOfEvaluators) // If the count is the biggest we MaximumCountOfEvaluators = CountOfEvaluators; // have seen then keep track of it. return NewEvaluator; // Return the new evaluator. } // EvaluationMatrix::DropEvaluator() void EvaluationMatrix::DropEvaluator() { // Drops the current evaluator from the matrix. Evaluator* WhereTo = CurrentEvaluator->NextEvaluator; // Where do we go from here? // First step is to heal the list as if the current evaluator were not present. // If there is no previous evaluator - meaning this should be the first one in the // list - then we point the list head to the next evaluator on the list (WhereTo) if(PreviousEvaluator != NULL) // If we have a Previous then PreviousEvaluator->NextEvaluator = WhereTo; // our next is it's next. else // If we don't then our next EvaluatorList = WhereTo; // is the first in the list. // Now that our list is properly healed, it's time to drop the dead evaluator and // get on with our lives... CurrentEvaluator->NextEvaluator = NULL; // Disconnect from any list. CacheEvaluator(CurrentEvaluator); // Drop the current eval. CurrentEvaluator = WhereTo; // Move on. --CountOfEvaluators; // Reduce our evaluator count. } Evaluator* findEvaluatorListTail(Evaluator* head) { Evaluator* next = head; while(NULL != (next->NextEvaluator)) next = next->NextEvaluator; return next; } void EvaluationMatrix::dropAllEvaluators() { bool haveActiveEvaluators = (NULL != EvaluatorList); if(haveActiveEvaluators) { Evaluator* tail = findEvaluatorListTail(EvaluatorList); tail->NextEvaluator = EvaluatorCache; EvaluatorCache = EvaluatorList; } PreviousEvaluator = NULL; CurrentEvaluator = NULL; EvaluatorList = NULL; CountOfEvaluators = 0; } void EvaluationMatrix::restartEngineAt(int newCharacterCount) { dropAllEvaluators(); CountOfCharacters = newCharacterCount; } // EvaluationMatrix::EvaluateThis() // // This function returns the number of matches that were found. 
It is possible for more // than one evaluator to match on a single character. // // 0 indicates no matches were found. // >0 indicates some matches were found. // If there is a problem then an exception will be thrown. int EvaluationMatrix::EvaluateThis(unsigned short int i) { AddEvaluator(CountOfCharacters,0); // First, add a new Evaluator at the root of the // matrix for the current position in the scan // stream. // The new evaluator is now at the top of our list. // If there was a problem then an exception will have been thrown. // If our allocation worked ok, then we'll be here and ready to start scanning // the rule set with our current character. PassResult = 0; // Start by assuming we won't match. CurrentEvaluator = EvaluatorList; // Start at the top of the list. PreviousEvaluator = NULL; // NULL means previous is the top. // 20030216 _M // Next do some basic conversions and evaluations so they don't need to be done // again within the evaluators. From now on the evaluators will look here for basic // conversions and boolean check values rather than performing the checks themselves. // 20140119 _M deprecated by jump table in evaluator // i_lower = tolower(i); // Convert i to lower case. // i_isDigit = isdigit(i); // Check for a digit. // i_isSpace = isspace(i); // Check for whitespace. // i_isAlpha = isalpha(i); // Check for letters. // Next, loop through the list and pass the incoming character to // each evaluator. Drop those that fall off, and record those that terminate. The // rest of them stick around to walk their paths until they meet their fate. while(CurrentEvaluator != NULL) { // While there are more evaluators... // go through the list and evaluate switch(CurrentEvaluator->EvaluateThis(i)) { // the current character against each. case Evaluator::FALLEN_OFF: { // If we've fallen off the path DropEvaluator(); // drop the current evaluator and break; // move on with our lives. } case Evaluator::DOING_OK: { // If we're still going then... 
PreviousEvaluator = CurrentEvaluator; // keep track of where we've been and CurrentEvaluator = // move forward to the next evaluator CurrentEvaluator->NextEvaluator; // in the list. break; } case Evaluator::TERMINATED: { // If we've terminated a path... ++PassResult; // Record our PassResult. // Create a new match result using the data in the current evaluator. // If there is a problem adding the match an exception will be thrown. AddMatchRecord( CurrentEvaluator->StreamStartPosition, CountOfCharacters - 1, myTokenMatrix->Symbol(CurrentEvaluator->CurrentPosition) ); // From Version 2 onward we're always doing deep scans... // Having successfully recorded the result of this critter we can kill them off. DropEvaluator(); // He's dead. break; // Now let's keep looking. } case Evaluator::OUT_OF_RANGE: { // This result is really bad and throw OutOfRange("case Evaluator::OUT_OF_RANGE:"); // probably means we have a bad matrix. break; // The reason we don't throw OutOfRange from within the evaluator is that we // may want to take some other action in the future... So, we allow the evaluator // to tell us we sent it out of range and then we decide what to do about it. } } } // At the end of this function our PassResult is either an error (which is // reported immediately), or it is a match condition. We start out by assuming // there will be no match. If we find one, then we reset that result... so at // this point, all we need do is report our findings. ++CountOfCharacters; // Add one to our Character Count statistic. // Note that from this point on, the index in the stream is one less than the // CountOfCharacters... for example, if I've evaluated (am evaluating) one character // the it's index is 0. This will be important when we create any match records. return PassResult; // When we're finished, return the last known result. 
} void EvaluationMatrix::evaluateSegment(vector& data, unsigned int start, unsigned int finish) { restartEngineAt(start); finish = (finish < data.size()) ? finish : data.size(); for(unsigned int a = start; a < finish; a++) EvaluateThis(data[a]); }