You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

snf_engine.cpp 44KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793
  1. // snf_engine.cpp
  2. //
  3. // (C) 1985-2004 MicroNeil Research Corporation
  4. // (C) 2005-2009 ARM Research Labs, LLC
  5. // See www.armresearch.com for the copyright terms.
  6. //
  7. // Derived from original work on cellular automation for complex pattern
  8. // reflex engine 1985 Pete McNeil (Madscientist)
  9. //
  10. // Derived from rapid scripting engine (token matrix) implementation 1987
  11. //
  12. // 20040419 _M Adding Verify() method. Beginning with version 2-3 of Message Sniffer
  13. // we are embedding a Mangler digest of the rulebase file. The Verify() method reconstructs
  14. // the digest and compares it. This ensures that no part of the rulebase file can be
  15. // corrupted without the snf2check utility detecting the problem. Prior to this version
  16. // it was possible to have undetected corruption in the middle of the rulebase file. The
  17. // Mangler digest will prevent that.
  18. // 20030130 _M Added testing section in TokenMatrix to throw an exception if the file
  19. // is too small to be a valid matrix. The value is calculated based on the idea that a
  20. // valid matrix will have been encrypted in two segments so the file must be at least
  21. // as large as these two segments. This is intended to solve the zero-length-rulebase
  22. // bug where an access violation would occur if the file was of zero length.
  23. // 20021030 _M Creation of snf_engine module by dragging the sniffer pattern matching engine out
  24. // of the sniffer.cpp file.
  25. #include <unistd.h>
  26. #include <cstdio>
  27. #include <cctype>
  28. #include <ctime>
  29. #include <cstdlib>
  30. #include <fstream>
  31. #include <iostream>
  32. #include <string>
  33. #include "../CodeDweller/mangler.hpp"
  34. #include "snf_engine.hpp"
  35. using namespace std;
  36. ///////////////////////////////////////////////////////////////////////////////////////////
  37. // BEGIN IMPLEMENTATIONS //////////////////////////////////////////////////////////////////
  38. ///////////////////////////////////////////////////////////////////////////////////////////
  39. ///////////////////////////////////////////////////////////////////////////////////////////
  40. // Token Matrix Implementations ///////////////////////////////////////////////////////////
  41. // TokenMatrix::Load(filename)
  42. void TokenMatrix::Load(string& FileName) { // Initialize using a string for file name.
  43. Load(FileName.c_str()); // Convert the string to a null terminated
  44. } // char* and call the function below.
  45. void TokenMatrix::Load(const char* FileName) { // Initializes the token matrix by file name.
  46. ifstream MatrixFile(FileName,ios::binary); // Open the file.
  47. if(MatrixFile == NULL || MatrixFile.bad()) // If anything is wrong with the file
  48. throw BadFile("TokenMatrix::Load()(MatrixFile==NULL || MatrixFile.bad())"); // then throw a bad file exception.
  49. Load(MatrixFile); // Load the matrix from the file.
  50. MatrixFile.close(); // Be nice and clean up our file.
  51. }
  52. // TokenMatrix::Load(stream)
  53. const AbortCheck CompatibleIntSizeCheck("TokenMatrix::Load():CompatibleIntSizeCheck(sizeof(unsigned int)==4)");
  54. void TokenMatrix::Load(ifstream& F) { // Initializes the token matrix from a file.
  55. CompatibleIntSizeCheck(sizeof(unsigned int)==4); // Check our assumptions.
  56. MatrixSize = 0; // Clear out the old Matrix Size and array.
  57. if(Matrix) delete Matrix; // that is, if there is an array.
  58. F.seekg(0,ios::end); // Find the end of the file.
  59. MatrixSize = F.tellg() / sizeof(Token); // Calculate how many tokens.
  60. F.seekg(0); // Go back to the beginning.
  61. if(MatrixSize < MinimumValidMatrix) // If the matrix file is too small then
  62. throw BadMatrix("TokenMatrix::Load() (MatrixSize < MinimumValidMatrix)"); // we must reject it.
  63. Matrix = new Token[MatrixSize]; // Allocate an array of tokens.
  64. if(Matrix == NULL) // Check for an allocation error.
  65. throw BadAllocation("TokenMatrix::Load() Matrix == NULL)"); // and throw an exception if it happens.
  66. F.read( // Now read the file into the allocated
  67. reinterpret_cast<char*>(Matrix), // matrix by recasting it as a character
  68. (MatrixSize * sizeof(Token))); // buffer of the correct size.
  69. if(F.bad()) // If there were any problems reading the
  70. throw BadMatrix("TokenMatrix::Load() (F.bad())"); // matrix then report the bad matrix.
  71. }
  72. // TokenMatrix::Validate(key)
  73. void TokenMatrix::Validate(string& SecurityKey) { // Decrypts and validates the matrix.
  74. MANGLER ValidationChecker; // Create a mangler engine for validation.
  75. // In order to do the validation we must look at the token matrix as a sequence of bytes.
  76. // We will be decrypting the first and last SecurtySegmentSize of this sequence and then
  77. // detecting wether the appropriate security key has been properly encrypted in the end.
  78. // If we find everything as it should be then we can be sure that the two segments have
  79. // not been tampered with and that we have the correct security key.
  80. unsigned char* TokensAsBytes = reinterpret_cast<unsigned char*>(Matrix);
  81. int BytesInTokenMatrix = (MatrixSize * sizeof(Token));
  82. // Now that we have all of that stuff let's initialize our ValidationChecker.
  83. // Note that the length of our security key is always 24 bytes. The license
  84. // id is 8 bytes, the authentication code is 16 bytes. We don't bother to check
  85. // here because if it's wrong then nothing will decrypt and we'll have essentially
  86. // the same result. Note also that on the end of the rule file we pad this
  87. // encrypted security id with nulls so that we can create a string from it easily
  88. // and so that we have precisely 32 bytes which is the same size as 4 tokens.
  89. //
  90. // Note: The 32 byte value is in SecurityKeyBufferSize. This means that we can
  91. // accept security keys up to 31 bytes in length. We need the ending null to
  92. // assure our null terminated string is as expected. The security key block must
  93. // match up with the edges of tokens in the matrix so we pad the end with nulls
  94. // when encoding the security key in the encoded file.
  95. int SecurityKeyLength = SecurityKey.length(); // For the length of our key
  96. for(int a=0;a<SecurityKeyLength;a++) // feed each byte through the
  97. ValidationChecker.Encrypt(SecurityKey.at(a)); // mangler to evolve the key
  98. // state.
  99. // Now we're ready to decrypt the matrix... We start with the first segment.
  100. for(int a=0;a<SecuritySegmentSize;a++) // For the length of the segment
  101. TokensAsBytes[a] = // replace each byte with the
  102. ValidationChecker.Decrypt(TokensAsBytes[a]); // decrypted byte.
  103. // Next we decrypt the last security segment...
  104. for(int a= BytesInTokenMatrix - SecuritySegmentSize; a<BytesInTokenMatrix; a++)
  105. TokensAsBytes[a] =
  106. ValidationChecker.Decrypt(TokensAsBytes[a]);
  107. // Now that we've done this we should find that our SecurityKey is at the end
  108. // of the loaded token matrix... Let's look and find out shall we?!!!
  109. unsigned char* SecurityCheckKey = // Reference the check
  110. & TokensAsBytes[BytesInTokenMatrix-SecurityKeyBufferSize]; // space in the matrix.
  111. SecurityCheckKey[SecurityKeyBufferSize-1] = 0; // Add a safety null just in case.
  112. string SecurityCheck((char*)SecurityCheckKey); // Make a string.
  113. // By now we should have a SecurityCheck string to compare to our SecurityKey.
  114. // If they match then we know everything worked out and that our token matrix has
  115. // been decrypted properly. This is also a good indication that our token matrix
  116. // is not incomplete since if it were the decryption wouldn't work. Saddly, we
  117. // don't have the computing cycles to decrypt the entire file - so we won't be
  118. // doing that until we can load it in a server/daemon and then reuse it over and
  119. // over... Once that happens we will be able to detect tampering also.
  120. if(SecurityKey != SecurityCheck) // If the security keys don't match
  121. throw BadMatrix("TokenMatrix::Validate() (SecurityKey != SecurityCheck)"); // then we have an invalid matrix.
  122. }
  123. // TokenMatrix::Verify(key)
  124. void TokenMatrix::Verify(string& SecurityKey) { // Builds and verifies a file digest.
  125. MANGLER DigestChecker; // Create a mangler for the digest.
  126. // Gain access to our token matrix as bytes.
  127. unsigned char* TokensAsBytes = reinterpret_cast<unsigned char*>(Matrix);
  128. int BytesInTokenMatrix = (MatrixSize * sizeof(Token));
  129. // Initialize our digest engine with the security key.
  130. int SecurityKeyLength = SecurityKey.length(); // For the length of our key
  131. for(int a=0;a<SecurityKeyLength;a++) // feed each byte through the
  132. DigestChecker.Encrypt(SecurityKey.at(a)); // mangler to evolve the key
  133. // state.
  134. // Build the digest.
  135. int IndexOfDigest = // Find the index of the digest by
  136. BytesInTokenMatrix - // starting at the end of the matrix,
  137. SecurityKeyBufferSize - // backing up past the security key,
  138. RulebaseDigestSize; // then past the digest.
  139. int a=0; // Keep track of where we are.
  140. for(;a<IndexOfDigest;a++) // Loop through up to the digest and
  141. DigestChecker.Encrypt(TokensAsBytes[a]); // pump the file through the mangler.
  142. // Now that the digest is built we must test it.
  143. // The original was emitted by encrypting 0s so if we do the same thing we will match.
  144. for(int b=0;b<RulebaseDigestSize;b++) // Loop through the digest and compare
  145. if(DigestChecker.Encrypt(0)!=TokensAsBytes[a+b]) // our digest to the stored digest. If
  146. throw BadMatrix("TokenMatrix::Verify() Bad Digest"); // any byte doesn't match it's bad!
  147. // If we made it through all of that then we're valid :-)
  148. }
  149. void TokenMatrix::FlipEndian() { // Converts big/little endian tokens.
  150. unsigned int* UInts = reinterpret_cast<unsigned int*>(Matrix); // Grab the matrix as uints.
  151. int Length = ((MatrixSize * sizeof(Token)) / sizeof(unsigned int)); // Calculate it's size.
  152. for(int i = 0; i < Length; i++) { // Loop through the array of u ints
  153. unsigned int x = UInts[i]; // and re-order the bytes in each
  154. x = ((x & 0xff000000) >> 24) | // one to swap from big/little endian
  155. ((x & 0x00ff0000) >> 8) | // to little/big endian.
  156. ((x & 0x0000ff00) << 8) |
  157. ((x & 0x000000ff) << 24);
  158. UInts[i] = x; // Put the flipped int back.
  159. }
  160. }
  161. // Evaluator Implementations //////////////////////////////////////////////////////////////
  162. // 20030216 _M Optimization conversions
  163. inline int Evaluator::i_lower() { return myEvaluationMatrix->i_lower; }
  164. inline bool Evaluator::i_isDigit() { return myEvaluationMatrix->i_isDigit; }
  165. inline bool Evaluator::i_isSpace() { return myEvaluationMatrix->i_isSpace; }
  166. inline bool Evaluator::i_isAlpha() { return myEvaluationMatrix->i_isAlpha; }
  167. // Evaluator::Evaluator(position,evalmatrix) Constructor
  168. Evaluator::Evaluator(unsigned int s, EvaluationMatrix* m) { // Constructor...
  169. myEvaluationMatrix = m; // Capture the matrix I live in.
  170. Matrix = myEvaluationMatrix->getTokens(); // Capture the token matrix I walk in.
  171. MatrixSize = myEvaluationMatrix->getMatrixSize(); // And get it's size.
  172. PositionLimit = MatrixSize - 256; // Calculate the safety limit.
  173. StreamStartPosition = s; // Always record our starting point.
  174. NextEvaluator = NULL; // Allways start off with no extensions.
  175. CurrentPosition = 0; // Always start at the root of the matrix;
  176. WildRunLength = 0; // No run length when new.
  177. Condition = DOING_OK; // Start off being ok.
  178. }
  179. // Evaluator::EvaluateThis()
  // Advances this evaluator by one input character i and returns the resulting state,
  // which is also stored in Condition:
  //   OUT_OF_RANGE - CurrentPosition is at or beyond PositionLimit.
  //   TERMINATED   - the token at CurrentPosition is a termination token.
  //   DOING_OK     - at least one of the tests below matched and CurrentPosition moved.
  //   FALLEN_OFF   - nothing matched, or the wild run reached MaxWildRunLength.
  // The first test that matches moves this evaluator; every later test that also matches
  // asks the owning matrix to AddEvaluator() a buddy at that alternative position. A
  // run-loop gateway at the starting node asks the owning matrix to InsEvaluator() a buddy.
  180. Evaluator::States Evaluator::EvaluateThis(unsigned short int i) { // Follow this byte.
  181. Condition = FALLEN_OFF; // Start off guessing we'll fall off.
  182. // First upgrade will be to DOING_OK, after that we launch buddies.
  183. // In order to handle wildcard characters, this evaluation function must actually
  184. // compare the character to a number of possibilities in most-specific to least-
  185. // specific order to see if any match. In order to support overlapping rule sets,
  186. // if more than one wildcard matches at this node, an additional evaluator will be
  187. // placed in line already _AT THIS PATH POINT_ so that both possibilities will be
  188. // explored. New evaluators are always added at the TOP of the list so we are always
  189. // guaranteed not to overdrive an evaluator and end up in a recursive race condition.
  190. // 20030216 _M Optimizations. In order to reduce the number of instructions per byte
  191. // the parent Evaluation Matrix will now translate the byte i into boolean flags
  192. // indicating if they are digits, white, letters, etc... and converting to lower
  193. // case etc... This conversion is then done only once so that thereafter only a simple
  194. // comparison need be made. This should eliminate many function calls and a collection
  195. // of numeric comparisons.
  196. //
  197. // I am also moving the simple comparisons to the front of each logical section so
  198. // that failures there can short-circuit subsequent logic to view the state of the
  199. // matrix regarding that character. The matrix lookup is likely to be more expensive
  200. // than a single binary comparison.
  201. // For safety, we check our evaluation position here - If xNoCase is out of range
  202. // then we will return OUT_OF_RANGE to indicate the problem rather than accessing
  203. // data beyond our token matrix's limits.
  204. /*** 20070606 _M Reduced the strength of this check from 3 comparisons to 1.
  205. **** CurrentPosition is now an unsigned int so it cannot be negative. The limit
  206. **** is now calculated once in the constructor as PositionLimit.
  207. if(
  208. CurrentPosition < 0 || // Position should never be < 0
  209. xPrecise >= MatrixSize || // nor xPrecise over the top.
  210. xNoCase >= MatrixSize // nor NoCase over the top.
  211. ) // If either occur we have a
  212. return Condition = OUT_OF_RANGE; // bad matrix.
  213. ***/
  214. if(CurrentPosition >= PositionLimit) return Condition = OUT_OF_RANGE;
  215. // All of the positions calculated below are guaranteed to be within the ranges checked
  216. // above so we're safe if we get to this point.
  217. // So, at this point it's safe to check and see if I'm terminated. Note that if I
  218. // am at a termination point, my path has terminated and I have a symbol so I don't
  219. // need to resolve any more characters - even the current one.
  220. if(Matrix[CurrentPosition].isTermination()) return Condition = TERMINATED;
  221. // NOTE: The above is written for sudden-death termination. Eventually we will want
  222. // to support deep - filters which will show every rule match and this will need to
  223. // be rewritten.
  224. // Evaluation order, most-to-least specific. All of these offsets are taken from the
  //      node we are on NOW, before any match below moves CurrentPosition.
  225. int xPrecise = CurrentPosition + i; // Match Precise Character
  226. int xNoCase = CurrentPosition + i_lower(); // Match Case insensitive
  227. // Of course I may need to resolve some of the following
  228. // wildcard characters.
  229. int xLetter = CurrentPosition + WILD_LETTER; // Match Any letter.
  230. int xDigit = CurrentPosition + WILD_DIGIT; // Match Any digit.
  231. int xNonWhite = CurrentPosition + WILD_NONWHITE; // Match Any non-whitespace.
  232. int xWhiteSpace = CurrentPosition + WILD_WHITESPACE; // Match Any whitespace.
  233. int xAnyInline = CurrentPosition + WILD_INLINE; // Match Any byte but new line.
  234. int xAnything = CurrentPosition + WILD_ANYTHING; // Match Any character at all.
  235. int xRunGateway = CurrentPosition + RUN_GATEWAY; // Match the run-loop gateway.
  236. // Try to match the precise character.
  237. if(Matrix[xPrecise].Character() == i) { // If we've matched our path
  238. Condition = DOING_OK; // upgrade to doing ok.
  239. CurrentPosition = xPrecise +
  240. Matrix[xPrecise].Vector; // Move myself along this path.
  241. }
  242. // Try to match the case insensitive character.
  243. if(i_lower()!=i && Matrix[xNoCase].Character()==i_lower()){
  244. // If we've matched our path
  245. // with a compromised case then
  246. if(Condition==FALLEN_OFF) { // check: if no matches yet,
  247. Condition = DOING_OK; // upgrade to doing ok.
  248. CurrentPosition = xNoCase +
  249. Matrix[xNoCase].Vector; // Move myself along this path.
  250. }
  251. // If we have more than one match then
  252. else { // lets try to make a buddy...
  253. // If there's no duplicate buddy like this already, then we'll create one.
  254. // To create a buddy, add an evaluator at the top of the list (behind us) and
  255. // set its position as if it had been here all along and had matched the current
  256. // character. Next time we evaluate it will be just like all the others.
  257. myEvaluationMatrix->
  258. AddEvaluator(StreamStartPosition,Matrix[xNoCase].Vector+xNoCase);
  259. }
  260. }
  261. // Start looking at wildcards... Here's where we must limit run length.
  262. if(Condition == DOING_OK) // If we matched above we'll
  263. WildRunLength = 0; // reset our wild run count.
  264. // If not then we need to keep
  265. else { // track of our run length.
  266. ++WildRunLength; // Count up the run length.
  267. if(WildRunLength >= MaxWildRunLength) // If we reach the max then
  268. return Condition = FALLEN_OFF; // we've fallen off the path
  269. } // and we do it immediately.
  270. // WILD_LETTER
  271. // If that didn't do it for us...
  272. // Try to match any letter character.
  273. // The way this next one works (and the rest of the wildcards) is we look into
  274. // the token matrix to see if the wildcard is part of the current path... If it
  275. // is then we compare the incoming character to that wildcard evaluation function
  276. // and if it is true, then we've got a match.
  277. if(i_isAlpha() && Matrix[xLetter].Character()==WILD_LETTER){
  278. // If we've matched our path
  279. // with any letter then
  280. if(Condition==FALLEN_OFF) { // check: if no matches yet,
  281. Condition = DOING_OK; // upgrade to doing ok.
  282. CurrentPosition = xLetter +
  283. Matrix[xLetter].Vector; // Move myself along this path.
  284. }
  285. else { // Otherwise make a buddy...
  286. // If there's no duplicate buddy like this already, then we'll create one.
  287. // To create a buddy, add an evaluator at the top of the list (behind us) and
  288. // set its position as if it had been here all along and had matched the current
  289. // character. Next time we evaluate it will be just like all the others.
  290. myEvaluationMatrix->
  291. AddEvaluator(StreamStartPosition,Matrix[xLetter].Vector+xLetter);
  292. }
  293. }
  294. // WILD_DIGIT
  295. // If that didn't do it for us...
  296. // Try to match any digit character.
  297. if(i_isDigit() && Matrix[xDigit].Character()==WILD_DIGIT){
  298. // If we've matched our path
  299. // with any digit then
  300. if(Condition==FALLEN_OFF) { // check: if no matches yet,
  301. Condition = DOING_OK; // upgrade to doing ok.
  302. CurrentPosition = xDigit +
  303. Matrix[xDigit].Vector; // Move myself along this path.
  304. }
  305. else { // Otherwise make a buddy...
  306. // If there's no duplicate buddy like this already, then we'll create one.
  307. // To create a buddy, add an evaluator at the top of the list (behind us) and
  308. // set its position as if it had been here all along and had matched the current
  309. // character. Next time we evaluate it will be just like all the others.
  310. myEvaluationMatrix->
  311. AddEvaluator(StreamStartPosition,Matrix[xDigit].Vector+xDigit);
  312. }
  313. }
  314. // WILD_NONWHITE
  315. // If that didn't do it for us...
  316. // Try to match any non-whitespace character.
  317. if(!i_isSpace() && Matrix[xNonWhite].Character()==WILD_NONWHITE){
  318. // If we've matched our path
  319. // with any non-whitespace then
  320. if(Condition==FALLEN_OFF) { // check: if no matches yet,
  321. Condition = DOING_OK; // upgrade to doing ok.
  322. CurrentPosition = xNonWhite +
  323. Matrix[xNonWhite].Vector; // Move myself along this path.
  324. }
  325. else { // Otherwise make a buddy...
  326. // If there's no duplicate buddy like this already, then we'll create one.
  327. // To create a buddy, add an evaluator at the top of the list (behind us) and
  328. // set its position as if it had been here all along and had matched the current
  329. // character. Next time we evaluate it will be just like all the others.
  330. myEvaluationMatrix->
  331. AddEvaluator(StreamStartPosition,Matrix[xNonWhite].Vector+xNonWhite);
  332. }
  333. }
  334. // WILD_WHITESPACE
  335. // If that didn't do it for us...
  336. // Try to match any whitespace character.
  337. if(i_isSpace() && Matrix[xWhiteSpace].Character()==WILD_WHITESPACE){
  338. // If we've matched our path
  339. // with any whitespace then
  340. if(Condition==FALLEN_OFF) { // check: if no matches yet,
  341. Condition = DOING_OK; // upgrade to doing ok.
  342. CurrentPosition = xWhiteSpace +
  343. Matrix[xWhiteSpace].Vector; // Move myself along this path.
  344. }
  345. else { // Otherwise make a buddy...
  346. // If there's no duplicate buddy like this already, then we'll create one.
  347. // To create a buddy, add an evaluator at the top of the list (behind us) and
  348. // set its position as if it had been here all along and had matched the current
  349. // character. Next time we evaluate it will be just like all the others.
  350. myEvaluationMatrix->
  351. AddEvaluator(StreamStartPosition,Matrix[xWhiteSpace].Vector+xWhiteSpace);
  352. }
  353. }
  354. // WILD_INLINE
  355. // If that didn't do it for us...
  356. // Try to match any character EXCEPT a new line.
  357. if(i != '\n' && Matrix[xAnyInline].Character()==WILD_INLINE){
  358. // If we've matched our path
  359. // with any byte but \n then
  360. if(Condition==FALLEN_OFF) { // check: if no matches yet,
  361. Condition = DOING_OK; // upgrade to doing ok.
  362. CurrentPosition = xAnyInline +
  363. Matrix[xAnyInline].Vector; // Move myself along this path.
  364. }
  365. else { // Otherwise make a buddy...
  366. // If there's no duplicate buddy like this already, then we'll create one.
  367. // To create a buddy, add an evaluator at the top of the list (behind us) and
  368. // set its position as if it had been here all along and had matched the current
  369. // character. Next time we evaluate it will be just like all the others.
  370. myEvaluationMatrix->
  371. AddEvaluator(StreamStartPosition,Matrix[xAnyInline].Vector+xAnyInline);
  372. }
  373. }
  374. // WILD_ANYTHING
  375. // If that didn't do it for us...
  376. // Try to match any character.
  377. if(Matrix[xAnything].Character()==WILD_ANYTHING){
  378. // If we've matched our path
  379. // with any character then
  380. if(Condition==FALLEN_OFF) { // check: if no matches yet,
  381. Condition = DOING_OK; // upgrade to doing ok.
  382. CurrentPosition = xAnything +
  383. Matrix[xAnything].Vector; // Move myself along this path.
  384. }
  385. else { // Otherwise make a buddy...
  386. // If there's no duplicate buddy like this already, then we'll create one.
  387. // To create a buddy, add an evaluator at the top of the list (behind us) and
  388. // set its position as if it had been here all along and had matched the current
  389. // character. Next time we evaluate it will be just like all the others.
  390. myEvaluationMatrix->
  391. AddEvaluator(StreamStartPosition,Matrix[xAnything].Vector+xAnything);
  392. }
  393. }
  394. // 20021112 _M
  395. // Beginning with version 2 of Message Sniffer we've implemented a new construct
  396. // for run-loops that prevents any interference between rules where run-loops might
  397. // appear in locations coinciding with standard match bytes. The new methodology
  398. // uses a special run-loop-gateway character to isolate any run loops from standard
  399. // nodes in the matrix. Whenever a run-loop gateway is present at a node a buddy is
  400. // inserted AFTER the current evaluator so that it will evaluate the current character
  401. // from the position of the run-loop gateway. This allows run loops to occupy the same
  402. // positional space as standard matches while maintaining isolation between their paths
  403. // in the matrix.
  404. // We don't want to launch any run loop buddies unless we matched this far. If we did
  405. // match up to this point and the next character in a pattern includes a run loop then
  406. // we will find a gateway byte at this point representing the path to any run loops.
  407. // If we made it this far launch a buddy for any run-loop gateway that's present.
  408. // Of course, the buddy must be evaluated after this evaluator during this pass because
  409. // he will have shown up late... That is, we don't detect a run gateway until we're
  410. // sitting on a new node looking for a result... The very result we may be looking for
  411. // could be behind the gateway - so we launch the buddy behind us and he will be able
  412. // to match anything in this pass that we missed when looking for a non-run match.
  413. if(Matrix[xRunGateway].Character() == RUN_GATEWAY)
  414. myEvaluationMatrix->
  415. InsEvaluator(StreamStartPosition,Matrix[xRunGateway].Vector+xRunGateway);
  416. // At this point, we've tried all of our rules, and created any buddies we needed.
  417. // If we got a match, we terminated long ago. If we didn't, then we either stayed
  418. // on the path or we fell off. Either way, the flag is in Condition so we can send
  419. // it on.
  420. return Condition;
  421. }
  422. ///////////////////////////////////////////////////////////////////////////////////////////
  423. // EvaluationMatrix Implementations ///////////////////////////////////////////////////////
  424. // EvaluationMatrix::AddMatchRecord(int sp, int ep, int sym)
  425. // Most of this functionality is about deep scans - which have been put on hold for now
  426. // due to the complexity and the scope of the current application. For now, although
  427. // we will use this reporting mechanism, it will generally record only one event.
  428. MatchRecord* EvaluationMatrix::AddMatchRecord(int sp, int ep, int sym) {
  429. // 20030216 _M Added range check code to watch for corruption. Some systems have
  430. // reported matches with zero length indicating an undetected corruption. This
  431. // range check will detect and report it.
  432. if(sp==ep) // Check that we're in range - no zero
  433. throw OutOfRange("sp==ep"); // length pattern matches allowed!
  434. MatchRecord* NewMatchRecord = // Then, create the new result object
  435. new MatchRecord(sp,ep,sym); // by passing it the important parts.
  436. if(NewMatchRecord==NULL) // Check for a bad allocation and throw
  437. throw BadAllocation("NewMatchRecord==NULL"); // an exception if that happens.
  438. if(ResultList == NULL) { // If this is our first result we simply
  439. ResultList = NewMatchRecord; // add the result to our list, and of course
  440. LastResultInList = NewMatchRecord; // it is the end of the list as well.
  441. } else { // If we already have some results, then
  442. LastResultInList->NextMatchRecord = // we add the new record to the result list
  443. NewMatchRecord; // and record that the new record is now the
  444. LastResultInList = NewMatchRecord; // last result in the list.
  445. }
  446. return NewMatchRecord; // Return our new match record.
  447. }
  448. // EvaluationMatrix::AddEvaluator()
  449. // 20021112 _M
  450. // This function has been modified to include a check for duplicates as well as setting
  451. // the mount point for the new evaluator. This eliminates a good deal of code elsewhere
  452. // and encapsulates the complete operation. If a duplicate evaluator is found then the
  453. // function returns NULL indicating that nothing was done. In practice, no check is made
  454. // since any serious error conditions cause errors to be thrown from within this function
  455. // call. These notes apply to some extent to InsEvaluator which is copied from this function
  456. // and which has the only difference of putting the new evaluator after the current one
  457. // in the chain in order to support branch-out operations for loop sequences in the matrix.
  458. Evaluator* EvaluationMatrix::AddEvaluator(int s, unsigned int m) { // Adds a new evaluator at top.
  459. if(!isNoDuplicate(m)) return NULL; // If there is a duplicate do nothing.
  460. if(CountOfEvaluators >= MAX_EVALS) // If we've exceeded our population size
  461. throw MaxEvalsExceeded("Add:CountOfEvaluators >= MAX_EVALS"); // then throw an exception.
  462. Evaluator* NewEvaluator = SourceEvaluator(s,this); // Make up a new evaluator.
  463. if(NewEvaluator == NULL) // Check for a bad allocation and throw
  464. throw BadAllocation("Add:NewEvaluator == NULL"); // an exception if it happens.
  465. NewEvaluator->NextEvaluator = EvaluatorList; // Point the new evaluator to the list.
  466. EvaluatorList = NewEvaluator; // Then point the list head to
  467. // the new evaluator.
  468. NewEvaluator->CurrentPosition = m; // Esablish the mount point.
  469. ++CountOfEvaluators; // Add one to our evaluator count.
  470. if(CountOfEvaluators > MaximumCountOfEvaluators) // If the count is the biggest we
  471. MaximumCountOfEvaluators = CountOfEvaluators; // have seen then keep track of it.
  472. return NewEvaluator; // Return the new evaluator.
  473. }
  474. // EvaluationMatrix::InsEvaluator()
  475. Evaluator* EvaluationMatrix::InsEvaluator(int s, unsigned int m) { // Inserts a new evaluator.
  476. if(!isNoDuplicate(m)) return NULL; // If there is a duplicate do nothing.
  477. if(CountOfEvaluators >= MAX_EVALS) // If we've exceeded our population size
  478. throw MaxEvalsExceeded("Ins:CountOfEvaluators >= MAX_EVALS"); // then throw an exception.
  479. Evaluator* NewEvaluator = SourceEvaluator(s,this); // Make up a new evaluator.
  480. if(NewEvaluator == NULL) // Check for a bad allocation and throw
  481. throw BadAllocation("Ins:NewEvaluator == NULL"); // an exception if it happens.
  482. NewEvaluator->NextEvaluator = // Point the new evaluator where the
  483. CurrentEvaluator->NextEvaluator; // current evalautor points... then point
  484. CurrentEvaluator->NextEvaluator = // the current evaluator to this one. This
  485. NewEvaluator; // accomplishes the insert operation.
  486. NewEvaluator->CurrentPosition = m; // Esablish the mount point.
  487. ++CountOfEvaluators; // Add one to our evaluator count.
  488. if(CountOfEvaluators > MaximumCountOfEvaluators) // If the count is the biggest we
  489. MaximumCountOfEvaluators = CountOfEvaluators; // have seen then keep track of it.
  490. return NewEvaluator; // Return the new evaluator.
  491. }
  492. // EvaluationMatrix::DropEvaluator()
  493. void EvaluationMatrix::DropEvaluator() { // Drops the current evaluator from the matrix.
  494. Evaluator* WhereTo = CurrentEvaluator->NextEvaluator; // Where do we go from here?
  495. // First step is to heal the list as if the current evaluator were not present.
  496. // If there is no previous evaluator - meaning this should be the first one in the
  497. // list - then we point the list head to the next evaluator on the list (WhereTo)
  498. if(PreviousEvaluator != NULL) // If we have a Previous then
  499. PreviousEvaluator->NextEvaluator = WhereTo; // our next is it's next.
  500. else // If we don't then our next
  501. EvaluatorList = WhereTo; // is the first in the list.
  502. // Now that our list is properly healed, it's time to drop the dead evaluator and
  503. // get on with our lives...
  504. CurrentEvaluator->NextEvaluator = NULL; // Disconnect from any list.
  505. CacheEvaluator(CurrentEvaluator); // Drop the current eval.
  506. CurrentEvaluator = WhereTo; // Move on.
  507. --CountOfEvaluators; // Reduce our evaluator count.
  508. }
  509. // EvaluationMatrix::EvaluateThis()
  510. //
  511. // This function returns the number of matches that were found. It is possible for more
  512. // than one evaluator to match on a single character.
  513. //
  514. // 0 indicates no matches were found.
  515. // >0 indicates some matches were found.
  516. // If there is a problem then an exception will be thrown.
  517. int EvaluationMatrix::EvaluateThis(unsigned short int i) {
  518. AddEvaluator(CountOfCharacters,0); // First, add a new Evaluator at the root of the
  519. // matrix for the current position in the scan
  520. // stream.
  521. // The new evaluator is now at the top of our list.
  522. // If there was a problem then an exception will have been thrown.
  523. // If our allocation worked ok, then we'll be here and ready to start scanning
  524. // the rule set with our current character.
  525. PassResult = 0; // Start by assuming we won't match.
  526. CurrentEvaluator = EvaluatorList; // Start at the top of the list.
  527. PreviousEvaluator = NULL; // NULL means previous is the top.
  528. // 20030216 _M
  529. // Next do some basic conversions and evaluations so they don't need to be done
  530. // again within the evaluators. From now on the evaluators will look here for basic
  531. // conversions and boolean check values rather than performing the checks themselves.
  532. i_lower = tolower(i); // Convert i to lower case.
  533. i_isDigit = isdigit(i); // Check for a digit.
  534. i_isSpace = isspace(i); // Check for whitespace.
  535. i_isAlpha = isalpha(i); // Check for letters.
  536. // Next, loop through the list and pass the incoming character to
  537. // each evaluator. Drop those that fall off, and record those that terminate. The
  538. // rest of them stick around to walk their paths until they meet their fate.
  539. while(CurrentEvaluator != NULL) { // While there are more evaluators...
  540. // go through the list and evaluate
  541. switch(CurrentEvaluator->EvaluateThis(i)) { // the current character against each.
  542. case Evaluator::FALLEN_OFF: { // If we've fallen off the path
  543. DropEvaluator(); // drop the current evaluator and
  544. break; // move on with our lives.
  545. }
  546. case Evaluator::DOING_OK: { // If we're still going then...
  547. PreviousEvaluator = CurrentEvaluator; // keep track of where we've been and
  548. CurrentEvaluator = // move forward to the next evaluator
  549. CurrentEvaluator->NextEvaluator; // in the list.
  550. break;
  551. }
  552. case Evaluator::TERMINATED: { // If we've terminated a path...
  553. ++PassResult; // Record our PassResult.
  554. // Create a new match result using the data in the current evaluator.
  555. // If there is a problem adding the match an exception will be thrown.
  556. AddMatchRecord(
  557. CurrentEvaluator->StreamStartPosition,
  558. CountOfCharacters - 1,
  559. myTokenMatrix->Symbol(CurrentEvaluator->CurrentPosition)
  560. );
  561. // From Version 2 onward we're always doing deep scans...
  562. // Having successfully recorded the result of this critter we can kill them off.
  563. DropEvaluator(); // He's dead.
  564. break; // Now let's keep looking.
  565. }
  566. case Evaluator::OUT_OF_RANGE: { // This result is really bad and
  567. throw OutOfRange("case Evaluator::OUT_OF_RANGE:"); // probably means we have a bad matrix.
  568. break;
  569. // The reason we don't throw OutOfRange from within the evaluator is that we
  570. // may want to take some other action in the future... So, we allow the evaluator
  571. // to tell us we sent it out of range and then we decide what to do about it.
  572. }
  573. }
  574. }
  575. // At the end of this function our PassResult is either an error (which is
  576. // reported immediately), or it is a match condition. We start out by assuming
  577. // there will be no match. If we find one, then we reset that result... so at
  578. // this point, all we need do is report our findings.
  579. ++CountOfCharacters; // Add one to our Character Count statistic.
  580. // Note that from this point on, the index in the stream is one less than the
  581. // CountOfCharacters... for example, if I've evaluated (am evaluating) one character
  582. // the it's index is 0. This will be important when we create any match records.
  583. return PassResult; // When we're finished, return the last known result.
  584. }