Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

snf_engine.cpp 41KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807
  1. // snf_engine.cpp
  2. //
  3. // (C) 1985-2004 MicroNeil Research Corporation
  4. // (C) 2005-2009 ARM Research Labs, LLC
  5. // See www.armresearch.com for the copyright terms.
  6. //
  7. // Derived from original work on cellular automation for complex pattern
  8. // reflex engine 1985 Pete McNeil (Madscientist)
  9. //
  10. // Derived from rapid scripting engine (token matrix) implementation 1987
  11. //
  12. // 20040419 _M Adding Verify() method. Beginning with version 2-3 of Message Sniffer
  13. // we are embedding a Mangler digest of the rulebase file. The Verify() method reconstructs
  14. // the digest and compares it. This ensures that no part of the rulebase file can be
  15. // corrupted without the snf2check utility detecting the problem. Prior to this version
  16. // it was possible to have undetected corruption in the middle of the rulebase file. The
  17. // Mangler digest will prevent that.
  18. // 20030130 _M Added testing section in TokenMatrix to throw an exception if the file
  19. // is too small to be a valid matrix. The value is calculated based on the idea that a
  20. // valid matrix will have been encrypted in two segments so the file must be at least
  21. // as large as these two segments. This is intended to solve the zero-length-rulebase
  22. // bug where an access violation would occur if the file was of zero length.
  23. // 20021030 _M Creation of snf_engine module by dragging the sniffer pattern matching engine out
  24. // of the sniffer.cpp file.
  25. #include <unistd.h>
  26. #include <cstdio>
  27. #include <cctype>
  28. #include <ctime>
  29. #include <cstdlib>
  30. #include <fstream>
  31. #include <iostream>
  32. #include <string>
  33. #include <vector>
  34. #include "../CodeDweller/mangler.hpp"
  35. #include "snf_engine.hpp"
  36. namespace cd = codedweller;
  37. ///////////////////////////////////////////////////////////////////////////////////////////
  38. // BEGIN IMPLEMENTATIONS //////////////////////////////////////////////////////////////////
  39. ///////////////////////////////////////////////////////////////////////////////////////////
  40. ///////////////////////////////////////////////////////////////////////////////////////////
  41. // Token Matrix Implementations ///////////////////////////////////////////////////////////
  42. // TokenMatrix::Load(filename)
  43. void TokenMatrix::Load(std::string& FileName) { // Initialize using a string for file name.
  44. Load(FileName.c_str()); // Convert the string to a null terminated
  45. } // char* and call the function below.
  46. void TokenMatrix::Load(const char* FileName) { // Initializes the token matrix by file name.
  47. std::ifstream MatrixFile(FileName,std::ios::binary); // Open the file.
  48. if(MatrixFile.bad()) // If anything is wrong with the file
  49. throw BadFile("TokenMatrix::Load() finds MatrixFile.bad()"); // then throw a bad file exception.
  50. Load(MatrixFile); // Load the matrix from the file.
  51. MatrixFile.close(); // Be nice and clean up our file.
  52. }
  53. // TokenMatrix::Load(stream)
  54. const cd::AbortCheck CompatibleIntSizeCheck("TokenMatrix::Load():CompatibleIntSizeCheck(sizeof(unsigned int)==4)");
  55. void TokenMatrix::Load(std::ifstream& F) { // Initializes the token matrix from a file.
  56. CompatibleIntSizeCheck(sizeof(unsigned int)==4); // Check our assumptions.
  57. MatrixSize = 0; // Clear out the old Matrix Size and array.
  58. if(Matrix) delete Matrix; // that is, if there is an array.
  59. F.seekg(0,std::ios::end); // Find the end of the file.
  60. MatrixSize = F.tellg() / sizeof(Token); // Calculate how many tokens.
  61. F.seekg(0); // Go back to the beginning.
  62. if(MatrixSize < MinimumValidMatrix) // If the matrix file is too small then
  63. throw BadMatrix("TokenMatrix::Load() (MatrixSize < MinimumValidMatrix)"); // we must reject it.
  64. Matrix = new Token[MatrixSize]; // Allocate an array of tokens.
  65. if(Matrix == NULL) // Check for an allocation error.
  66. throw BadAllocation("TokenMatrix::Load() Matrix == NULL)"); // and throw an exception if it happens.
  67. F.read( // Now read the file into the allocated
  68. reinterpret_cast<char*>(Matrix), // matrix by recasting it as a character
  69. (MatrixSize * sizeof(Token))); // buffer of the correct size.
  70. if(F.bad()) // If there were any problems reading the
  71. throw BadMatrix("TokenMatrix::Load() (F.bad())"); // matrix then report the bad matrix.
  72. }
  73. // TokenMatrix::Validate(key)
  74. void TokenMatrix::Validate(std::string& SecurityKey) { // Decrypts and validates the matrix.
  75. cd::Mangler ValidationChecker; // Create a mangler engine for validation.
  76. // In order to do the validation we must look at the token matrix as a sequence of bytes.
  77. // We will be decrypting the first and last SecurtySegmentSize of this sequence and then
  78. // detecting wether the appropriate security key has been properly encrypted in the end.
  79. // If we find everything as it should be then we can be sure that the two segments have
  80. // not been tampered with and that we have the correct security key.
  81. unsigned char* TokensAsBytes = reinterpret_cast<unsigned char*>(Matrix);
  82. int BytesInTokenMatrix = (MatrixSize * sizeof(Token));
  83. // Now that we have all of that stuff let's initialize our ValidationChecker.
  84. // Note that the length of our security key is always 24 bytes. The license
  85. // id is 8 bytes, the authentication code is 16 bytes. We don't bother to check
  86. // here because if it's wrong then nothing will decrypt and we'll have essentially
  87. // the same result. Note also that on the end of the rule file we pad this
  88. // encrypted security id with nulls so that we can create a string from it easily
  89. // and so that we have precisely 32 bytes which is the same size as 4 tokens.
  90. //
  91. // Note: The 32 byte value is in SecurityKeyBufferSize. This means that we can
  92. // accept security keys up to 31 bytes in length. We need the ending null to
  93. // assure our null terminated string is as expected. The security key block must
  94. // match up with the edges of tokens in the matrix so we pad the end with nulls
  95. // when encoding the security key in the encoded file.
  96. int SecurityKeyLength = SecurityKey.length(); // For the length of our key
  97. for(int a=0;a<SecurityKeyLength;a++) // feed each byte through the
  98. ValidationChecker.Encrypt(SecurityKey.at(a)); // mangler to evolve the key
  99. // state.
  100. // Now we're ready to decrypt the matrix... We start with the first segment.
  101. for(int a=0;a<SecuritySegmentSize;a++) // For the length of the segment
  102. TokensAsBytes[a] = // replace each byte with the
  103. ValidationChecker.Decrypt(TokensAsBytes[a]); // decrypted byte.
  104. // Next we decrypt the last security segment...
  105. for(int a= BytesInTokenMatrix - SecuritySegmentSize; a<BytesInTokenMatrix; a++)
  106. TokensAsBytes[a] =
  107. ValidationChecker.Decrypt(TokensAsBytes[a]);
  108. // Now that we've done this we should find that our SecurityKey is at the end
  109. // of the loaded token matrix... Let's look and find out shall we?!!!
  110. unsigned char* SecurityCheckKey = // Reference the check
  111. & TokensAsBytes[BytesInTokenMatrix-SecurityKeyBufferSize]; // space in the matrix.
  112. SecurityCheckKey[SecurityKeyBufferSize-1] = 0; // Add a safety null just in case.
  113. std::string SecurityCheck((char*)SecurityCheckKey); // Make a string.
  114. // By now we should have a SecurityCheck string to compare to our SecurityKey.
  115. // If they match then we know everything worked out and that our token matrix has
  116. // been decrypted properly. This is also a good indication that our token matrix
  117. // is not incomplete since if it were the decryption wouldn't work. Saddly, we
  118. // don't have the computing cycles to decrypt the entire file - so we won't be
  119. // doing that until we can load it in a server/daemon and then reuse it over and
  120. // over... Once that happens we will be able to detect tampering also.
  121. if(SecurityKey != SecurityCheck) // If the security keys don't match
  122. throw BadMatrix("TokenMatrix::Validate() (SecurityKey != SecurityCheck)"); // then we have an invalid matrix.
  123. }
  124. // TokenMatrix::Verify(key)
  125. void TokenMatrix::Verify(std::string& SecurityKey) { // Builds and verifies a file digest.
  126. cd::Mangler DigestChecker; // Create a mangler for the digest.
  127. // Gain access to our token matrix as bytes.
  128. unsigned char* TokensAsBytes = reinterpret_cast<unsigned char*>(Matrix);
  129. int BytesInTokenMatrix = (MatrixSize * sizeof(Token));
  130. // Initialize our digest engine with the security key.
  131. int SecurityKeyLength = SecurityKey.length(); // For the length of our key
  132. for(int a=0;a<SecurityKeyLength;a++) // feed each byte through the
  133. DigestChecker.Encrypt(SecurityKey.at(a)); // mangler to evolve the key
  134. // state.
  135. // Build the digest.
  136. int IndexOfDigest = // Find the index of the digest by
  137. BytesInTokenMatrix - // starting at the end of the matrix,
  138. SecurityKeyBufferSize - // backing up past the security key,
  139. RulebaseDigestSize; // then past the digest.
  140. int a=0; // Keep track of where we are.
  141. for(;a<IndexOfDigest;a++) // Loop through up to the digest and
  142. DigestChecker.Encrypt(TokensAsBytes[a]); // pump the file through the mangler.
  143. // Now that the digest is built we must test it.
  144. // The original was emitted by encrypting 0s so if we do the same thing we will match.
  145. for(int b=0;b<RulebaseDigestSize;b++) // Loop through the digest and compare
  146. if(DigestChecker.Encrypt(0)!=TokensAsBytes[a+b]) // our digest to the stored digest. If
  147. throw BadMatrix("TokenMatrix::Verify() Bad Digest"); // any byte doesn't match it's bad!
  148. // If we made it through all of that then we're valid :-)
  149. }
  150. void TokenMatrix::FlipEndian() { // Converts big/little endian tokens.
  151. unsigned int* UInts = reinterpret_cast<unsigned int*>(Matrix); // Grab the matrix as uints.
  152. int Length = ((MatrixSize * sizeof(Token)) / sizeof(unsigned int)); // Calculate it's size.
  153. for(int i = 0; i < Length; i++) { // Loop through the array of u ints
  154. unsigned int x = UInts[i]; // and re-order the bytes in each
  155. x = ((x & 0xff000000) >> 24) | // one to swap from big/little endian
  156. ((x & 0x00ff0000) >> 8) | // to little/big endian.
  157. ((x & 0x0000ff00) << 8) |
  158. ((x & 0x000000ff) << 24);
  159. UInts[i] = x; // Put the flipped int back.
  160. }
  161. }
  162. // Evaluator Implementations //////////////////////////////////////////////////////////////
  163. // 20030216 _M Optimization conversions
  164. // 20140119 _M Deprecated by jump table in evaluator
  165. // inline int Evaluator::i_lower() { return myEvaluationMatrix->i_lower; }
  166. // inline bool Evaluator::i_isDigit() { return myEvaluationMatrix->i_isDigit; }
  167. // inline bool Evaluator::i_isSpace() { return myEvaluationMatrix->i_isSpace; }
  168. // inline bool Evaluator::i_isAlpha() { return myEvaluationMatrix->i_isAlpha; }
  169. // Evaluator::Evaluator(position,evalmatrix) Constructor
  170. Evaluator::Evaluator(unsigned int s, EvaluationMatrix* m)
  171. : myEvaluationMatrix(m),
  172. JumpPoint(0),
  173. Condition(DOING_OK),
  174. NextEvaluator(NULL),
  175. StreamStartPosition(s),
  176. CurrentPosition(0),
  177. WildRunLength(0) { // Constructor...
  178. Matrix = myEvaluationMatrix->getTokens(); // Capture the token matrix I walk in.
  179. MatrixSize = myEvaluationMatrix->getMatrixSize(); // And get it's size.
  180. PositionLimit = MatrixSize - 256;
  181. }
  182. // Of course I may need to resolve some of the following
  183. // wildcard characters.
  184. int Evaluator::xLetter() { return (JumpPoint + WILD_LETTER); } // Match Any letter.
  185. int Evaluator::xDigit() { return (JumpPoint + WILD_DIGIT); } // Match Any digit.
  186. int Evaluator::xNonWhite() { return (JumpPoint + WILD_NONWHITE); } // Match Any non-whitespace.
  187. int Evaluator::xWhiteSpace() { return (JumpPoint + WILD_WHITESPACE); } // Match Any whitespace.
  188. int Evaluator::xAnyInline() { return (JumpPoint + WILD_INLINE); } // Match Any byte but new line.
  189. int Evaluator::xAnything() { return (JumpPoint + WILD_ANYTHING); } // Match Any character at all.
  190. int Evaluator::xRunGateway() { return (JumpPoint + RUN_GATEWAY); } // Match the run-loop gateway.
  191. // void Evaluator::doFollowOrMakeBuddy()
  192. void Evaluator::doFollowOrMakeBuddy(int xKey) {
  193. bool shouldFollow = (FALLEN_OFF == Condition); // What should we do?
  194. if(shouldFollow) { // This is how we follow
  195. Condition = DOING_OK;
  196. CurrentPosition = xKey +
  197. Matrix[xKey].Vector;
  198. }
  199. else { // This is how we make a buddy
  200. myEvaluationMatrix->
  201. AddEvaluator(StreamStartPosition,Matrix[xKey].Vector+xKey);
  202. }
  203. }
  204. void Evaluator::tryFollowingPrecisePath(unsigned short int i) {
  205. int xPrecise = JumpPoint + i; // Match Precise Character
  206. if(Matrix[xPrecise].Character() == i) { // If we've matched our path
  207. doFollowOrMakeBuddy(xPrecise);
  208. }
  209. if(DOING_OK == Condition) WildRunLength = 0;
  210. }
  211. void Evaluator::tryFollowingNoCasePath(unsigned short int i) {
  212. i = tolower(i);
  213. int xNoCase = JumpPoint + i; // Match caps to lower (case insensitive)
  214. if(Matrix[xNoCase].Character()==i){
  215. doFollowOrMakeBuddy(xNoCase);
  216. }
  217. if(DOING_OK == Condition) WildRunLength = 0;
  218. }
  219. void Evaluator::tryFollowingWildAlphaPath() {
  220. if(Matrix[xLetter()].Character()==WILD_LETTER){
  221. doFollowOrMakeBuddy(xLetter());
  222. }
  223. }
  224. void Evaluator::tryFollowingWildDigitPath() {
  225. if(Matrix[xDigit()].Character()==WILD_DIGIT){
  226. doFollowOrMakeBuddy(xDigit());
  227. }
  228. }
  229. void Evaluator::tryFollowingWildNonWhitePath() {
  230. if(Matrix[xNonWhite()].Character()==WILD_NONWHITE){
  231. doFollowOrMakeBuddy(xNonWhite());
  232. }
  233. }
  234. void Evaluator::tryFollowingWildWhitePath() {
  235. if(Matrix[xWhiteSpace()].Character()==WILD_WHITESPACE){
  236. doFollowOrMakeBuddy(xWhiteSpace());
  237. }
  238. }
  239. void Evaluator::tryFollowingWildInlinePath() {
  240. if(Matrix[xAnyInline()].Character()==WILD_INLINE){
  241. doFollowOrMakeBuddy(xAnyInline());
  242. }
  243. }
  244. void Evaluator::tryFollowingWildAnythingPath() {
  245. if(Matrix[xAnything()].Character()==WILD_ANYTHING){
  246. doFollowOrMakeBuddy(xAnything());
  247. }
  248. }
  249. void Evaluator::doFollowerJumpTable(unsigned short int i) {
  250. // tryFollowingPrecisePath(i);
  251. // tryFollowingUppercasePath(); 0x41 - 0x5A
  252. // tryFollowingWildAlphaPath(); 0x61 - 0x7A
  253. // tryFollowingWildDigitPath(); 0x30 - 0x39
  254. // tryFollowingWildWhitePath(); 0x09 - 0x0D, 0x20
  255. // tryFollowingWildNonWhitePath(); > 0x20
  256. // tryFollowingWildInlinePath(); Not 0x0A, or 0x0D
  257. switch(i) {
  258. // These nnly match WildAnything because they conflict with special check values...
  259. // NUL, SOH, STX, ETX, EOT, ENQ, ACK, BEL, BS, TAB, LF, VT, FF, CR, SO, SI
  260. case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: case 0x05: case 0x06: case 0x07:
  261. case 0x08: {
  262. break;
  263. }
  264. // tab
  265. case 0x09: {
  266. tryFollowingPrecisePath(i);
  267. tryFollowingWildWhitePath();
  268. tryFollowingWildInlinePath();
  269. break;
  270. }
  271. // LF, VT, FF, CR, SO, SI
  272. case 0x0A: case 0x0B: case 0x0C: case 0x0D: case 0x0E: case 0x0F:
  273. // DLE, DC1, DC2, DC3, DC4, NAK, SYN, ETB, CAN, EM, SUB, ESC, FS, GS, RS, US
  274. case 0x10: case 0x11: case 0x12: case 0x13: case 0x14: case 0x15: case 0x16: case 0x17:
  275. case 0x18: case 0x19: case 0x1A: case 0x1B: case 0x1C: case 0x1D: case 0x1E: case 0x1F: {
  276. tryFollowingPrecisePath(i);
  277. tryFollowingWildWhitePath();
  278. break;
  279. }
  280. // the final fronteer
  281. case 0x20: {
  282. tryFollowingPrecisePath(i);
  283. tryFollowingWildWhitePath();
  284. tryFollowingWildInlinePath();
  285. break;
  286. }
  287. // ! " # $ % & ' ( ) * + , - . /
  288. case 0x21: case 0x22: case 0x23: case 0x24: case 0x25: case 0x26: case 0x27:
  289. case 0x28: case 0x29: case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E: case 0x2F: {
  290. tryFollowingPrecisePath(i);
  291. tryFollowingWildNonWhitePath();
  292. tryFollowingWildInlinePath();
  293. break;
  294. }
  295. // 0 - 9
  296. case 0x30: case 0x31: case 0x32: case 0x33: case 0x34: case 0x35: case 0x36: case 0x37:
  297. case 0x38: case 0x39: {
  298. tryFollowingPrecisePath(i);
  299. tryFollowingWildDigitPath();
  300. tryFollowingWildNonWhitePath();
  301. tryFollowingWildInlinePath();
  302. break;
  303. }
  304. // : ; < = > ? @
  305. case 0x3A: case 0x3B: case 0x3C: case 0x3D: case 0x3E: case 0x3F:
  306. case 0x40: {
  307. tryFollowingPrecisePath(i);
  308. tryFollowingWildNonWhitePath();
  309. tryFollowingWildInlinePath();
  310. break;
  311. }
  312. // A - Z
  313. case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
  314. case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C: case 0x4D: case 0x4E: case 0x4F:
  315. case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57:
  316. case 0x58: case 0x59: case 0x5A: {
  317. tryFollowingPrecisePath(i);
  318. tryFollowingNoCasePath(i);
  319. tryFollowingWildAlphaPath();
  320. tryFollowingWildNonWhitePath();
  321. tryFollowingWildInlinePath();
  322. break;
  323. }
  324. // [ \ ] ^ _ `
  325. case 0x5B: case 0x5C: case 0x5D: case 0x5E: case 0x5F:
  326. case 0x60: {
  327. tryFollowingPrecisePath(i);
  328. tryFollowingWildNonWhitePath();
  329. tryFollowingWildInlinePath();
  330. break;
  331. }
  332. // a - z
  333. case 0x61: case 0x62: case 0x63: case 0x64: case 0x65: case 0x66: case 0x67:
  334. case 0x68: case 0x69: case 0x6A: case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
  335. case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
  336. case 0x78: case 0x79: case 0x7A: {
  337. tryFollowingPrecisePath(i);
  338. tryFollowingWildAlphaPath();
  339. tryFollowingWildNonWhitePath();
  340. tryFollowingWildInlinePath();
  341. break;
  342. }
  343. // { | } ~
  344. case 0x7B: case 0x7C: case 0x7D: case 0x7E: case 0x7F: {
  345. tryFollowingPrecisePath(i);
  346. tryFollowingWildNonWhitePath();
  347. tryFollowingWildInlinePath();
  348. }
  349. // high ascii
  350. case 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: case 0x87:
  351. case 0x88: case 0x89: case 0x8A: case 0x8B: case 0x8C: case 0x8D: case 0x8E: case 0x8F:
  352. case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: case 0x95: case 0x96: case 0x97:
  353. case 0x98: case 0x99: case 0x9A: case 0x9B: case 0x9C: case 0x9D: case 0x9E: case 0x9F:
  354. case 0xA0: case 0xA1: case 0xA2: case 0xA3: case 0xA4: case 0xA5: case 0xA6: case 0xA7:
  355. case 0xA8: case 0xA9: case 0xAA: case 0xAB: case 0xAC: case 0xAD: case 0xAE: case 0xAF:
  356. case 0xB0: case 0xB1: case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6: case 0xB7:
  357. case 0xB8: case 0xB9: case 0xBA: case 0xBB: case 0xBC: case 0xBD: case 0xBE: case 0xBF:
  358. case 0xC0: case 0xC1: case 0xC2: case 0xC3: case 0xC4: case 0xC5: case 0xC6: case 0xC7:
  359. case 0xC8: case 0xC9: case 0xCA: case 0xCB: case 0xCC: case 0xCD: case 0xCE: case 0xCF:
  360. case 0xD0: case 0xD1: case 0xD2: case 0xD3: case 0xD4: case 0xD5: case 0xD6: case 0xD7:
  361. case 0xD8: case 0xD9: case 0xDA: case 0xDB: case 0xDC: case 0xDD: case 0xDE: case 0xDF:
  362. case 0xE0: case 0xE1: case 0xE2: case 0xE3: case 0xE4: case 0xE5: case 0xE6: case 0xE7:
  363. case 0xE8: case 0xE9: case 0xEA: case 0xEB: case 0xEC: case 0xED: case 0xEE: case 0xEF:
  364. case 0xF0: case 0xF1: case 0xF2: case 0xF3: case 0xF4: case 0xF5: case 0xF6: case 0xF7:
  365. case 0xF8: case 0xF9: case 0xFA: case 0xFB: case 0xFC: case 0xFD: case 0xFE: case 0xFF: {
  366. tryFollowingPrecisePath(i);
  367. tryFollowingWildNonWhitePath();
  368. tryFollowingWildInlinePath();
  369. break;
  370. }
  371. }
  372. tryFollowingWildAnythingPath();
  373. }
  374. // Evaluator::EvaluateThis()
  375. Evaluator::States Evaluator::EvaluateThis(unsigned short int i) { // Follow the this byte.
  376. // First upgrade will be to DOING_OK, after that we launch buddies.
  377. Condition = FALLEN_OFF; // Start off guessing we'll fall off.
  378. // In order to handle wildcard characters, this evaluation function must actually
  379. // compare the character to a number of possibilities in most-specific to least-
  380. // specific order to see if any match. In order to support overlapping rule sets,
  381. // if more than one wildcard matches at this node, an additional evaluator will be
  382. // placed in line already _AT THIS PATH POINT_ so that both possibilities will be
  383. // explored. New evaluators are always added at the TOP of the list so we are always
  384. // guaranteed not to overdrive an evaluator and end up in a recursive race condition.
  385. // 20140121_M The previous optimization with binary flags has been replaced with
  386. // a jump table implementation. Now, each byte only excites behaviors that are
  387. // possible for the current byte so only those paths will be tested.
  388. if(CurrentPosition >= PositionLimit) return Condition = OUT_OF_RANGE;
  389. // All of the positions calculated below are guaranteed to be within the ranges checked
  390. // above so we're safe if we get to this point.
  391. // So, at this point it's safe to check and see if I'm terminated. Note that if I
  392. // am at a termination point, my path has terminated and I have a symbol so I don't
  393. // need to resolve any more characters - even the current one.
  394. if(Matrix[CurrentPosition].isTermination()) return Condition = TERMINATED;
  395. // NOTE: The above is written for sudden-death termination. Eventually we will want
  396. // to support deep - filters which will show every rule match and this will need to
  397. // be rewritten.
  398. // Evaluation order, most-to-least specific with what is possible for that byte.
  399. JumpPoint = CurrentPosition;
  400. doFollowerJumpTable(i); // Excite followers based on this byte.
  401. { // Precise matches reset the wild run counter.
  402. ++WildRunLength; // Count up the run length.
  403. if(WildRunLength >= MaxWildRunLength) // If we exceed the max then
  404. return Condition = FALLEN_OFF; // we've fallen off the path
  405. } // and we do it immediately.
  406. // 20021112 _M
  407. // Beginning with version 2 of Message Sniffer we've implemented a new construct
  408. // for run-loops that prevents any interference between rules where run-loops might
  409. // appear in locations coinciding with standard match bytes. The new methodology
  410. // uses a special run-loop-gateway character to isolate any run loops from standard
  411. // nodes in the matrix. Whenever a run-loop gateway is present at a node a buddy is
  412. // inserted AFTER the current evaluator so that it will evaluate the current character
  413. // from the position of the run-loop gateway. This allows run loops to occupy the same
  414. // positional space as standard matches while maintaining isolation between their paths
  415. // in the matrix.
  416. // We don't want to launch any run loop buddies unless we matched this far. If we did
  417. // match up to this point and the next character in a pattern includes a run loop then
  418. // we will find a gateway byte at this point representing the path to any run loops.
  419. // If we made it this far launch a buddy for any run-loop gateway that's present.
  420. // Of course, the buddy must be evaluated after this evaluator during this pass because
  421. // he will have shown up late... That is, we don't detect a run gateway until we're
  422. // sitting on a new node looking for a result... The very result we may be looking for
  423. // could be behind the gateway - so we launch the buddy behind us and he will be able
  424. // to match anything in this pass that we missed when looking for a non-run match.
  425. if(Matrix[xRunGateway()].Character() == RUN_GATEWAY)
  426. myEvaluationMatrix->
  427. InsEvaluator(StreamStartPosition,Matrix[xRunGateway()].Vector+xRunGateway());
  428. // At this point, we've tried all of our rules, and created any buddies we needed.
  429. // If we got a match, we terminated long ago. If we didn't, then we either stayed
  430. // on the path or we fell off. Either way, the flag is in Condition so we can send
  431. // it on.
  432. return Condition;
  433. }
  434. ///////////////////////////////////////////////////////////////////////////////////////////
  435. // EvaluationMatrix Implementations ///////////////////////////////////////////////////////
  436. // EvaluationMatrix::AddMatchRecord(int sp, int ep, int sym)
  437. // Most of this functionality is about deep scans - which have been put on hold for now
  438. // due to the complexity and the scope of the current application. For now, although
  439. // we will use this reporting mechanism, it will generally record only one event.
  440. MatchRecord* EvaluationMatrix::AddMatchRecord(int sp, int ep, int sym) {
  441. // 20030216 _M Added range check code to watch for corruption. Some systems have
  442. // reported matches with zero length indicating an undetected corruption. This
  443. // range check will detect and report it.
  444. if(sp==ep) // Check that we're in range - no zero
  445. throw OutOfRange("sp==ep"); // length pattern matches allowed!
  446. MatchRecord* NewMatchRecord = // Then, create the new result object
  447. new MatchRecord(sp,ep,sym); // by passing it the important parts.
  448. if(NewMatchRecord==NULL) // Check for a bad allocation and throw
  449. throw BadAllocation("NewMatchRecord==NULL"); // an exception if that happens.
  450. if(ResultList == NULL) { // If this is our first result we simply
  451. ResultList = NewMatchRecord; // add the result to our list, and of course
  452. LastResultInList = NewMatchRecord; // it is the end of the list as well.
  453. } else { // If we already have some results, then
  454. LastResultInList->NextMatchRecord = // we add the new record to the result list
  455. NewMatchRecord; // and record that the new record is now the
  456. LastResultInList = NewMatchRecord; // last result in the list.
  457. }
  458. return NewMatchRecord; // Return our new match record.
  459. }
  460. // EvaluationMatrix::AddEvaluator()
  461. // 20021112 _M
  462. // This function has been modified to include a check for duplicates as well as setting
  463. // the mount point for the new evaluator. This eliminates a good deal of code elsewhere
  464. // and encapsulates the complete operation. If a duplicate evaluator is found then the
  465. // function returns NULL indicating that nothing was done. In practice, no check is made
  466. // since any serious error conditions cause errors to be thrown from within this function
  467. // call. These notes apply to some extent to InsEvaluator which is copied from this function
  468. // and which has the only difference of putting the new evaluator after the current one
  469. // in the chain in order to support branch-out operations for loop sequences in the matrix.
  470. Evaluator* EvaluationMatrix::AddEvaluator(int s, unsigned int m) { // Adds a new evaluator at top.
  471. if(!isNoDuplicate(m)) return NULL; // If there is a duplicate do nothing.
  472. if(CountOfEvaluators >= MAX_EVALS) // If we've exceeded our population size
  473. throw MaxEvalsExceeded("Add:CountOfEvaluators >= MAX_EVALS"); // then throw an exception.
  474. Evaluator* NewEvaluator = SourceEvaluator(s,this); // Make up a new evaluator.
  475. if(NewEvaluator == NULL) // Check for a bad allocation and throw
  476. throw BadAllocation("Add:NewEvaluator == NULL"); // an exception if it happens.
  477. NewEvaluator->NextEvaluator = EvaluatorList; // Point the new evaluator to the list.
  478. EvaluatorList = NewEvaluator; // Then point the list head to
  479. // the new evaluator.
  480. NewEvaluator->CurrentPosition = m; // Esablish the mount point.
  481. ++CountOfEvaluators; // Add one to our evaluator count.
  482. if(CountOfEvaluators > MaximumCountOfEvaluators) // If the count is the biggest we
  483. MaximumCountOfEvaluators = CountOfEvaluators; // have seen then keep track of it.
  484. return NewEvaluator; // Return the new evaluator.
  485. }
  486. // EvaluationMatrix::InsEvaluator()
  487. Evaluator* EvaluationMatrix::InsEvaluator(int s, unsigned int m) { // Inserts a new evaluator.
  488. if(!isNoDuplicate(m)) return NULL; // If there is a duplicate do nothing.
  489. if(CountOfEvaluators >= MAX_EVALS) // If we've exceeded our population size
  490. throw MaxEvalsExceeded("Ins:CountOfEvaluators >= MAX_EVALS"); // then throw an exception.
  491. Evaluator* NewEvaluator = SourceEvaluator(s,this); // Make up a new evaluator.
  492. if(NewEvaluator == NULL) // Check for a bad allocation and throw
  493. throw BadAllocation("Ins:NewEvaluator == NULL"); // an exception if it happens.
  494. NewEvaluator->NextEvaluator = // Point the new evaluator where the
  495. CurrentEvaluator->NextEvaluator; // current evalautor points... then point
  496. CurrentEvaluator->NextEvaluator = // the current evaluator to this one. This
  497. NewEvaluator; // accomplishes the insert operation.
  498. NewEvaluator->CurrentPosition = m; // Esablish the mount point.
  499. ++CountOfEvaluators; // Add one to our evaluator count.
  500. if(CountOfEvaluators > MaximumCountOfEvaluators) // If the count is the biggest we
  501. MaximumCountOfEvaluators = CountOfEvaluators; // have seen then keep track of it.
  502. return NewEvaluator; // Return the new evaluator.
  503. }
  504. // EvaluationMatrix::DropEvaluator()
  505. void EvaluationMatrix::DropEvaluator() { // Drops the current evaluator from the matrix.
  506. Evaluator* WhereTo = CurrentEvaluator->NextEvaluator; // Where do we go from here?
  507. // First step is to heal the list as if the current evaluator were not present.
  508. // If there is no previous evaluator - meaning this should be the first one in the
  509. // list - then we point the list head to the next evaluator on the list (WhereTo)
  510. if(PreviousEvaluator != NULL) // If we have a Previous then
  511. PreviousEvaluator->NextEvaluator = WhereTo; // our next is it's next.
  512. else // If we don't then our next
  513. EvaluatorList = WhereTo; // is the first in the list.
  514. // Now that our list is properly healed, it's time to drop the dead evaluator and
  515. // get on with our lives...
  516. CurrentEvaluator->NextEvaluator = NULL; // Disconnect from any list.
  517. CacheEvaluator(CurrentEvaluator); // Drop the current eval.
  518. CurrentEvaluator = WhereTo; // Move on.
  519. --CountOfEvaluators; // Reduce our evaluator count.
  520. }
  521. Evaluator* findEvaluatorListTail(Evaluator* head) {
  522. Evaluator* next = head;
  523. while(NULL != (next->NextEvaluator)) next = next->NextEvaluator;
  524. return next;
  525. }
  526. void EvaluationMatrix::dropAllEvaluators() {
  527. bool haveActiveEvaluators = (NULL != EvaluatorList);
  528. if(haveActiveEvaluators) {
  529. Evaluator* tail = findEvaluatorListTail(EvaluatorList);
  530. tail->NextEvaluator = EvaluatorCache;
  531. EvaluatorCache = EvaluatorList;
  532. }
  533. PreviousEvaluator = NULL;
  534. CurrentEvaluator = NULL;
  535. EvaluatorList = NULL;
  536. CountOfEvaluators = 0;
  537. }
  538. void EvaluationMatrix::restartEngineAt(int newCharacterCount) {
  539. dropAllEvaluators();
  540. CountOfCharacters = newCharacterCount;
  541. }
  542. // EvaluationMatrix::EvaluateThis()
  543. //
  544. // This function returns the number of matches that were found. It is possible for more
  545. // than one evaluator to match on a single character.
  546. //
  547. // 0 indicates no matches were found.
  548. // >0 indicates some matches were found.
  549. // If there is a problem then an exception will be thrown.
  550. int EvaluationMatrix::EvaluateThis(unsigned short int i) {
  551. AddEvaluator(CountOfCharacters,0); // First, add a new Evaluator at the root of the
  552. // matrix for the current position in the scan
  553. // stream.
  554. // The new evaluator is now at the top of our list.
  555. // If there was a problem then an exception will have been thrown.
  556. // If our allocation worked ok, then we'll be here and ready to start scanning
  557. // the rule set with our current character.
  558. PassResult = 0; // Start by assuming we won't match.
  559. CurrentEvaluator = EvaluatorList; // Start at the top of the list.
  560. PreviousEvaluator = NULL; // NULL means previous is the top.
  561. // 20030216 _M
  562. // Next do some basic conversions and evaluations so they don't need to be done
  563. // again within the evaluators. From now on the evaluators will look here for basic
  564. // conversions and boolean check values rather than performing the checks themselves.
  565. // 20140119 _M deprecated by jump table in evaluator
  566. // i_lower = tolower(i); // Convert i to lower case.
  567. // i_isDigit = isdigit(i); // Check for a digit.
  568. // i_isSpace = isspace(i); // Check for whitespace.
  569. // i_isAlpha = isalpha(i); // Check for letters.
  570. // Next, loop through the list and pass the incoming character to
  571. // each evaluator. Drop those that fall off, and record those that terminate. The
  572. // rest of them stick around to walk their paths until they meet their fate.
  573. while(CurrentEvaluator != NULL) { // While there are more evaluators...
  574. // go through the list and evaluate
  575. switch(CurrentEvaluator->EvaluateThis(i)) { // the current character against each.
  576. case Evaluator::FALLEN_OFF: { // If we've fallen off the path
  577. DropEvaluator(); // drop the current evaluator and
  578. break; // move on with our lives.
  579. }
  580. case Evaluator::DOING_OK: { // If we're still going then...
  581. PreviousEvaluator = CurrentEvaluator; // keep track of where we've been and
  582. CurrentEvaluator = // move forward to the next evaluator
  583. CurrentEvaluator->NextEvaluator; // in the list.
  584. break;
  585. }
  586. case Evaluator::TERMINATED: { // If we've terminated a path...
  587. ++PassResult; // Record our PassResult.
  588. // Create a new match result using the data in the current evaluator.
  589. // If there is a problem adding the match an exception will be thrown.
  590. AddMatchRecord(
  591. CurrentEvaluator->StreamStartPosition,
  592. CountOfCharacters - 1,
  593. myTokenMatrix->Symbol(CurrentEvaluator->CurrentPosition)
  594. );
  595. // From Version 2 onward we're always doing deep scans...
  596. // Having successfully recorded the result of this critter we can kill them off.
  597. DropEvaluator(); // He's dead.
  598. break; // Now let's keep looking.
  599. }
  600. case Evaluator::OUT_OF_RANGE: { // This result is really bad and
  601. throw OutOfRange("case Evaluator::OUT_OF_RANGE:"); // probably means we have a bad matrix.
  602. break;
  603. // The reason we don't throw OutOfRange from within the evaluator is that we
  604. // may want to take some other action in the future... So, we allow the evaluator
  605. // to tell us we sent it out of range and then we decide what to do about it.
  606. }
  607. }
  608. }
  609. // At the end of this function our PassResult is either an error (which is
  610. // reported immediately), or it is a match condition. We start out by assuming
  611. // there will be no match. If we find one, then we reset that result... so at
  612. // this point, all we need do is report our findings.
  613. ++CountOfCharacters; // Add one to our Character Count statistic.
  614. // Note that from this point on, the index in the stream is one less than the
  615. // CountOfCharacters... for example, if I've evaluated (am evaluating) one character
  616. // the it's index is 0. This will be important when we create any match records.
  617. return PassResult; // When we're finished, return the last known result.
  618. }
  619. void EvaluationMatrix::evaluateSegment(std::vector<unsigned char>& data, unsigned int start, unsigned int finish) {
  620. restartEngineAt(start);
  621. finish = (finish < data.size()) ? finish : data.size();
  622. for(unsigned int a = start; a < finish; a++) EvaluateThis(data[a]);
  623. }