You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572
  1. // snf_engine.hpp
  2. //
  3. // (C) 1985-2004 MicroNeil Research Corporation
  4. // (C) 2005-2009 ARM Research Labs, LLC.
  5. //
  6. // Derived from original work on cellular automation for complex pattern
  7. // reflex engine 1985 Pete McNeil (Madscientist)
  8. //
  9. // Derived from rapid scripting engine (token matrix) implementation 1987
  10. //
  11. // This is the header file for the sniffer pattern matching engine.
  12. // 20080305 _M - Added FlipEndian() function to convert rulebases from their
  13. // native little-endian format to big-endian format for CPUs that need it. See
  14. // additional work in SNFMulti to call the FlipEndian() function AFTER the
  15. // rulebase has been authenticated but before it is put into use.
  16. // 20070606 _M - Refactored exceptions to use base std::exception and improved
  17. // the evaluator code to reduce the strength of safety testing from 3 compares
  18. // per byte to 1.
  19. // 20060531 _M - Added evaluator caching to save a few cycles by not allocating
  20. // new memory and performing a complete initialization of an evaluator if there
  21. // is already one handy from a previous use.
  22. // 20021030 _M - Created.
  23. #pragma once
  24. #include <cassert>
  25. #include <stdexcept>
  26. #include <unistd.h>
  27. #include <cstdio>
  28. #include <cctype>
  29. #include <ctime>
  30. #include <cstdlib>
  31. #include <fstream>
  32. #include <iostream>
  33. #include <string>
  34. #include <vector>
  35. #include <exception>
  36. #include "../CodeDweller/faults.hpp"
  37. #include "../CodeDweller/mangler.hpp"
  38. //#include "../nvwa-0.6/nvwa/debug_new.h"
  39. // 20030929 _M SYMBOL_RANGE moved to snf_engine.hpp as part of augmenting the
  40. // capability of a match record. Match records now can decode themselves.
  41. const int SYMBOL_RANGE = 256; // Symbol result coding modulator.
  42. // Let's create our utility classes and structures.
  43. // The Token class.
  44. // This class represents the structure of a token. The rule file is, in fact,
  45. // a token matrix. Tokens within the matrix allow the sniffer to navigate through
  46. // a state change matrix attempting to locate special positions that indicate the
  47. // termination of a path, or more specifically, the recognition of a string that
  48. // has been evaluated along that path.
  49. //
  50. // IT IS IMPORTANT TO NOTE THAT AS THESE PROGRAMS ARE WRITTEN IT ASSUMES WE ARE IN
  51. // A 32 BIT INTEL ENVIRONMENT SO THAT THE TOKEN MATRIX CAN BE LOADED IN A SINGLE PASS
  52. // USING A BINARY INPUT STREAM.
  53. ////////////////////////////////////////////////////////////////////////////////////////
  54. // Token Declaration ///////////////////////////////////////////////////////////////////
  55. class Token { // Token class for defining and interpreting nodes within the matrix.
  56. public: // Beginning of Public stuff.
  57. int Check; // The first int is a check character.
  58. int Vector; // The second int is a vector.
  59. // isUnused() Returns true if the token is in an unused state.
  60. int isUnused() {
  61. return (Check==-1 && Vector==0) ? true : false;
  62. }
  63. // isTermination() Returns true if the token is in a termination state.
  64. int isTermination() {
  65. if(Check==0 && Vector > 0)
  66. return true;
  67. else
  68. return false;
  69. }
  70. // Symbol() Returns the symbol value for the token.
  71. int Symbol() { return Vector; }
  72. // Character() Returns the check character for this token.
  73. int Character() { return Check; }
  74. // End of Public stuff.
  75. // Note that no constructor is needed because the default constructor will do nicely.
  76. };
  77. ////////////////////////////////////////////////////////////////////////////////////////
  78. // Token Matrix Declaration ////////////////////////////////////////////////////////////
  79. ////////////////////////////////////////////////////////////////////////////////////////
  80. //
  81. // The Token Matrix loads, verifies, and maintains an array of tokens for the evaluators
  82. // to live in. This class provides safe access to the token matrix.
  83. //
  84. ////////////////////////////////////////////////////////////////////////////////////////
  85. class TokenMatrix {
  86. private:
  87. Token* Matrix; // Where we hold the token matrix.
  88. int MatrixSize; // What size is the matrix.
  89. public:
  90. // Exceptions...
  91. class BadAllocation : public std::runtime_error { // Exception for a bad memory allocation.
  92. public: BadAllocation(const std::string& w):runtime_error(w) {}
  93. };
  94. class BadMatrix : public std::runtime_error { // Exception for invalid matrix loads.
  95. public: BadMatrix(const std::string& w):runtime_error(w) {}
  96. };
  97. class BadFile : public std::runtime_error { // Exception for missing rulebase files.
  98. public: BadFile(const std::string& w):runtime_error(w) {}
  99. };
  100. class OutOfRange : public std::runtime_error { // Exception for indexes out of range.
  101. public: OutOfRange(const std::string& w):runtime_error(w) {}
  102. };
  103. // Standards...
  104. static const int SecuritySegmentSize = 1024; // File Authentication Segment
  105. static const int SecurityKeyBufferSize = 32; // Security Key Pad Block Size
  106. static const int RulebaseDigestSize = 64; // Number of bytes in digest.
  107. static const int MinimumValidMatrix = // Establish the smallest valid
  108. SecuritySegmentSize * 2 / SecurityKeyBufferSize; // matrix size
  109. // The first interface component checks the range and gives up the token.
  110. Token at(int x) { // Get the token at x
  111. if(x<0 || x>=MatrixSize) // Check to see if we're in bounds.
  112. throw OutOfRange("(x<0 || x>=MatrixSize)"); // If we're not then throw an exception.
  113. return Matrix[x]; // If we are then give it to them.
  114. }
  115. // The second interface component delivers the Matrix if it's valid so that other
  116. // code can manipulate it more efficiently (without constantly checking bounds.
  117. Token* getMatrix() { // Return the matrix.
  118. if(MatrixSize==0 || Matrix==NULL) // If the matrix isn't ready then
  119. throw BadMatrix("(MatrixSize==0 || Matrix==NULL)"); // throw an exception. If it is
  120. return Matrix; // ready then send it out.
  121. }
  122. // For simplicity we simply extend the underlying Token functions by taking a
  123. // position reference, checking it's range, and returning the result.
  124. int isUnused(int x) { // Extend Token.isUnused()
  125. return at(x).isUnused();
  126. }
  127. int isTermination(int x) { // Extend Token.isTermination()
  128. return at(x).isTermination();
  129. }
  130. int Symbol(int x) { // Exetend Token.Symbol()
  131. return at(x).Symbol();
  132. }
  133. int Character(int x) { // Extend Token.Character()
  134. return at(x).Character();
  135. }
  136. // Utility functions...
  137. int Size() { return MatrixSize; } // Returns the size of the matrix.
  138. void Load(const char* FileName); // Loads the matrix from a file name.
  139. void Load(std::string& FileName); // Loads the matrix from a file name string.
  140. void Load(std::ifstream& F); // Loads the token matrix from the file.
  141. void Validate(std::string& SecurityKey); // Validates the matrix with a key string.
  142. void Verify(std::string& SecurityKey); // Verifies the matrix digest.
  143. void FlipEndian(); // Converts big/little endian tokens.
  144. // Constructors...
  145. TokenMatrix() :
  146. Matrix(NULL),
  147. MatrixSize(0) { }
  148. TokenMatrix(std::ifstream& F) :
  149. Matrix(NULL),
  150. MatrixSize(0) {
  151. Load(F);
  152. }
  153. ~TokenMatrix() { // The Distructor...
  154. MatrixSize = 0; // Set the size to zero.
  155. if(Matrix) { delete [] Matrix; Matrix = NULL; } // If we have a matrix, remove it.
  156. }
  157. };
  158. /////////////////////////////////////////////////////////////////////////////////////////
  159. // End Token Work ///////////////////////////////////////////////////////////////////////
  160. /////////////////////////////////////////////////////////////////////////////////////////
  161. // Having defined the token matrix, I now define the Evaluator class which
  162. // be used to follow any matching rule threads as the program scans a a file.
  163. // A new evaluator is started at each position in the input stream making all
  164. // of the rules in the token matrix global.
  165. // The following two values are returned by the Evaluator at every step.
  166. const int WILD_WHITESPACE = 1; // Token code for whitespace wildcards.
  167. const int WILD_DIGIT = 2; // Token code for digit wildcards.
  168. const int WILD_LETTER = 3; // Token code for letter wildcards.
  169. const int WILD_NONWHITE = 4; // Token code for non-whitespace wildcards.
  170. const int WILD_ANYTHING = 5; // Token code for any character.
  171. const int WILD_INLINE = 6; // Token code for any character except new line.
  172. const int RUN_GATEWAY = 8; // Token code for run-loop gateways.
  173. // Here are some tuning parameters
  174. const int MaxWildRunLength = 4096; // Maximum span of "any number" wildcards.
  175. const int MAX_EVALS = 2048; // Maximum number of evaluators.
  176. //////////////////////////////////////////////////////////////////////////////////////////
  177. // Evaluators and the Evaluation Matrix
  178. //////////////////////////////////////////////////////////////////////////////////////////
  179. class EvaluationMatrix; // We've got to pre-declare this for some compilers.
  180. class Evaluator { // Evaluator class for following threads through the matrix.
  181. private:
  182. EvaluationMatrix* myEvaluationMatrix; // The evaluation matrix I live in.
  183. Token* Matrix; // The raw token matrix I walk in.
  184. int MatrixSize; // Size of raw token matrix.
  185. // 20070606 _M Optimized Evaluator code by reducing the strength of the
  186. // safety check from 3 comparisons to 1.
  187. unsigned int PositionLimit; // Largest CurrentPosition.
  188. // 20030216 _M Optimization conversions
  189. // 20140119 _M Deprecated by jump table in evaluator
  190. // inline int i_lower(); // { return myEvaluationMatrix->i_lower; }
  191. // inline bool i_isDigit(); // { return myEvaluationMatrix->i_isDigit; }
  192. // inline bool i_isSpace(); // { return myEvaluationMatrix->i_isSpace; }
  193. // inline bool i_isAlpha(); // { return myEvaluationMatrix->i_isAphpa; }
  194. unsigned int JumpPoint;
  195. int xLetter(); // Match Any letter.
  196. int xDigit(); // Match Any digit.
  197. int xNonWhite(); // Match Any non-whitespace.
  198. int xWhiteSpace(); // Match Any whitespace.
  199. int xAnyInline(); // Match Any byte but new line.
  200. int xAnything(); // Match Any character at all.
  201. int xRunGateway(); // Match the run-loop gateway.
  202. void doFollowOrMakeBuddy(int keyVector); // Follow and divide algorithm.
  203. void tryFollowingPrecisePath(unsigned short int i);
  204. void tryFollowingNoCasePath(unsigned short int i);
  205. void tryFollowingWildAlphaPath();
  206. void tryFollowingWildDigitPath();
  207. void tryFollowingWildNonWhitePath();
  208. void tryFollowingWildWhitePath();
  209. void tryFollowingWildInlinePath();
  210. void tryFollowingWildAnythingPath();
  211. void doFollowerJumpTable(unsigned short int i);
  212. public:
  213. // Standard Values...
  214. enum States { // These are the posible coditions.
  215. OUT_OF_RANGE, // We're outside the matrix - very bad.
  216. FALLEN_OFF, // We've fallen off the path and are lost.
  217. DOING_OK, // We're doing ok and following along.
  218. TERMINATED // We've reached the end of our path.
  219. };
  220. // Attributes...
  221. States Condition; // What state am I in? How's my health?
  222. Evaluator* NextEvaluator; // Linked List Pointer.
  223. unsigned int StreamStartPosition; // Indexes the position where we started.
  224. unsigned int CurrentPosition; // Indexes the node we are surfing.
  225. int WildRunLength; // Wildcard run length so far.
  226. // EvaluateThis() assumes it is being given the next character along the
  227. // path of a thread in the token matrix. It follows that thread and evaluates
  228. // it's condition.
  229. States EvaluateThis(unsigned short int i); // Follow the next byte.
  230. // isNoDuplicate() is used to keep us from allocating identical evaluators. This is
  231. // key to creating buddies when working with wildcards. It prevents us from recursively
  232. // proliferating evaluators at each new character when running in a wildcard loop.
  233. bool isNoDuplicate(unsigned int Position) { // Returns false if there is a duplicate.
  234. if(CurrentPosition == Position) // Obviously, if I match, then there's a dup.
  235. return false;
  236. // If I don't match and I'm the last one then
  237. if(NextEvaluator==NULL) // it must be true there are no dups. If there
  238. return true; // are more to ask then I'll let them answer.
  239. else
  240. return NextEvaluator->isNoDuplicate(Position);
  241. }
  242. Evaluator(unsigned int s, EvaluationMatrix* m); // Constructor...
  243. ~Evaluator(){
  244. if(NextEvaluator!=NULL){ // If there's more to this list then
  245. delete NextEvaluator; // delete it.
  246. }
  247. NextEvaluator = NULL; // Always null on exit.
  248. }
  249. };
  250. // A MatchRecord is created each time a new rule match occurrs. These records form a
  251. // linked list within the Evaluation Matrix that can be spit out after the process is
  252. // over for reporting purposes.
  253. class MatchRecord {
  254. public:
  255. int MatchStartPosition; // Where in the data stream did the match start?
  256. int MatchEndPosition; // Where in the data stream did the match end?
  257. int MatchSymbol; // What symbol was attached to the match rule?
  258. inline int RuleId(){return (MatchSymbol/SYMBOL_RANGE);} // Decode RuleID
  259. inline int RuleGroup(){return (MatchSymbol%SYMBOL_RANGE);} // Decode GroupID
  260. MatchRecord* NextMatchRecord;
  261. MatchRecord(int sp, int ep, int sym) { // When constructing a MatchRecord,
  262. MatchStartPosition = sp; // you must provide all of it's data.
  263. MatchEndPosition = ep;
  264. MatchSymbol = sym;
  265. // Since match records are always added to
  266. NextMatchRecord = NULL; // the end our next pointer is always NULL.
  267. }
  268. ~MatchRecord(){
  269. if(NextMatchRecord != NULL) // If there's more list, then delete it.
  270. delete NextMatchRecord;
  271. NextMatchRecord = NULL; // Clean up our pointer before leaving.
  272. }
  273. };
  274. // Now that we've created our utility classes, we'll create another class (with an instance)
  275. // that builds a matrix to evaluate all incoming characters, manage the list, and keeps
  276. // statistics and results from the execution process.
  277. class EvaluationMatrix {
  278. private:
  279. TokenMatrix* myTokenMatrix; // Token Matrix that I evaluate with.
  280. Evaluator* EvaluatorList; // Linked list of Evaluators.
  281. Evaluator* CurrentEvaluator; // Current Evaluator (when checking)
  282. Evaluator* PreviousEvaluator; // Previous Evaluator (when checking)
  283. // Evaluator Caching Mechanism.
  284. Evaluator* EvaluatorCache; // List of cached, ready evaluators.
  285. Evaluator* SourceEvaluator(int s, EvaluationMatrix* m); // Get a cached or new evaluator.
  286. void CacheEvaluator(Evaluator* e); // Cache a used evaluator.
  287. int CountOfEvaluators; // Current count of evaluators.
  288. int PassResult; // Result of the latest evaluation pass.
  289. MatchRecord* LastResultInList; // Keeps track of the end of the result list.
  290. MatchRecord* AddMatchRecord(int sp, int ep, int sym); // Add a match result.
  291. // DropEvaluator() is called by the EvaluateThis() method whenever an evaluator
  292. // reports the FALLEN_OFF result. The EvaluateThis() method keeps two values up
  293. // to date - one is the current evaluator (which will be dropped) and the other is
  294. // the previous evaluator (which will be updated to heal the list).
  295. // When we've finished this function, the CurrentEvaluator will be on the next
  296. // evaluator node if it exists. Therefore, the caller should skip it's normal
  297. // list itteration code when this function has been called.
  298. void DropEvaluator();
  299. void dropAllEvaluators();
  300. public:
  301. // Exception classes...
  302. class BadAllocation : public std::runtime_error { // Allocation failed exception.
  303. public: BadAllocation(const std::string& w):runtime_error(w) {}
  304. };
  305. class MaxEvalsExceeded : public std::runtime_error { // Too many evaluators exception.
  306. public: MaxEvalsExceeded(const std::string& w):runtime_error(w) {}
  307. };
  308. class OutOfRange : public std::runtime_error { // Out of range exception.
  309. public: OutOfRange(const std::string& w):runtime_error(w) {}
  310. };
  311. // Attributes...
  312. int CountOfCharacters; // How many characters have been evaluated.
  313. int MaximumCountOfEvaluators; // Largest matrix size reached.
  314. MatchRecord* ResultList; // List of match results.
  315. int DeepSwitch; // true if we're doing a deep scans.
  316. // 20030216 _M High Level Conversion Optimizers...
  317. // 20140119 _M Deprecated by jump table in evaluator
  318. // int i_lower; // Lower case version of byte under test.
  319. // bool i_isDigit; // true if i is a digit.
  320. // bool i_isSpace; // true if i is whitespace.
  321. // bool i_isAlpha; // true if i is alpha.
  322. // AddEvaluator() is made public because the Evaluator object must have access
  323. // to it in order to handle the creation of buddies as it evaluates it's rules.
  324. // Similarly the getTokens is public because evaluators must use this when they
  325. // initialize. In a later version we will clean this up so that all of this stuff
  326. // can be handled somewhat more privately.
  327. Token* getTokens() { // Deliver the raw token matrix
  328. return myTokenMatrix->getMatrix(); // for use when creating evaluators.
  329. }
  330. int getMatrixSize() { // Deliver the raw matrix size
  331. return myTokenMatrix->Size(); // for use when creating evaluators.
  332. }
  333. Evaluator* AddEvaluator(int s, unsigned int m); // Adds a new evaluator to the top.
  334. Evaluator* InsEvaluator(int s, unsigned int m); // Inserts a new evaluator after the
  335. // current evaluator. (Only called by
  336. // an existing evaluator in process...)
  337. // isNoDuplicate(int p) checks for duplicate evaulators
  338. bool isNoDuplicate(unsigned int p) { // If there's no list there can be no
  339. if(EvaluatorList == NULL) // duplicates so we're true. If there is
  340. return true; // a list then we'll let the list answer.
  341. else
  342. return EvaluatorList->isNoDuplicate(p);
  343. }
  344. // EvaluateThis() Moves each evaluator with the current character and creates a new
  345. // evaluator for the current spot in the input file to make all rules global.
  346. int EvaluateThis(unsigned short int i);
  347. void evaluateSegment(std::vector<unsigned char>& data, unsigned int start, unsigned int finish);
  348. void restartEngineAt(int newCharacterCount);
  349. EvaluationMatrix(TokenMatrix* m) { // Constructor w/ pointer to Token Matrix...
  350. myTokenMatrix = m; // Grab my TokenMatrix.
  351. EvaluatorList = NULL; // Start off with no evaluators.
  352. EvaluatorCache = NULL; // Start off with no evaluator cache.
  353. CurrentEvaluator = NULL; // NULL means starting at the top.
  354. PreviousEvaluator = NULL; // NULL means previous is the top.
  355. ResultList = NULL; // Start off with no results in our list.
  356. LastResultInList = NULL;
  357. CountOfCharacters = 0; // The count of characters will be zero and
  358. MaximumCountOfEvaluators = 0; // the maximum Evaluator count will be zero
  359. CountOfEvaluators = 0; // and the current count will also be zero.
  360. PassResult = 0; // Initialize expecting no matches.
  361. }
  362. ~EvaluationMatrix(){ // Destructor to clean up memory allocations.
  363. myTokenMatrix = NULL; // Stop pointing at the TokenMatrix
  364. // Both of these lists konw how to delete themselves.
  365. // 20060531_M Fixed possible crash by checking for NULL before
  366. // deleting these lists. Also added cleanup for the EvaluatorCache.
  367. if(NULL!=EvaluatorCache) {
  368. delete EvaluatorCache; // Delete the evaluator cache.
  369. EvaluatorCache = NULL; // Then clear it's pointer.
  370. }
  371. if(NULL!=EvaluatorList) {
  372. delete EvaluatorList; // Delete the evaluator list.
  373. EvaluatorList = NULL; // Then clear it's pointer.
  374. }
  375. if(NULL!=ResultList) {
  376. delete ResultList; // Delete the result list.
  377. ResultList = NULL; // Then clear it's pointer.
  378. }
  379. }
  380. };
  381. // 20060531_M Implementation of the evaluator cache is all inline.
  382. // In place of new Evaluator() we now can use SourceEvaluator()
  383. // In place of delete Evaluator() we now can use CacheEvaluator()
  384. // The effect is to store previously allocaed evaluators in the EvaluatorCache
  385. // list so that they can be reused. This avoids the frequen use of
  386. // new and delete and allows us to skip a few extra cycles for initialization
  387. // because much of the constructor work for a new evaluator is already done
  388. // in any cached evaluator.
  389. //
  390. // In practice, at least one evaluator is likely to be created and destroyed
  391. // for each byte that is scanned. This new mechanism significantly reduces the
  392. // number of cycles that would normally be associated with those operations by
  393. // eliminating them most of the time. Instead of returning used memory to the
  394. // heap during delete, the evaulator is simply added to the cache list. Instead
  395. // of allocating new space from the heap and initializing the object, a chached
  396. // evaluator is simply moved from the cache into production. Moving into and
  397. // out of the cache is roughly as simple as changing a couple of pointers.
  398. // In place of new Evaluator, we do this...
  399. inline Evaluator* EvaluationMatrix::SourceEvaluator(int s, EvaluationMatrix* m) { // Get a cached or new evaluator.
  400. if(NULL==EvaluatorCache) return new Evaluator(s,m); // If we have no cache, use new!
  401. Evaluator* reuse = EvaluatorCache; // Otherwise grab a reusable one.
  402. EvaluatorCache = reuse->NextEvaluator; // Collaps the cache by one.
  403. reuse->NextEvaluator = NULL; // Clean it up a bit.
  404. reuse->StreamStartPosition = s; // Record our starting point.
  405. reuse->CurrentPosition = 0; // Reset the Current Position.
  406. reuse->WildRunLength = 0; // Reset the run length.
  407. reuse->Condition = Evaluator::DOING_OK; // Reset the condition.
  408. return reuse; // Return the reusable unit.
  409. }
  410. // In place of delete Evaluator, we do this...
  411. inline void EvaluationMatrix::CacheEvaluator(Evaluator* e) { // Cache a used evaluator.
  412. e->NextEvaluator = EvaluatorCache; // Link the used evaluator
  413. EvaluatorCache = e; // into the cache;
  414. }
  415. // In the above, the first evaluator added will get NULL as it's NextEvaluator.
  416. // When that first evaulator is used, the NULL pointer will return to the root
  417. // of the EvaluatorCache list. In this regard the cache acts like a stack.