You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

snf_engine.hpp 27KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546
  1. // snf_engine.hpp
  2. //
  3. // (C) 1985-2004 MicroNeil Research Corporation
  4. // (C) 2005-2009 ARM Research Labs, LLC.
  5. //
  6. // Derived from original work on cellular automation for complex pattern
  7. // reflex engine 1985 Pete McNeil (Madscientist)
  8. //
  9. // Derived from rapid scripting engine (token matrix) implementation 1987
  10. //
  11. // This is the header file for the sniffer pattern matching engine.
  12. // 20080305 _M - Added FlipEndian() function to convert rulebases from their
  13. // native little-endian format to big-endian format for CPUs that need it. See
  14. // additional work in SNFMulti to call the FlipEndian() function AFTER the
  15. // rulebase has been authenticated but before it is put into use.
  16. // 20070606 _M - Refactored exceptions to use base std::exception and improved
  17. // the evaluator code to reduce the strength of safety testing from 3 compares
  18. // per byte to 1.
  19. // 20060531 _M - Added evaluator caching to save a few cycles by not allocating
  20. // new memory and performing a complete initialization of an evaluator if there
  21. // is already one handy from a previous use.
  22. // 20021030 _M - Created.
  23. #ifndef _MN_SNF_ENGINE
  24. #define _MN_SNF_ENGINE
  25. #include <cassert>
  26. #include <stdexcept>
  27. #include <unistd.h>
  28. #include <cstdio>
  29. #include <cctype>
  30. #include <ctime>
  31. #include <cstdlib>
  32. #include <fstream>
  33. #include <iostream>
  34. #include <string>
  35. #include <exception>
  36. #include "mangler.hpp"
  37. //#include "../nvwa-0.6/nvwa/debug_new.h"
  38. using namespace std;
  39. // 20030929 _M SYMBOL_RANGE moved to snf_engine.hpp as part of augmenting the
  40. // capability of a match record. Match records now can decode themselves.
  41. const int SYMBOL_RANGE = 256; // Symbol result coding modulator.
  42. // Let's create our utility classes and structures.
  43. // The Token class.
  44. // This class represents the structure of a token. The rule file is, in fact,
  45. // a token matrix. Tokens within the matrix allow the sniffer to navigate through
  46. // a state change matrix attempting to locate special positions that indicate the
  47. // termination of a path, or more specifically, the recognition of a string that
  48. // has been evaluated along that path.
  49. //
  50. // IT IS IMPORTANT TO NOTE THAT AS THESE PROGRAMS ARE WRITTEN IT ASSUMES WE ARE IN
  51. // A 32 BIT INTEL ENVIRONMENT SO THAT THE TOKEN MATRIX CAN BE LOADED IN A SINGLE PASS
  52. // USING A BINARY INPUT STREAM.
  53. ////////////////////////////////////////////////////////////////////////////////////////
  54. // Token Declaration ///////////////////////////////////////////////////////////////////
  55. class Token { // Token class for defining and interpreting nodes within the matrix.
  56. public: // Beginning of Public stuff.
  57. int Check; // The first int is a check character.
  58. int Vector; // The second int is a vector.
  59. // isUnused() Returns true if the token is in an unused state.
  60. int isUnused() {
  61. return (Check==-1 && Vector==0) ? true : false;
  62. }
  63. // isTermination() Returns true if the token is in a termination state.
  64. int isTermination() {
  65. if(Check==0 && Vector > 0)
  66. return true;
  67. else
  68. return false;
  69. }
  70. // Symbol() Returns the symbol value for the token.
  71. int Symbol() { return Vector; }
  72. // Character() Returns the check character for this token.
  73. int Character() { return Check; }
  74. // End of Public stuff.
  75. // Note that no constructor is needed because the default constructor will do nicely.
  76. };
  77. ////////////////////////////////////////////////////////////////////////////////////////
  78. // Token Matrix Declaration ////////////////////////////////////////////////////////////
  79. ////////////////////////////////////////////////////////////////////////////////////////
  80. //
  81. // The Token Matrix loads, verifies, and maintains an array of tokens for the evaluators
  82. // to live in. This class provides safe access to the token matrix.
  83. //
  84. ////////////////////////////////////////////////////////////////////////////////////////
  85. class TokenMatrix {
  86. private:
  87. Token* Matrix; // Where we hold the token matrix.
  88. int MatrixSize; // What size is the matrix.
  89. public:
  90. // Exceptions...
  91. class BadAllocation : public runtime_error { // Exception for a bad memory allocation.
  92. public: BadAllocation(const string& w):runtime_error(w) {}
  93. };
  94. class BadMatrix : public runtime_error { // Exception for invalid matrix loads.
  95. public: BadMatrix(const string& w):runtime_error(w) {}
  96. };
  97. class BadFile : public runtime_error { // Exception for missing rulebase files.
  98. public: BadFile(const string& w):runtime_error(w) {}
  99. };
  100. class OutOfRange : public runtime_error { // Exception for indexes out of range.
  101. public: OutOfRange(const string& w):runtime_error(w) {}
  102. };
  103. // Standards...
  104. static const int SecuritySegmentSize = 1024; // File Authentication Segment
  105. static const int SecurityKeyBufferSize = 32; // Security Key Pad Block Size
  106. static const int RulebaseDigestSize = 64; // Number of bytes in digest.
  107. static const int MinimumValidMatrix = // Establish the smallest valid
  108. SecuritySegmentSize * 2 / SecurityKeyBufferSize; // matrix size
  109. // The first interface component checks the range and gives up the token.
  110. Token at(int x) { // Get the token at x
  111. if(x<0 || x>=MatrixSize) // Check to see if we're in bounds.
  112. throw OutOfRange("(x<0 || x>=MatrixSize)"); // If we're not then throw an exception.
  113. return Matrix[x]; // If we are then give it to them.
  114. }
  115. // The second interface component delivers the Matrix if it's valid so that other
  116. // code can manipulate it more efficiently (without constantly checking bounds.
  117. Token* getMatrix() { // Return the matrix.
  118. if(MatrixSize==0 || Matrix==NULL) // If the matrix isn't ready then
  119. throw BadMatrix("(MatrixSize==0 || Matrix==NULL)"); // throw an exception. If it is
  120. return Matrix; // ready then send it out.
  121. }
  122. // For simplicity we simply extend the underlying Token functions by taking a
  123. // position reference, checking it's range, and returning the result.
  124. int isUnused(int x) { // Extend Token.isUnused()
  125. return at(x).isUnused();
  126. }
  127. int isTermination(int x) { // Extend Token.isTermination()
  128. return at(x).isTermination();
  129. }
  130. int Symbol(int x) { // Exetend Token.Symbol()
  131. return at(x).Symbol();
  132. }
  133. int Character(int x) { // Extend Token.Character()
  134. return at(x).Character();
  135. }
  136. // Utility functions...
  137. int Size() { return MatrixSize; } // Returns the size of the matrix.
  138. void Load(const char* FileName); // Loads the matrix from a file name.
  139. void Load(string& FileName); // Loads the matrix from a file name string.
  140. void Load(ifstream& F); // Loads the token matrix from the file.
  141. void Validate(string& SecurityKey); // Validates the matrix with a key string.
  142. void Verify(string& SecurityKey); // Verifies the matrix digest.
  143. void FlipEndian(); // Converts big/little endian tokens.
  144. // Constructors...
  145. TokenMatrix() :
  146. MatrixSize(0),
  147. Matrix(NULL) { }
  148. TokenMatrix(ifstream& F) :
  149. MatrixSize(0),
  150. Matrix(NULL) {
  151. Load(F);
  152. }
  153. ~TokenMatrix() { // The Distructor...
  154. MatrixSize = 0; // Set the size to zero.
  155. if(Matrix) { delete [] Matrix; Matrix = NULL; } // If we have a matrix, remove it.
  156. }
  157. };
  158. /////////////////////////////////////////////////////////////////////////////////////////
  159. // End Token Work ///////////////////////////////////////////////////////////////////////
  160. /////////////////////////////////////////////////////////////////////////////////////////
  161. // Having defined the token matrix, I now define the Evaluator class which
  162. // be used to follow any matching rule threads as the program scans a a file.
  163. // A new evaluator is started at each position in the input stream making all
  164. // of the rules in the token matrix global.
  165. // The following two values are returned by the Evaluator at every step.
  166. const int WILD_WHITESPACE = 1; // Token code for whitespace wildcards.
  167. const int WILD_DIGIT = 2; // Token code for digit wildcards.
  168. const int WILD_LETTER = 3; // Token code for letter wildcards.
  169. const int WILD_NONWHITE = 4; // Token code for non-whitespace wildcards.
  170. const int WILD_ANYTHING = 5; // Token code for any character.
  171. const int WILD_INLINE = 6; // Token code for any character except new line.
  172. const int RUN_GATEWAY = 8; // Token code for run-loop gateways.
  173. // Here are some tuning parameters
  174. const int MaxWildRunLength = 4096; // Maximum span of "any number" wildcards.
  175. const int MAX_EVALS = 2048; // Maximum number of evaluators.
  176. //////////////////////////////////////////////////////////////////////////////////////////
  177. // Evaluators and the Evaluation Matrix
  178. //////////////////////////////////////////////////////////////////////////////////////////
  179. class EvaluationMatrix; // We've got to pre-declare this for some compilers.
  180. class Evaluator { // Evaluator class for following threads through the matrix.
  181. private:
  182. EvaluationMatrix* myEvaluationMatrix; // The evaluation matrix I live in.
  183. Token* Matrix; // The raw token matrix I walk in.
  184. int MatrixSize; // Size of raw token matrix.
  185. // 20070606 _M Optimized Evaluator code by reducing the strength of the
  186. // safety check from 3 comparisons to 1.
  187. unsigned int PositionLimit; // Largest CurrentPosition.
  188. // 20030216 _M Optimization conversions
  189. inline int i_lower(); // { return myEvaluationMatrix->i_lower; }
  190. inline bool i_isDigit(); // { return myEvaluationMatrix->i_isDigit; }
  191. inline bool i_isSpace(); // { return myEvaluationMatrix->i_isSpace; }
  192. inline bool i_isAlpha(); // { return myEvaluationMatrix->i_isAphpa; }
  193. public:
  194. // Standard Values...
  195. enum States { // These are the posible coditions.
  196. OUT_OF_RANGE, // We're outside the matrix - very bad.
  197. FALLEN_OFF, // We've fallen off the path and are lost.
  198. DOING_OK, // We're doing ok and following along.
  199. TERMINATED // We've reached the end of our path.
  200. };
  201. // Attributes...
  202. States Condition; // What state am I in? How's my health?
  203. Evaluator* NextEvaluator; // Linked List Pointer.
  204. int StreamStartPosition; // Indexes the position where we started.
  205. unsigned int CurrentPosition; // Indexes the node we are surfing.
  206. int WildRunLength; // Wildcard run length so far.
  207. // EvaluateThis() assumes it is being given the next character along the
  208. // path of a thread in the token matrix. It follows that thread and evaluates
  209. // it's condition.
  210. States EvaluateThis(unsigned short int i); // Follow the next byte.
  211. // isNoDuplicate() is used to keep us from allocating identical evaluators. This is
  212. // key to creating buddies when working with wildcards. It prevents us from recursively
  213. // proliferating evaluators at each new character when running in a wildcard loop.
  214. int isNoDuplicate(int Position) { // Returns false if there is a duplicate.
  215. if(CurrentPosition == Position) // Obviously, if I match, then there's a dup.
  216. return false;
  217. // If I don't match and I'm the last one then
  218. if(NextEvaluator==NULL) // it must be true there are no dups. If there
  219. return true; // are more to ask then I'll let them answer.
  220. else
  221. return NextEvaluator->isNoDuplicate(Position);
  222. }
  223. Evaluator(int s, EvaluationMatrix* m); // Constructor...
  224. ~Evaluator(){
  225. if(NextEvaluator!=NULL){ // If there's more to this list then
  226. delete NextEvaluator; // delete it.
  227. }
  228. NextEvaluator = NULL; // Always null on exit.
  229. }
  230. };
  231. // A MatchRecord is created each time a new rule match occurrs. These records form a
  232. // linked list within the Evaluation Matrix that can be spit out after the process is
  233. // over for reporting purposes.
  234. class MatchRecord {
  235. public:
  236. int MatchStartPosition; // Where in the data stream did the match start?
  237. int MatchEndPosition; // Where in the data stream did the match end?
  238. int MatchSymbol; // What symbol was attached to the match rule?
  239. inline int RuleId(){return (MatchSymbol/SYMBOL_RANGE);} // Decode RuleID
  240. inline int RuleGroup(){return (MatchSymbol%SYMBOL_RANGE);} // Decode GroupID
  241. MatchRecord* NextMatchRecord;
  242. MatchRecord(int sp, int ep, int sym) { // When constructing a MatchRecord,
  243. MatchStartPosition = sp; // you must provide all of it's data.
  244. MatchEndPosition = ep;
  245. MatchSymbol = sym;
  246. // Since match records are always added to
  247. NextMatchRecord = NULL; // the end our next pointer is always NULL.
  248. }
  249. ~MatchRecord(){
  250. if(NextMatchRecord != NULL) // If there's more list, then delete it.
  251. delete NextMatchRecord;
  252. NextMatchRecord = NULL; // Clean up our pointer before leaving.
  253. }
  254. };
  255. // Now that we've created our utility classes, we'll create another class (with an instance)
  256. // that builds a matrix to evaluate all incoming characters, manage the list, and keeps
  257. // statistics and results from the execution process.
  258. class EvaluationMatrix {
  259. private:
  260. TokenMatrix* myTokenMatrix; // Token Matrix that I evaluate with.
  261. Evaluator* EvaluatorList; // Linked list of Evaluators.
  262. Evaluator* CurrentEvaluator; // Current Evaluator (when checking)
  263. Evaluator* PreviousEvaluator; // Previous Evaluator (when checking)
  264. // Evaluator Caching Mechanism.
  265. Evaluator* EvaluatorCache; // List of cached, ready evaluators.
  266. Evaluator* SourceEvaluator(int s, EvaluationMatrix* m); // Get a cached or new evaluator.
  267. void CacheEvaluator(Evaluator* e); // Cache a used evaluator.
  268. int CountOfEvaluators; // Current count of evaluators.
  269. int PassResult; // Result of the latest evaluation pass.
  270. MatchRecord* LastResultInList; // Keeps track of the end of the result list.
  271. MatchRecord* AddMatchRecord(int sp, int ep, int sym); // Add a match result.
  272. // DropEvaluator() is called by the EvaluateThis() method whenever an evaluator
  273. // reports the FALLEN_OFF result. The EvaluateThis() method keeps two values up
  274. // to date - one is the current evaluator (which will be dropped) and the other is
  275. // the previous evaluator (which will be updated to heal the list).
  276. // When we've finished this function, the CurrentEvaluator will be on the next
  277. // evaluator node if it exists. Therefore, the caller should skip it's normal
  278. // list itteration code when this function has been called.
  279. void DropEvaluator();
  280. public:
  281. // Exception classes...
  282. class BadAllocation : public runtime_error { // Allocation failed exception.
  283. public: BadAllocation(const string& w):runtime_error(w) {}
  284. };
  285. class MaxEvalsExceeded : public runtime_error { // Too many evaluators exception.
  286. public: MaxEvalsExceeded(const string& w):runtime_error(w) {}
  287. };
  288. class OutOfRange : public runtime_error { // Out of range exception.
  289. public: OutOfRange(const string& w):runtime_error(w) {}
  290. };
  291. // Attributes...
  292. int CountOfCharacters; // How many characters have been evaluated.
  293. int MaximumCountOfEvaluators; // Largest matrix size reached.
  294. MatchRecord* ResultList; // List of match results.
  295. int DeepSwitch; // true if we're doing a deep scans.
  296. // 20030216 _M High Level Conversion Optimizers...
  297. int i_lower; // Lower case version of byte under test.
  298. bool i_isDigit; // true if i is a digit.
  299. bool i_isSpace; // true if i is whitespace.
  300. bool i_isAlpha; // true if i is alpha.
  301. // AddEvaluator() is made public because the Evaluator object must have access
  302. // to it in order to handle the creation of buddies as it evaluates it's rules.
  303. // Similarly the getTokens is public because evaluators must use this when they
  304. // initialize. In a later version we will clean this up so that all of this stuff
  305. // can be handled somewhat more privately.
  306. Token* getTokens() { // Deliver the raw token matrix
  307. return myTokenMatrix->getMatrix(); // for use when creating evaluators.
  308. }
  309. int getMatrixSize() { // Deliver the raw matrix size
  310. return myTokenMatrix->Size(); // for use when creating evaluators.
  311. }
  312. Evaluator* AddEvaluator(int s, int m); // Adds a new evaluator to the top.
  313. Evaluator* InsEvaluator(int s, int m); // Inserts a new evaluator after the
  314. // current evaluator. (Only called by
  315. // an existing evaluator in process...)
  316. // isNoDuplicate(int p) checks for duplicate evaulators
  317. int isNoDuplicate(int p) { // If there's no list there can be no
  318. if(EvaluatorList == NULL) // duplicates so we're true. If there is
  319. return true; // a list then we'll let the list answer.
  320. else
  321. return EvaluatorList->isNoDuplicate(p);
  322. }
  323. // EvaluateThis() Moves each evaluator with the current character and creates a new
  324. // evaluator for the current spot in the input file to make all rules global.
  325. int EvaluateThis(unsigned short int i);
  326. EvaluationMatrix(TokenMatrix* m) { // Constructor w/ pointer to Token Matrix...
  327. myTokenMatrix = m; // Grab my TokenMatrix.
  328. EvaluatorList = NULL; // Start off with no evaluators.
  329. EvaluatorCache = NULL; // Start off with no evaluator cache.
  330. CurrentEvaluator = NULL; // NULL means starting at the top.
  331. PreviousEvaluator = NULL; // NULL means previous is the top.
  332. ResultList = NULL; // Start off with no results in our list.
  333. LastResultInList = NULL;
  334. CountOfCharacters = 0; // The count of characters will be zero and
  335. MaximumCountOfEvaluators = 0; // the maximum Evaluator count will be zero
  336. CountOfEvaluators = 0; // and the current count will also be zero.
  337. PassResult = 0; // Initialize expecting no matches.
  338. }
  339. ~EvaluationMatrix(){ // Destructor to clean up memory allocations.
  340. myTokenMatrix = NULL; // Stop pointing at the TokenMatrix
  341. // Both of these lists konw how to delete themselves.
  342. // 20060531_M Fixed possible crash by checking for NULL before
  343. // deleting these lists. Also added cleanup for the EvaluatorCache.
  344. if(NULL!=EvaluatorCache) {
  345. delete EvaluatorCache; // Delete the evaluator cache.
  346. EvaluatorCache = NULL; // Then clear it's pointer.
  347. }
  348. if(NULL!=EvaluatorList) {
  349. delete EvaluatorList; // Delete the evaluator list.
  350. EvaluatorList = NULL; // Then clear it's pointer.
  351. }
  352. if(NULL!=ResultList) {
  353. delete ResultList; // Delete the result list.
  354. ResultList = NULL; // Then clear it's pointer.
  355. }
  356. }
  357. };
  358. // 20060531_M Implementation of the evaluator cache is all inline.
  359. // In place of new Evaluator() we now can use SourceEvaluator()
  360. // In place of delete Evaluator() we now can use CacheEvaluator()
  361. // The effect is to store previously allocaed evaluators in the EvaluatorCache
  362. // list so that they can be reused. This avoids the frequen use of
  363. // new and delete and allows us to skip a few extra cycles for initialization
  364. // because much of the constructor work for a new evaluator is already done
  365. // in any cached evaluator.
  366. //
  367. // In practice, at least one evaluator is likely to be created and destroyed
  368. // for each byte that is scanned. This new mechanism significantly reduces the
  369. // number of cycles that would normally be associated with those operations by
  370. // eliminating them most of the time. Instead of returning used memory to the
  371. // heap during delete, the evaulator is simply added to the cache list. Instead
  372. // of allocating new space from the heap and initializing the object, a chached
  373. // evaluator is simply moved from the cache into production. Moving into and
  374. // out of the cache is roughly as simple as changing a couple of pointers.
  375. // In place of new Evaluator, we do this...
  376. inline Evaluator* EvaluationMatrix::SourceEvaluator(int s, EvaluationMatrix* m) { // Get a cached or new evaluator.
  377. if(NULL==EvaluatorCache) return new Evaluator(s,m); // If we have no cache, use new!
  378. Evaluator* reuse = EvaluatorCache; // Otherwise grab a reusable one.
  379. EvaluatorCache = reuse->NextEvaluator; // Collaps the cache by one.
  380. reuse->NextEvaluator = NULL; // Clean it up a bit.
  381. reuse->StreamStartPosition = s; // Record our starting point.
  382. reuse->CurrentPosition = 0; // Reset the Current Position.
  383. reuse->WildRunLength = 0; // Reset the run length.
  384. reuse->Condition = Evaluator::DOING_OK; // Reset the condition.
  385. return reuse; // Return the reusable unit.
  386. }
  387. // In place of delete Evaluator, we do this...
  388. inline void EvaluationMatrix::CacheEvaluator(Evaluator* e) { // Cache a used evaluator.
  389. e->NextEvaluator = EvaluatorCache; // Link the used evaluator
  390. EvaluatorCache = e; // into the cache;
  391. }
  392. // In the above, the first evaluator added will get NULL as it's NextEvaluator.
  393. // When that first evaulator is used, the NULL pointer will return to the root
  394. // of the EvaluatorCache list. In this regard the cache acts like a stack.
  395. #endif