Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

snf_engine.hpp 27KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547
  1. // snf_engine.hpp
  2. //
  3. // (C) 1985-2004 MicroNeil Research Corporation
  4. // (C) 2005-2009 ARM Research Labs, LLC.
  5. //
  6. // Derived from original work on cellular automation for complex pattern
  7. // reflex engine 1985 Pete McNeil (Madscientist)
  8. //
  9. // Derived from rapid scripting engine (token matrix) implementation 1987
  10. //
  11. // This is the header file for the sniffer pattern matching engine.
  12. // 20080305 _M - Added FlipEndian() function to convert rulebases from their
  13. // native little-endian format to big-endian format for CPUs that need it. See
  14. // additional work in SNFMulti to call the FlipEndian() function AFTER the
  15. // rulebase has been authenticated but before it is put into use.
  16. // 20070606 _M - Refactored exceptions to use base std::exception and improved
  17. // the evaluator code to reduce the strength of safety testing from 3 compares
  18. // per byte to 1.
  19. // 20060531 _M - Added evaluator caching to save a few cycles by not allocating
  20. // new memory and performing a complete initialization of an evaluator if there
  21. // is already one handy from a previous use.
  22. // 20021030 _M - Created.
  23. #ifndef _MN_SNF_ENGINE
  24. #define _MN_SNF_ENGINE
  25. #include <cassert>
  26. #include <stdexcept>
  27. #include <unistd.h>
  28. #include <cstdio>
  29. #include <cctype>
  30. #include <ctime>
  31. #include <cstdlib>
  32. #include <fstream>
  33. #include <iostream>
  34. #include <string>
  35. #include <exception>
  36. #include "../CodeDweller/faults.hpp"
  37. #include "../CodeDweller/mangler.hpp"
  38. //#include "../nvwa-0.6/nvwa/debug_new.h"
  39. using namespace std;
  // SYMBOL_RANGE is the modulator applied to a match symbol: MatchRecord divides
  // its MatchSymbol by this value to obtain the rule id and takes the remainder
  // to obtain the rule group.
  // 20030929 _M Moved to snf_engine.hpp as part of augmenting the capability of
  // a match record, so that match records can decode themselves.
  const int SYMBOL_RANGE = 256;
  43. // Let's create our utility classes and structures.
  44. // The Token class.
  45. // This class represents the structure of a token. The rule file is, in fact,
  46. // a token matrix. Tokens within the matrix allow the sniffer to navigate through
  47. // a state change matrix attempting to locate special positions that indicate the
  48. // termination of a path, or more specifically, the recognition of a string that
  49. // has been evaluated along that path.
  50. //
  51. // IT IS IMPORTANT TO NOTE THAT AS THESE PROGRAMS ARE WRITTEN IT ASSUMES WE ARE IN
  52. // A 32 BIT INTEL ENVIRONMENT SO THAT THE TOKEN MATRIX CAN BE LOADED IN A SINGLE PASS
  53. // USING A BINARY INPUT STREAM.
  54. ////////////////////////////////////////////////////////////////////////////////////////
  55. // Token Declaration ///////////////////////////////////////////////////////////////////
  // Token: a single node of the token matrix, made of two ints.
  //
  // No constructor is declared; the implicitly generated one is sufficient and
  // the class stays a plain pair of public ints.
  //
  // All query functions are const so that they can be used on const tokens and
  // through const references. They keep their original int return type.
  class Token {
  public:
    int Check;   // The first int is a check character.
    int Vector;  // The second int is a vector (also used as the symbol value).

    // isUnused() Returns true (1) if the token is in an unused state, which is
    // encoded as Check == -1 together with Vector == 0; otherwise false (0).
    int isUnused() const {
      return (Check == -1 && Vector == 0) ? true : false;
    }

    // isTermination() Returns true (1) if the token is in a termination state,
    // which is encoded as Check == 0 together with a positive Vector;
    // otherwise false (0).
    int isTermination() const {
      if (Check == 0 && Vector > 0)
        return true;
      else
        return false;
    }

    // Symbol() Returns the symbol value for the token (the Vector field).
    int Symbol() const { return Vector; }

    // Character() Returns the check character for this token (the Check field).
    int Character() const { return Check; }
  };
  78. ////////////////////////////////////////////////////////////////////////////////////////
  79. // Token Matrix Declaration ////////////////////////////////////////////////////////////
  80. ////////////////////////////////////////////////////////////////////////////////////////
  81. //
  82. // The Token Matrix loads, verifies, and maintains an array of tokens for the evaluators
  83. // to live in. This class provides safe access to the token matrix.
  84. //
  85. ////////////////////////////////////////////////////////////////////////////////////////
  86. class TokenMatrix {
  87. private:
  88. Token* Matrix; // Where we hold the token matrix.
  89. int MatrixSize; // What size is the matrix.
  90. public:
  91. // Exceptions...
  92. class BadAllocation : public runtime_error { // Exception for a bad memory allocation.
  93. public: BadAllocation(const string& w):runtime_error(w) {}
  94. };
  95. class BadMatrix : public runtime_error { // Exception for invalid matrix loads.
  96. public: BadMatrix(const string& w):runtime_error(w) {}
  97. };
  98. class BadFile : public runtime_error { // Exception for missing rulebase files.
  99. public: BadFile(const string& w):runtime_error(w) {}
  100. };
  101. class OutOfRange : public runtime_error { // Exception for indexes out of range.
  102. public: OutOfRange(const string& w):runtime_error(w) {}
  103. };
  104. // Standards...
  105. static const int SecuritySegmentSize = 1024; // File Authentication Segment
  106. static const int SecurityKeyBufferSize = 32; // Security Key Pad Block Size
  107. static const int RulebaseDigestSize = 64; // Number of bytes in digest.
  108. static const int MinimumValidMatrix = // Establish the smallest valid
  109. SecuritySegmentSize * 2 / SecurityKeyBufferSize; // matrix size
  110. // The first interface component checks the range and gives up the token.
  111. Token at(int x) { // Get the token at x
  112. if(x<0 || x>=MatrixSize) // Check to see if we're in bounds.
  113. throw OutOfRange("(x<0 || x>=MatrixSize)"); // If we're not then throw an exception.
  114. return Matrix[x]; // If we are then give it to them.
  115. }
  116. // The second interface component delivers the Matrix if it's valid so that other
  117. // code can manipulate it more efficiently (without constantly checking bounds.
  118. Token* getMatrix() { // Return the matrix.
  119. if(MatrixSize==0 || Matrix==NULL) // If the matrix isn't ready then
  120. throw BadMatrix("(MatrixSize==0 || Matrix==NULL)"); // throw an exception. If it is
  121. return Matrix; // ready then send it out.
  122. }
  123. // For simplicity we simply extend the underlying Token functions by taking a
  124. // position reference, checking it's range, and returning the result.
  125. int isUnused(int x) { // Extend Token.isUnused()
  126. return at(x).isUnused();
  127. }
  128. int isTermination(int x) { // Extend Token.isTermination()
  129. return at(x).isTermination();
  130. }
  131. int Symbol(int x) { // Exetend Token.Symbol()
  132. return at(x).Symbol();
  133. }
  134. int Character(int x) { // Extend Token.Character()
  135. return at(x).Character();
  136. }
  137. // Utility functions...
  138. int Size() { return MatrixSize; } // Returns the size of the matrix.
  139. void Load(const char* FileName); // Loads the matrix from a file name.
  140. void Load(string& FileName); // Loads the matrix from a file name string.
  141. void Load(ifstream& F); // Loads the token matrix from the file.
  142. void Validate(string& SecurityKey); // Validates the matrix with a key string.
  143. void Verify(string& SecurityKey); // Verifies the matrix digest.
  144. void FlipEndian(); // Converts big/little endian tokens.
  145. // Constructors...
  146. TokenMatrix() :
  147. Matrix(NULL),
  148. MatrixSize(0) { }
  149. TokenMatrix(ifstream& F) :
  150. Matrix(NULL),
  151. MatrixSize(0) {
  152. Load(F);
  153. }
  154. ~TokenMatrix() { // The Distructor...
  155. MatrixSize = 0; // Set the size to zero.
  156. if(Matrix) { delete [] Matrix; Matrix = NULL; } // If we have a matrix, remove it.
  157. }
  158. };
  159. /////////////////////////////////////////////////////////////////////////////////////////
  160. // End Token Work ///////////////////////////////////////////////////////////////////////
  161. /////////////////////////////////////////////////////////////////////////////////////////
  162. // Having defined the token matrix, I now define the Evaluator class which will
  163. // be used to follow any matching rule threads as the program scans a file.
  164. // A new evaluator is started at each position in the input stream making all
  165. // of the rules in the token matrix global.
  // Token codes for the wildcards and the run-loop gateway.
  const int WILD_WHITESPACE = 1;  // Whitespace wildcard.
  const int WILD_DIGIT = 2;       // Digit wildcard.
  const int WILD_LETTER = 3;      // Letter wildcard.
  const int WILD_NONWHITE = 4;    // Non-whitespace wildcard.
  const int WILD_ANYTHING = 5;    // Any character.
  const int WILD_INLINE = 6;      // Any character except new line.
  const int RUN_GATEWAY = 8;      // Run-loop gateway.

  // Tuning parameters.
  const int MaxWildRunLength = 4096;  // Maximum span of "any number" wildcards.
  const int MAX_EVALS = 2048;         // Maximum number of evaluators.
  177. //////////////////////////////////////////////////////////////////////////////////////////
  178. // Evaluators and the Evaluation Matrix
  179. //////////////////////////////////////////////////////////////////////////////////////////
  180. class EvaluationMatrix; // We've got to pre-declare this for some compilers.
  181. class Evaluator { // Evaluator class for following threads through the matrix.
  182. private:
  183. EvaluationMatrix* myEvaluationMatrix; // The evaluation matrix I live in.
  184. Token* Matrix; // The raw token matrix I walk in.
  185. int MatrixSize; // Size of raw token matrix.
  186. // 20070606 _M Optimized Evaluator code by reducing the strength of the
  187. // safety check from 3 comparisons to 1.
  188. unsigned int PositionLimit; // Largest CurrentPosition.
  189. // 20030216 _M Optimization conversions
  190. inline int i_lower(); // { return myEvaluationMatrix->i_lower; }
  191. inline bool i_isDigit(); // { return myEvaluationMatrix->i_isDigit; }
  192. inline bool i_isSpace(); // { return myEvaluationMatrix->i_isSpace; }
  193. inline bool i_isAlpha(); // { return myEvaluationMatrix->i_isAphpa; }
  194. public:
  195. // Standard Values...
  196. enum States { // These are the posible coditions.
  197. OUT_OF_RANGE, // We're outside the matrix - very bad.
  198. FALLEN_OFF, // We've fallen off the path and are lost.
  199. DOING_OK, // We're doing ok and following along.
  200. TERMINATED // We've reached the end of our path.
  201. };
  202. // Attributes...
  203. States Condition; // What state am I in? How's my health?
  204. Evaluator* NextEvaluator; // Linked List Pointer.
  205. unsigned int StreamStartPosition; // Indexes the position where we started.
  206. unsigned int CurrentPosition; // Indexes the node we are surfing.
  207. int WildRunLength; // Wildcard run length so far.
  208. // EvaluateThis() assumes it is being given the next character along the
  209. // path of a thread in the token matrix. It follows that thread and evaluates
  210. // it's condition.
  211. States EvaluateThis(unsigned short int i); // Follow the next byte.
  212. // isNoDuplicate() is used to keep us from allocating identical evaluators. This is
  213. // key to creating buddies when working with wildcards. It prevents us from recursively
  214. // proliferating evaluators at each new character when running in a wildcard loop.
  215. bool isNoDuplicate(unsigned int Position) { // Returns false if there is a duplicate.
  216. if(CurrentPosition == Position) // Obviously, if I match, then there's a dup.
  217. return false;
  218. // If I don't match and I'm the last one then
  219. if(NextEvaluator==NULL) // it must be true there are no dups. If there
  220. return true; // are more to ask then I'll let them answer.
  221. else
  222. return NextEvaluator->isNoDuplicate(Position);
  223. }
  224. Evaluator(unsigned int s, EvaluationMatrix* m); // Constructor...
  225. ~Evaluator(){
  226. if(NextEvaluator!=NULL){ // If there's more to this list then
  227. delete NextEvaluator; // delete it.
  228. }
  229. NextEvaluator = NULL; // Always null on exit.
  230. }
  231. };
  232. // A MatchRecord is created each time a new rule match occurs. These records form a
  233. // linked list within the Evaluation Matrix that can be spit out after the process is
  234. // over for reporting purposes.
  235. class MatchRecord {
  236. public:
  237. int MatchStartPosition; // Where in the data stream did the match start?
  238. int MatchEndPosition; // Where in the data stream did the match end?
  239. int MatchSymbol; // What symbol was attached to the match rule?
  240. inline int RuleId(){return (MatchSymbol/SYMBOL_RANGE);} // Decode RuleID
  241. inline int RuleGroup(){return (MatchSymbol%SYMBOL_RANGE);} // Decode GroupID
  242. MatchRecord* NextMatchRecord;
  243. MatchRecord(int sp, int ep, int sym) { // When constructing a MatchRecord,
  244. MatchStartPosition = sp; // you must provide all of it's data.
  245. MatchEndPosition = ep;
  246. MatchSymbol = sym;
  247. // Since match records are always added to
  248. NextMatchRecord = NULL; // the end our next pointer is always NULL.
  249. }
  250. ~MatchRecord(){
  251. if(NextMatchRecord != NULL) // If there's more list, then delete it.
  252. delete NextMatchRecord;
  253. NextMatchRecord = NULL; // Clean up our pointer before leaving.
  254. }
  255. };
  256. // Now that we've created our utility classes, we'll create another class (with an instance)
  257. // that builds a matrix to evaluate all incoming characters, manage the list, and keeps
  258. // statistics and results from the execution process.
  259. class EvaluationMatrix {
  260. private:
  261. TokenMatrix* myTokenMatrix; // Token Matrix that I evaluate with.
  262. Evaluator* EvaluatorList; // Linked list of Evaluators.
  263. Evaluator* CurrentEvaluator; // Current Evaluator (when checking)
  264. Evaluator* PreviousEvaluator; // Previous Evaluator (when checking)
  265. // Evaluator Caching Mechanism.
  266. Evaluator* EvaluatorCache; // List of cached, ready evaluators.
  267. Evaluator* SourceEvaluator(int s, EvaluationMatrix* m); // Get a cached or new evaluator.
  268. void CacheEvaluator(Evaluator* e); // Cache a used evaluator.
  269. int CountOfEvaluators; // Current count of evaluators.
  270. int PassResult; // Result of the latest evaluation pass.
  271. MatchRecord* LastResultInList; // Keeps track of the end of the result list.
  272. MatchRecord* AddMatchRecord(int sp, int ep, int sym); // Add a match result.
  273. // DropEvaluator() is called by the EvaluateThis() method whenever an evaluator
  274. // reports the FALLEN_OFF result. The EvaluateThis() method keeps two values up
  275. // to date - one is the current evaluator (which will be dropped) and the other is
  276. // the previous evaluator (which will be updated to heal the list).
  277. // When we've finished this function, the CurrentEvaluator will be on the next
  278. // evaluator node if it exists. Therefore, the caller should skip it's normal
  279. // list itteration code when this function has been called.
  280. void DropEvaluator();
  281. public:
  282. // Exception classes...
  283. class BadAllocation : public runtime_error { // Allocation failed exception.
  284. public: BadAllocation(const string& w):runtime_error(w) {}
  285. };
  286. class MaxEvalsExceeded : public runtime_error { // Too many evaluators exception.
  287. public: MaxEvalsExceeded(const string& w):runtime_error(w) {}
  288. };
  289. class OutOfRange : public runtime_error { // Out of range exception.
  290. public: OutOfRange(const string& w):runtime_error(w) {}
  291. };
  292. // Attributes...
  293. int CountOfCharacters; // How many characters have been evaluated.
  294. int MaximumCountOfEvaluators; // Largest matrix size reached.
  295. MatchRecord* ResultList; // List of match results.
  296. int DeepSwitch; // true if we're doing a deep scans.
  297. // 20030216 _M High Level Conversion Optimizers...
  298. int i_lower; // Lower case version of byte under test.
  299. bool i_isDigit; // true if i is a digit.
  300. bool i_isSpace; // true if i is whitespace.
  301. bool i_isAlpha; // true if i is alpha.
  302. // AddEvaluator() is made public because the Evaluator object must have access
  303. // to it in order to handle the creation of buddies as it evaluates it's rules.
  304. // Similarly the getTokens is public because evaluators must use this when they
  305. // initialize. In a later version we will clean this up so that all of this stuff
  306. // can be handled somewhat more privately.
  307. Token* getTokens() { // Deliver the raw token matrix
  308. return myTokenMatrix->getMatrix(); // for use when creating evaluators.
  309. }
  310. int getMatrixSize() { // Deliver the raw matrix size
  311. return myTokenMatrix->Size(); // for use when creating evaluators.
  312. }
  313. Evaluator* AddEvaluator(int s, unsigned int m); // Adds a new evaluator to the top.
  314. Evaluator* InsEvaluator(int s, unsigned int m); // Inserts a new evaluator after the
  315. // current evaluator. (Only called by
  316. // an existing evaluator in process...)
  317. // isNoDuplicate(int p) checks for duplicate evaulators
  318. bool isNoDuplicate(unsigned int p) { // If there's no list there can be no
  319. if(EvaluatorList == NULL) // duplicates so we're true. If there is
  320. return true; // a list then we'll let the list answer.
  321. else
  322. return EvaluatorList->isNoDuplicate(p);
  323. }
  324. // EvaluateThis() Moves each evaluator with the current character and creates a new
  325. // evaluator for the current spot in the input file to make all rules global.
  326. int EvaluateThis(unsigned short int i);
  327. EvaluationMatrix(TokenMatrix* m) { // Constructor w/ pointer to Token Matrix...
  328. myTokenMatrix = m; // Grab my TokenMatrix.
  329. EvaluatorList = NULL; // Start off with no evaluators.
  330. EvaluatorCache = NULL; // Start off with no evaluator cache.
  331. CurrentEvaluator = NULL; // NULL means starting at the top.
  332. PreviousEvaluator = NULL; // NULL means previous is the top.
  333. ResultList = NULL; // Start off with no results in our list.
  334. LastResultInList = NULL;
  335. CountOfCharacters = 0; // The count of characters will be zero and
  336. MaximumCountOfEvaluators = 0; // the maximum Evaluator count will be zero
  337. CountOfEvaluators = 0; // and the current count will also be zero.
  338. PassResult = 0; // Initialize expecting no matches.
  339. }
  340. ~EvaluationMatrix(){ // Destructor to clean up memory allocations.
  341. myTokenMatrix = NULL; // Stop pointing at the TokenMatrix
  342. // Both of these lists konw how to delete themselves.
  343. // 20060531_M Fixed possible crash by checking for NULL before
  344. // deleting these lists. Also added cleanup for the EvaluatorCache.
  345. if(NULL!=EvaluatorCache) {
  346. delete EvaluatorCache; // Delete the evaluator cache.
  347. EvaluatorCache = NULL; // Then clear it's pointer.
  348. }
  349. if(NULL!=EvaluatorList) {
  350. delete EvaluatorList; // Delete the evaluator list.
  351. EvaluatorList = NULL; // Then clear it's pointer.
  352. }
  353. if(NULL!=ResultList) {
  354. delete ResultList; // Delete the result list.
  355. ResultList = NULL; // Then clear it's pointer.
  356. }
  357. }
  358. };
  359. // 20060531_M Implementation of the evaluator cache is all inline.
  360. // In place of new Evaluator() we now can use SourceEvaluator()
  361. // In place of delete Evaluator() we now can use CacheEvaluator()
  362. // The effect is to store previously allocated evaluators in the EvaluatorCache
  363. // list so that they can be reused. This avoids the frequent use of
  364. // new and delete and allows us to skip a few extra cycles for initialization
  365. // because much of the constructor work for a new evaluator is already done
  366. // in any cached evaluator.
  367. //
  368. // In practice, at least one evaluator is likely to be created and destroyed
  369. // for each byte that is scanned. This new mechanism significantly reduces the
  370. // number of cycles that would normally be associated with those operations by
  371. // eliminating them most of the time. Instead of returning used memory to the
  372. // heap during delete, the evaluator is simply added to the cache list. Instead
  373. // of allocating new space from the heap and initializing the object, a cached
  374. // evaluator is simply moved from the cache into production. Moving into and
  375. // out of the cache is roughly as simple as changing a couple of pointers.
  376. // In place of new Evaluator, we do this...
  377. inline Evaluator* EvaluationMatrix::SourceEvaluator(int s, EvaluationMatrix* m) { // Get a cached or new evaluator.
  378. if(NULL==EvaluatorCache) return new Evaluator(s,m); // If we have no cache, use new!
  379. Evaluator* reuse = EvaluatorCache; // Otherwise grab a reusable one.
  380. EvaluatorCache = reuse->NextEvaluator; // Collaps the cache by one.
  381. reuse->NextEvaluator = NULL; // Clean it up a bit.
  382. reuse->StreamStartPosition = s; // Record our starting point.
  383. reuse->CurrentPosition = 0; // Reset the Current Position.
  384. reuse->WildRunLength = 0; // Reset the run length.
  385. reuse->Condition = Evaluator::DOING_OK; // Reset the condition.
  386. return reuse; // Return the reusable unit.
  387. }
  388. // In place of delete Evaluator, we do this...
  389. inline void EvaluationMatrix::CacheEvaluator(Evaluator* e) { // Cache a used evaluator.
  390. e->NextEvaluator = EvaluatorCache; // Link the used evaluator
  391. EvaluatorCache = e; // into the cache;
  392. }
  393. // In the above, the first evaluator added will get NULL as its NextEvaluator.
  394. // When that first evaluator is used, the NULL pointer will return to the root
  395. // of the EvaluatorCache list. In this regard the cache acts like a stack.
  396. #endif