Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

snf_engine.hpp 28KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577
  1. // snf_engine.hpp
  2. //
  3. // (C) 1985-2004 MicroNeil Research Corporation
  4. // (C) 2005-2009 ARM Research Labs, LLC.
  5. //
  6. // Derived from original work on cellular automation for complex pattern
  7. // reflex engine 1985 Pete McNeil (Madscientist)
  8. //
  9. // Derived from rapid scripting engine (token matrix) implementation 1987
  10. //
  11. // This is the header file for the sniffer pattern matching engine.
  12. // 20080305 _M - Added FlipEndian() function to convert rulebases from their
  13. // native little-endian format to big-endian format for CPUs that need it. See
  14. // additional work in SNFMulti to call the FlipEndian() function AFTER the
  15. // rulebase has been authenticated but before it is put into use.
  16. // 20070606 _M - Refactored exceptions to use base std::exception and improved
  17. // the evaluator code to reduce the strength of safety testing from 3 compares
  18. // per byte to 1.
  19. // 20060531 _M - Added evaluator caching to save a few cycles by not allocating
  20. // new memory and performing a complete initialization of an evaluator if there
  21. // is already one handy from a previous use.
  22. // 20021030 _M - Created.
  23. #pragma once
  24. #include <cassert>
  25. #include <stdexcept>
  26. #include <unistd.h>
  27. #include <cstdio>
  28. #include <cctype>
  29. #include <ctime>
  30. #include <cstdlib>
  31. #include <fstream>
  32. #include <iostream>
  33. #include <string>
  34. #include <vector>
  35. #include <exception>
  36. #include "../CodeDweller/faults.hpp"
  37. #include "../CodeDweller/mangler.hpp"
  38. //#include "../nvwa-0.6/nvwa/debug_new.h"
  39. namespace codedweller {
// 20030929 _M SYMBOL_RANGE moved to snf_engine.hpp as part of augmenting the
// capability of a match record. Match records now can decode themselves:
// MatchRecord::RuleId() divides a match symbol by SYMBOL_RANGE and
// MatchRecord::RuleGroup() takes the remainder.
const int SYMBOL_RANGE = 256; // Symbol result coding modulator.
  43. // Let's create our utility classes and structures.
  44. // The Token class.
  45. // This class represents the structure of a token. The rule file is, in fact,
  46. // a token matrix. Tokens within the matrix allow the sniffer to navigate through
  47. // a state change matrix attempting to locate special positions that indicate the
  48. // termination of a path, or more specifically, the recognition of a string that
  49. // has been evaluated along that path.
  50. //
  51. // IT IS IMPORTANT TO NOTE THAT AS THESE PROGRAMS ARE WRITTEN IT ASSUMES WE ARE IN
  52. // A 32 BIT INTEL ENVIRONMENT SO THAT THE TOKEN MATRIX CAN BE LOADED IN A SINGLE PASS
  53. // USING A BINARY INPUT STREAM.
  54. ////////////////////////////////////////////////////////////////////////////////////////
  55. // Token Declaration ///////////////////////////////////////////////////////////////////
  56. class Token { // Token class for defining and interpreting nodes within the matrix.
  57. public: // Beginning of Public stuff.
  58. int Check; // The first int is a check character.
  59. int Vector; // The second int is a vector.
  60. // isUnused() Returns true if the token is in an unused state.
  61. int isUnused() {
  62. return (Check==-1 && Vector==0) ? true : false;
  63. }
  64. // isTermination() Returns true if the token is in a termination state.
  65. int isTermination() {
  66. if(Check==0 && Vector > 0)
  67. return true;
  68. else
  69. return false;
  70. }
  71. // Symbol() Returns the symbol value for the token.
  72. int Symbol() { return Vector; }
  73. // Character() Returns the check character for this token.
  74. int Character() { return Check; }
  75. // End of Public stuff.
  76. // Note that no constructor is needed because the default constructor will do nicely.
  77. };
  78. ////////////////////////////////////////////////////////////////////////////////////////
  79. // Token Matrix Declaration ////////////////////////////////////////////////////////////
  80. ////////////////////////////////////////////////////////////////////////////////////////
  81. //
  82. // The Token Matrix loads, verifies, and maintains an array of tokens for the evaluators
  83. // to live in. This class provides safe access to the token matrix.
  84. //
  85. ////////////////////////////////////////////////////////////////////////////////////////
  86. class TokenMatrix {
  87. private:
  88. Token* Matrix; // Where we hold the token matrix.
  89. int MatrixSize; // What size is the matrix.
  90. public:
  91. // Exceptions...
  92. class BadAllocation : public std::runtime_error { // Exception for a bad memory allocation.
  93. public: BadAllocation(const std::string& w):runtime_error(w) {}
  94. };
  95. class BadMatrix : public std::runtime_error { // Exception for invalid matrix loads.
  96. public: BadMatrix(const std::string& w):runtime_error(w) {}
  97. };
  98. class BadFile : public std::runtime_error { // Exception for missing rulebase files.
  99. public: BadFile(const std::string& w):runtime_error(w) {}
  100. };
  101. class OutOfRange : public std::runtime_error { // Exception for indexes out of range.
  102. public: OutOfRange(const std::string& w):runtime_error(w) {}
  103. };
  104. // Standards...
  105. static const int SecuritySegmentSize = 1024; // File Authentication Segment
  106. static const int SecurityKeyBufferSize = 32; // Security Key Pad Block Size
  107. static const int RulebaseDigestSize = 64; // Number of bytes in digest.
  108. static const int MinimumValidMatrix = // Establish the smallest valid
  109. SecuritySegmentSize * 2 / SecurityKeyBufferSize; // matrix size
  110. // The first interface component checks the range and gives up the token.
  111. Token at(int x) { // Get the token at x
  112. if(x<0 || x>=MatrixSize) // Check to see if we're in bounds.
  113. throw OutOfRange("(x<0 || x>=MatrixSize)"); // If we're not then throw an exception.
  114. return Matrix[x]; // If we are then give it to them.
  115. }
  116. // The second interface component delivers the Matrix if it's valid so that other
  117. // code can manipulate it more efficiently (without constantly checking bounds.
  118. Token* getMatrix() { // Return the matrix.
  119. if(MatrixSize==0 || Matrix==NULL) // If the matrix isn't ready then
  120. throw BadMatrix("(MatrixSize==0 || Matrix==NULL)"); // throw an exception. If it is
  121. return Matrix; // ready then send it out.
  122. }
  123. // For simplicity we simply extend the underlying Token functions by taking a
  124. // position reference, checking it's range, and returning the result.
  125. int isUnused(int x) { // Extend Token.isUnused()
  126. return at(x).isUnused();
  127. }
  128. int isTermination(int x) { // Extend Token.isTermination()
  129. return at(x).isTermination();
  130. }
  131. int Symbol(int x) { // Exetend Token.Symbol()
  132. return at(x).Symbol();
  133. }
  134. int Character(int x) { // Extend Token.Character()
  135. return at(x).Character();
  136. }
  137. // Utility functions...
  138. int Size() { return MatrixSize; } // Returns the size of the matrix.
  139. void Load(const char* FileName); // Loads the matrix from a file name.
  140. void Load(std::string& FileName); // Loads the matrix from a file name string.
  141. void Load(std::ifstream& F); // Loads the token matrix from the file.
  142. void Validate(std::string& SecurityKey); // Validates the matrix with a key string.
  143. void Verify(std::string& SecurityKey); // Verifies the matrix digest.
  144. void FlipEndian(); // Converts big/little endian tokens.
  145. // Constructors...
  146. TokenMatrix() :
  147. Matrix(NULL),
  148. MatrixSize(0) { }
  149. TokenMatrix(std::ifstream& F) :
  150. Matrix(NULL),
  151. MatrixSize(0) {
  152. Load(F);
  153. }
  154. ~TokenMatrix() { // The Distructor...
  155. MatrixSize = 0; // Set the size to zero.
  156. if(Matrix) { delete [] Matrix; Matrix = NULL; } // If we have a matrix, remove it.
  157. }
  158. };
  159. /////////////////////////////////////////////////////////////////////////////////////////
  160. // End Token Work ///////////////////////////////////////////////////////////////////////
  161. /////////////////////////////////////////////////////////////////////////////////////////
// Having defined the token matrix, I now define the Evaluator class which will
// be used to follow any matching rule threads as the program scans a file.
// A new evaluator is started at each position in the input stream making all
// of the rules in the token matrix global.
  166. // The following two values are returned by the Evaluator at every step.
// Reserved token codes for wildcard and run-loop entries. Note that the
// value 7 is not assigned here.
const int WILD_WHITESPACE = 1; // Token code for whitespace wildcards.
const int WILD_DIGIT = 2; // Token code for digit wildcards.
const int WILD_LETTER = 3; // Token code for letter wildcards.
const int WILD_NONWHITE = 4; // Token code for non-whitespace wildcards.
const int WILD_ANYTHING = 5; // Token code for any character.
const int WILD_INLINE = 6; // Token code for any character except new line.
const int RUN_GATEWAY = 8; // Token code for run-loop gateways.
// Here are some tuning parameters
const int MaxWildRunLength = 4096; // Maximum span of "any number" wildcards.
const int MAX_EVALS = 2048; // Maximum number of evaluators.
  177. //////////////////////////////////////////////////////////////////////////////////////////
  178. // Evaluators and the Evaluation Matrix
  179. //////////////////////////////////////////////////////////////////////////////////////////
  180. class EvaluationMatrix; // We've got to pre-declare this for some compilers.
  181. class Evaluator { // Evaluator class for following threads through the matrix.
  182. private:
  183. EvaluationMatrix* myEvaluationMatrix; // The evaluation matrix I live in.
  184. Token* Matrix; // The raw token matrix I walk in.
  185. int MatrixSize; // Size of raw token matrix.
  186. // 20070606 _M Optimized Evaluator code by reducing the strength of the
  187. // safety check from 3 comparisons to 1.
  188. unsigned int PositionLimit; // Largest CurrentPosition.
  189. // 20030216 _M Optimization conversions
  190. // 20140119 _M Deprecated by jump table in evaluator
  191. // inline int i_lower(); // { return myEvaluationMatrix->i_lower; }
  192. // inline bool i_isDigit(); // { return myEvaluationMatrix->i_isDigit; }
  193. // inline bool i_isSpace(); // { return myEvaluationMatrix->i_isSpace; }
  194. // inline bool i_isAlpha(); // { return myEvaluationMatrix->i_isAphpa; }
  195. unsigned int JumpPoint;
  196. int xLetter(); // Match Any letter.
  197. int xDigit(); // Match Any digit.
  198. int xNonWhite(); // Match Any non-whitespace.
  199. int xWhiteSpace(); // Match Any whitespace.
  200. int xAnyInline(); // Match Any byte but new line.
  201. int xAnything(); // Match Any character at all.
  202. int xRunGateway(); // Match the run-loop gateway.
  203. void doFollowOrMakeBuddy(int keyVector); // Follow and divide algorithm.
  204. void tryFollowingPrecisePath(unsigned short int i);
  205. void tryFollowingNoCasePath(unsigned short int i);
  206. void tryFollowingWildAlphaPath();
  207. void tryFollowingWildDigitPath();
  208. void tryFollowingWildNonWhitePath();
  209. void tryFollowingWildWhitePath();
  210. void tryFollowingWildInlinePath();
  211. void tryFollowingWildAnythingPath();
  212. void doFollowerJumpTable(unsigned short int i);
  213. public:
  214. // Standard Values...
  215. enum States { // These are the posible coditions.
  216. OUT_OF_RANGE, // We're outside the matrix - very bad.
  217. FALLEN_OFF, // We've fallen off the path and are lost.
  218. DOING_OK, // We're doing ok and following along.
  219. TERMINATED // We've reached the end of our path.
  220. };
  221. // Attributes...
  222. States Condition; // What state am I in? How's my health?
  223. Evaluator* NextEvaluator; // Linked List Pointer.
  224. unsigned int StreamStartPosition; // Indexes the position where we started.
  225. unsigned int CurrentPosition; // Indexes the node we are surfing.
  226. int WildRunLength; // Wildcard run length so far.
  227. // EvaluateThis() assumes it is being given the next character along the
  228. // path of a thread in the token matrix. It follows that thread and evaluates
  229. // it's condition.
  230. States EvaluateThis(unsigned short int i); // Follow the next byte.
  231. // isNoDuplicate() is used to keep us from allocating identical evaluators. This is
  232. // key to creating buddies when working with wildcards. It prevents us from recursively
  233. // proliferating evaluators at each new character when running in a wildcard loop.
  234. bool isNoDuplicate(unsigned int Position) { // Returns false if there is a duplicate.
  235. if(CurrentPosition == Position) // Obviously, if I match, then there's a dup.
  236. return false;
  237. // If I don't match and I'm the last one then
  238. if(NextEvaluator==NULL) // it must be true there are no dups. If there
  239. return true; // are more to ask then I'll let them answer.
  240. else
  241. return NextEvaluator->isNoDuplicate(Position);
  242. }
  243. Evaluator(unsigned int s, EvaluationMatrix* m); // Constructor...
  244. ~Evaluator(){
  245. if(NextEvaluator!=NULL){ // If there's more to this list then
  246. delete NextEvaluator; // delete it.
  247. }
  248. NextEvaluator = NULL; // Always null on exit.
  249. }
  250. };
// A MatchRecord is created each time a new rule match occurs. These records form a
// linked list within the Evaluation Matrix that can be spit out after the process is
// over for reporting purposes.
  254. class MatchRecord {
  255. public:
  256. int MatchStartPosition; // Where in the data stream did the match start?
  257. int MatchEndPosition; // Where in the data stream did the match end?
  258. int MatchSymbol; // What symbol was attached to the match rule?
  259. inline int RuleId(){return (MatchSymbol/SYMBOL_RANGE);} // Decode RuleID
  260. inline int RuleGroup(){return (MatchSymbol%SYMBOL_RANGE);} // Decode GroupID
  261. MatchRecord* NextMatchRecord;
  262. MatchRecord(int sp, int ep, int sym) { // When constructing a MatchRecord,
  263. MatchStartPosition = sp; // you must provide all of it's data.
  264. MatchEndPosition = ep;
  265. MatchSymbol = sym;
  266. // Since match records are always added to
  267. NextMatchRecord = NULL; // the end our next pointer is always NULL.
  268. }
  269. ~MatchRecord(){
  270. if(NextMatchRecord != NULL) // If there's more list, then delete it.
  271. delete NextMatchRecord;
  272. NextMatchRecord = NULL; // Clean up our pointer before leaving.
  273. }
  274. };
  275. // Now that we've created our utility classes, we'll create another class (with an instance)
  276. // that builds a matrix to evaluate all incoming characters, manage the list, and keeps
  277. // statistics and results from the execution process.
  278. class EvaluationMatrix {
  279. private:
  280. TokenMatrix* myTokenMatrix; // Token Matrix that I evaluate with.
  281. Evaluator* EvaluatorList; // Linked list of Evaluators.
  282. Evaluator* CurrentEvaluator; // Current Evaluator (when checking)
  283. Evaluator* PreviousEvaluator; // Previous Evaluator (when checking)
  284. // Evaluator Caching Mechanism.
  285. Evaluator* EvaluatorCache; // List of cached, ready evaluators.
  286. Evaluator* SourceEvaluator(int s, EvaluationMatrix* m); // Get a cached or new evaluator.
  287. void CacheEvaluator(Evaluator* e); // Cache a used evaluator.
  288. int CountOfEvaluators; // Current count of evaluators.
  289. int PassResult; // Result of the latest evaluation pass.
  290. MatchRecord* LastResultInList; // Keeps track of the end of the result list.
  291. MatchRecord* AddMatchRecord(int sp, int ep, int sym); // Add a match result.
  292. // DropEvaluator() is called by the EvaluateThis() method whenever an evaluator
  293. // reports the FALLEN_OFF result. The EvaluateThis() method keeps two values up
  294. // to date - one is the current evaluator (which will be dropped) and the other is
  295. // the previous evaluator (which will be updated to heal the list).
  296. // When we've finished this function, the CurrentEvaluator will be on the next
  297. // evaluator node if it exists. Therefore, the caller should skip it's normal
  298. // list itteration code when this function has been called.
  299. void DropEvaluator();
  300. void dropAllEvaluators();
  301. public:
  302. // Exception classes...
  303. class BadAllocation : public std::runtime_error { // Allocation failed exception.
  304. public: BadAllocation(const std::string& w):runtime_error(w) {}
  305. };
  306. class MaxEvalsExceeded : public std::runtime_error { // Too many evaluators exception.
  307. public: MaxEvalsExceeded(const std::string& w):runtime_error(w) {}
  308. };
  309. class OutOfRange : public std::runtime_error { // Out of range exception.
  310. public: OutOfRange(const std::string& w):runtime_error(w) {}
  311. };
  312. // Attributes...
  313. int CountOfCharacters; // How many characters have been evaluated.
  314. int MaximumCountOfEvaluators; // Largest matrix size reached.
  315. MatchRecord* ResultList; // List of match results.
  316. int DeepSwitch; // true if we're doing a deep scans.
  317. // 20030216 _M High Level Conversion Optimizers...
  318. // 20140119 _M Deprecated by jump table in evaluator
  319. // int i_lower; // Lower case version of byte under test.
  320. // bool i_isDigit; // true if i is a digit.
  321. // bool i_isSpace; // true if i is whitespace.
  322. // bool i_isAlpha; // true if i is alpha.
  323. // AddEvaluator() is made public because the Evaluator object must have access
  324. // to it in order to handle the creation of buddies as it evaluates it's rules.
  325. // Similarly the getTokens is public because evaluators must use this when they
  326. // initialize. In a later version we will clean this up so that all of this stuff
  327. // can be handled somewhat more privately.
  328. Token* getTokens() { // Deliver the raw token matrix
  329. return myTokenMatrix->getMatrix(); // for use when creating evaluators.
  330. }
  331. int getMatrixSize() { // Deliver the raw matrix size
  332. return myTokenMatrix->Size(); // for use when creating evaluators.
  333. }
  334. Evaluator* AddEvaluator(int s, unsigned int m); // Adds a new evaluator to the top.
  335. Evaluator* InsEvaluator(int s, unsigned int m); // Inserts a new evaluator after the
  336. // current evaluator. (Only called by
  337. // an existing evaluator in process...)
  338. // isNoDuplicate(int p) checks for duplicate evaulators
  339. bool isNoDuplicate(unsigned int p) { // If there's no list there can be no
  340. if(EvaluatorList == NULL) // duplicates so we're true. If there is
  341. return true; // a list then we'll let the list answer.
  342. else
  343. return EvaluatorList->isNoDuplicate(p);
  344. }
  345. // EvaluateThis() Moves each evaluator with the current character and creates a new
  346. // evaluator for the current spot in the input file to make all rules global.
  347. int EvaluateThis(unsigned short int i);
  348. void evaluateSegment(std::vector<unsigned char>& data, unsigned int start, unsigned int finish);
  349. void restartEngineAt(int newCharacterCount);
  350. EvaluationMatrix(TokenMatrix* m) { // Constructor w/ pointer to Token Matrix...
  351. myTokenMatrix = m; // Grab my TokenMatrix.
  352. EvaluatorList = NULL; // Start off with no evaluators.
  353. EvaluatorCache = NULL; // Start off with no evaluator cache.
  354. CurrentEvaluator = NULL; // NULL means starting at the top.
  355. PreviousEvaluator = NULL; // NULL means previous is the top.
  356. ResultList = NULL; // Start off with no results in our list.
  357. LastResultInList = NULL;
  358. CountOfCharacters = 0; // The count of characters will be zero and
  359. MaximumCountOfEvaluators = 0; // the maximum Evaluator count will be zero
  360. CountOfEvaluators = 0; // and the current count will also be zero.
  361. PassResult = 0; // Initialize expecting no matches.
  362. }
  363. ~EvaluationMatrix(){ // Destructor to clean up memory allocations.
  364. myTokenMatrix = NULL; // Stop pointing at the TokenMatrix
  365. // Both of these lists konw how to delete themselves.
  366. // 20060531_M Fixed possible crash by checking for NULL before
  367. // deleting these lists. Also added cleanup for the EvaluatorCache.
  368. if(NULL!=EvaluatorCache) {
  369. delete EvaluatorCache; // Delete the evaluator cache.
  370. EvaluatorCache = NULL; // Then clear it's pointer.
  371. }
  372. if(NULL!=EvaluatorList) {
  373. delete EvaluatorList; // Delete the evaluator list.
  374. EvaluatorList = NULL; // Then clear it's pointer.
  375. }
  376. if(NULL!=ResultList) {
  377. delete ResultList; // Delete the result list.
  378. ResultList = NULL; // Then clear it's pointer.
  379. }
  380. }
  381. };
// 20060531_M Implementation of the evaluator cache is all inline.
// In place of new Evaluator() we now can use SourceEvaluator()
// In place of delete Evaluator() we now can use CacheEvaluator()
// The effect is to store previously allocated evaluators in the EvaluatorCache
// list so that they can be reused. This avoids the frequent use of
// new and delete and allows us to skip a few extra cycles for initialization
// because much of the constructor work for a new evaluator is already done
// in any cached evaluator.
//
// In practice, at least one evaluator is likely to be created and destroyed
// for each byte that is scanned. This new mechanism significantly reduces the
// number of cycles that would normally be associated with those operations by
// eliminating them most of the time. Instead of returning used memory to the
// heap during delete, the evaluator is simply added to the cache list. Instead
// of allocating new space from the heap and initializing the object, a cached
// evaluator is simply moved from the cache into production. Moving into and
// out of the cache is roughly as simple as changing a couple of pointers.
  399. // In place of new Evaluator, we do this...
  400. inline Evaluator* EvaluationMatrix::SourceEvaluator(int s, EvaluationMatrix* m) { // Get a cached or new evaluator.
  401. if(NULL==EvaluatorCache) return new Evaluator(s,m); // If we have no cache, use new!
  402. Evaluator* reuse = EvaluatorCache; // Otherwise grab a reusable one.
  403. EvaluatorCache = reuse->NextEvaluator; // Collaps the cache by one.
  404. reuse->NextEvaluator = NULL; // Clean it up a bit.
  405. reuse->StreamStartPosition = s; // Record our starting point.
  406. reuse->CurrentPosition = 0; // Reset the Current Position.
  407. reuse->WildRunLength = 0; // Reset the run length.
  408. reuse->Condition = Evaluator::DOING_OK; // Reset the condition.
  409. return reuse; // Return the reusable unit.
  410. }
  411. // In place of delete Evaluator, we do this...
  412. inline void EvaluationMatrix::CacheEvaluator(Evaluator* e) { // Cache a used evaluator.
  413. e->NextEvaluator = EvaluatorCache; // Link the used evaluator
  414. EvaluatorCache = e; // into the cache;
  415. }
// In the above, the first evaluator added will get NULL as its NextEvaluator.
// When that first evaluator is used, the NULL pointer will return to the root
// of the EvaluatorCache list. In this regard the cache acts like a stack.
  419. } // End namespace codedweller