You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. // GBUdb.hpp
  2. //
  3. // (C) Copyright 2006 - 2009 ARM Research Labs, LLC
  4. // See www.armresearch.com for the copyright terms.
  5. //
  6. // Good, Bad, Ugly, Ignore IP database engine.
  7. ////////////////////////////////////////////////////////////////////////////////
  8. // Include M_GBUdb Only Once
  9. #pragma once
  10. #include "../CodeDweller/faults.hpp"
  11. #ifdef WIN32
  12. // Required because threading.hpp includes windows.h.
  13. #include <winsock2.h>
  14. #endif
  15. #include "../CodeDweller/threading.hpp"
  16. #include <cmath>
  17. #include <cctype>
  18. #include <string>
  19. #include <sstream>
  20. #include <list>
  21. #include <cstdlib>
  22. #include <ctime>
  23. namespace cd = codedweller;
  24. const unsigned int GBUdbFlagsMask = 0xC0000000; // Top 2 bits are the flag.
  25. const unsigned int GBUdbIgnore = 0xC0000000; // Ignore is the 11 flag.
  26. const unsigned int GBUdbUgly = 0x00000000; // Ugly/Unknown is the 00 flag.
  27. const unsigned int GBUdbGood = 0x80000000; // Good is the 10 flag.
  28. const unsigned int GBUdbBad = 0x40000000; // Bad is the 01 flag.
  29. const unsigned int GBUdbGoodMask = 0x3FFF8000; // The good count is masked in this range.
  30. const unsigned int GBUdbBadMask = 0x00007FFF; // Tha bad count is masked here.
  31. const unsigned int GBUdbLimit = GBUdbBadMask; // When a count hits this, normalize in half.
  32. const unsigned int GBUdbGoodShift = 15; // Shift good counts this many bits.
  33. const unsigned int GBUdbMatchEntryBit = 0x80000000; // Match entry Index bit.
  34. const unsigned int GBUdbMatchUnusedBit = 0x40000000; // Unalocated Match entry Index bit.
  35. const unsigned int GBUdbMatchDataMask = 0x3fffffff; // IP Match data mask.
  36. enum GBUdbFlag { // A type for the GBUdb flag.
  37. Ignore = GBUdbIgnore, // Ignore
  38. Ugly = GBUdbUgly, // Ugly
  39. Good = GBUdbGood, // Good
  40. Bad = GBUdbBad // Bad
  41. };
  42. //// GBUdbLocking semantics
  43. //// When doForAllRecords() is called at the GBUdb level, we need to know how
  44. //// the GBUdb mutex should be handled.
  45. enum GBUdbLocking { // A type that describes locking semantics.
  46. Dataset, // Lock the through the entire operation.
  47. Record, // Lock and unlock for each record.
  48. None // Do not lock.
  49. };
  50. typedef unsigned int GBUdbIndex; // A type for Index values from records.
  51. const GBUdbIndex GBUdbUnknown = 0x00000000; // The unknown address.
  52. const int GBUdbRecordsPerNode = 256; // Records per node.
  53. const int GBUdbDefaultGrowNodes = 8192; // Default Nodes to grow.
  54. const int GBUdbDefaultArraySize = GBUdbRecordsPerNode * GBUdbDefaultGrowNodes; // Default initial Array size.
  55. const int GBUdbRootNodeOffset = 256; // First indexing node after node 0.
  56. const int GBUdbGrowthThreshold = 4; // Time to grow at this # free nodes.
  57. //// Node 0 is the go-nowhere node for when things fall off the index so it
  58. //// is coded to all GBUdbUnknown.
  59. //// The last node in the array is used for global statistics & allocation
  60. //// tables.
  61. const int GBUdbControlNodeOffset = -256; // Offset from end of data for control node.
  62. const int GBUdbNextFreeNodeOffset = GBUdbControlNodeOffset + 0; // Offset for next free node index.
  63. const int GBUdbMatchListOffset = GBUdbControlNodeOffset +1; // Offset for Match record allocation root.
  64. const int GBUdbIPCountOffset = GBUdbControlNodeOffset + 2; // Offset for count of IPs in GBUdb.
  65. // GBUdbRecord converts an ordinary unsigned long integer into a wealth of
  66. // useful information just by adding a collection of useful tools.
  67. class GBUdbRecord { // A GBUdb record is really just a
  68. public: // long integer, but it can be interpreted
  69. // lots of ways.
  70. unsigned int RawData; // The raw unsigned int goes here.
  71. GBUdbRecord(); // Initialize to zero.
  72. GBUdbFlag Flag(); // This returns the flag.
  73. GBUdbFlag Flag(GBUdbFlag f); // This sets and returns the flag.
  74. unsigned int Good(); // This returns the good count.
  75. unsigned int Good(unsigned int g); // This sets and returns the good count.
  76. unsigned int Bad(); // This returns the bad count.
  77. unsigned int Bad(unsigned int b); // This sets and returns the bad count.
  78. unsigned int addGood(unsigned int g = 1); // This increments the good count.
  79. unsigned int addBad(unsigned int b = 1); // This increments the bad count.
  80. GBUdbRecord& integrate(GBUdbRecord& A, int LocalWeight, int RemoteWeight); // This integrates another record.
  81. GBUdbIndex Index(); // This returns the record as an Index.
  82. GBUdbIndex Index(GBUdbIndex i); // This sets the record as an index.
  83. double Probability(); // Return +(bad) or -(good) probability.
  84. double Confidence(); // Return the confidence based on samples.
  85. };
  86. // Special events need to be recorded. For that job we have GBUdbAlerts
  87. const int UTCBufferSize = 16; // C string buffer size for UTC stamp.
  88. class GBUdbAlert {
  89. public:
  90. GBUdbAlert(); // Constructor sets timestamp & nulls.
  91. char UTC[UTCBufferSize]; // Time stamp for this alert.
  92. unsigned int IP; // IP for this alert.
  93. GBUdbRecord R; // GBUdbRecord for this alert.
  94. std::string toXML(); // Convert to an xml representation.
  95. };
  96. // Mass update kinds of operations are handled by providing a functor
  97. // of the type GBUdbOperator to the method doForAllRecords(). The functor is
  98. // called with every record in the GBUdb.
  99. //// Here is the virtual GBUdb Operator class.
  100. class GBUdbOperator {
  101. public:
  102. virtual GBUdbRecord& operator()(unsigned int IP, GBUdbRecord& R) = 0;
  103. };
  104. // GBUdbDataset manages a large array of GBUdb records and nodes. Nodes are
  105. // simulated data structures -- essentially arrays of GBUdbRecords that are
  106. // interpreted as Indexes so that each byte of a particular IP can be used
  107. // to follow the index through the tree to the final record that actually
  108. // represents the IPs data.
  109. // The last few records in the array are used to keep track of some basic
  110. // statistics including where the next node will come from. As with the GBUdb
  111. // record itself, it's all in how the data is interpreted. Using this strategy
  112. // of converting plain-old integers into various data types on the fly allows
  113. // us to allocate the entire structure as a single block and avoid much
  114. // page swapping behind the scenes.
  115. class GBUdbDataset {
  116. private:
  117. GBUdbRecord* DataArray; // Array of GBUdbRecords, nodes, etc.
  118. int MyArraySize; // The size of the array in records.
  119. std::string MyFileName; // CString for the file name.
  120. GBUdbIndex ixIPCount(); // Index of the IP count for this db.
  121. GBUdbIndex ixNextFreeNode(); // Index of the Next Free Node Index.
  122. GBUdbIndex ixMatchListRoot(); // Index of the Match List Root Index.
  123. GBUdbIndex newMatchRecord(unsigned int IP); // Allocate a new Match record for IP.
  124. GBUdbIndex newMatchNodeRoot(); // Allocate a new Match node.
  125. GBUdbIndex newNodeRoot(); // Allocates a new node, returns offset.
  126. void deleteMatchAt(GBUdbIndex I); // Recall match record at I for reuse.
  127. // invokeAt() Handles invocation at each node/octet using and managing MatchRecords as needed.
  128. GBUdbIndex invokeAt(GBUdbRecord& R, unsigned int IP, int Octet, bool ExtendMatches);
  129. int increaseIPCount(); // When we add an IP to the db.
  130. int decreaseIPCount(); // When we drop an IP from the db.
  131. void increaseIPCountIfNew(GBUdbRecord& R); // If R is GBUdbUnknown, IncreaseIPCount.
  132. bool isMatch(GBUdbIndex I); // True if record at I is a match record.
  133. bool isMatch(GBUdbIndex I, unsigned int IP); // True if record at I is a match for IP.
  134. GBUdbRecord& MatchedData(GBUdbIndex I); // Returns the data for the match at I.
  135. unsigned int EncodedMatch(unsigned int IP); // Returns encoded raw dat for a Match.
  136. //// In order to support binmodal indexing we must make sure that
  137. //// no octet3 data is mapped to the root record in an octet3 node. If
  138. //// it were so mapped then an octet2 evaluation might misinterpret the
  139. //// GBUdbFlag fields as a MatchRecord indicator and cause the data to
  140. //// become corrupted. To solve this problem, any time an octet2 node
  141. //// maps to an octet3 node and NOT a MatchRecord, the 0 record in the
  142. //// octet3 node must have no flags. Since x.x.x.0 is presumed to be the
  143. //// network address, and x.x.x.255 is presumed to be a broadcast address
  144. //// we cause both to map to a single record (the 255 record) where the
  145. //// Class C, B, or A data can be recorded and modified in safety. Since
  146. //// there is no need to track the brodcast and network address cases.
  147. //// separately there is no inherent conflict in this approach. The
  148. //// remapIP00toFF method performs this transform as needed in the
  149. //// readRecord() and invokeRecord() methods.
  150. unsigned int remapIP00toFF(unsigned int IP); // Remaps final octet 00 to FF if needed.
  151. GBUdbRecord MySafeUnknownRecord; // Safe unknown record to return.
  152. GBUdbRecord& SafeUnknownRecord(); // Clears and returns the Safe record.
  153. // doForAllNodes does its job by launching a recursive search algorythm
  154. // which is embodied in doAllAtNode(). The doAllAtNode() method is called
  155. // for the root node by doForAllRecords and searches through the tree depth
  156. // first to locate each active record in the GBUdb and call the Operator.
  157. // updateWorkingIP() uses progressive input from eacn level to determine
  158. // the effective IP for the node under test.
  159. void updateWorkingIP(unsigned int& WIP, int OctetValue, int Level);
  160. void doAllAtNode(GBUdbIndex I, GBUdbOperator& O, int NodeLevel, unsigned int WorkingIP);
  161. public:
  162. ~GBUdbDataset(); // Flush & shutdown a dataset.
  163. GBUdbDataset(const char* SetFileName); // Create with a name or no name (NULL).
  164. GBUdbDataset(GBUdbDataset& Original); // Copy constructor.
  165. class CouldNotGrow {}; // Thrown when grow() fails.
  166. class NoFreeNodes {}; // Thrown when newNodeRoot() fails.
  167. class MatchAllocationCorrupted {}; // Thrown when newMatchRecord() fails.
  168. GBUdbRecord& readRecord(unsigned int IP); // Read only - find a GBUdb record.
  169. GBUdbRecord& invokeRecord(unsigned int IP); // Create and/or Find a GBUdb record.
  170. bool dropRecord(unsigned int IP); // Drop an IP record. (true if we did)
  171. int ArraySize(); // Array size.
  172. int FreeNodes(); // Number of free nodes remaining.
  173. int IPCount(); // Number of IPs stored.
  174. const char* FileName(const char* NewName); // Set new file name w/ cstring.
  175. const char* FileName(); // Return the name.
  176. void grow(int HowManyNodes = GBUdbDefaultGrowNodes); // Grow (by number of nodes).
  177. void save(); // Flush the dataset to disk.
  178. void load(); // Read the dataset from disk.
  179. void doForAllRecords(GBUdbOperator& O); // Calls O(IP, Record) W/ every record.
  180. };
  181. // The GBUdb ojbect manages access to the GBUdb. For example, it will grow the
  182. // dataset when that is required, report new events, and generally serve as the
  183. // main access point for a given GBUdb. It even serializes multiple threads.
  184. //// Here is the actual GBUdb class.
  185. class GBUdb {
  186. private:
  187. cd::Mutex MyMutex; // Data sync mutex.
  188. cd::Mutex AlertsMutex; // Mutex for the alerts list.
  189. GBUdbDataset* MyDataset; // Array of records.
  190. int PostsCounter; // Counts good/bad posts.
  191. std::list<GBUdbAlert> MyAlerts; // Allerts list.
  192. void recordAlertFor(unsigned int IP, GBUdbRecord& R, unsigned int C); // Append an alert record if needed.
  193. public:
  194. GBUdb(); // Open/Create w/ no name.
  195. GBUdb(const char* FileName); // Open/Create w/ cstring or NULL.
  196. ~GBUdb(); // Shutdown
  197. const char* FileName(const char* NewName); // Set/Change the file name.
  198. const char* FileName(); // Return the FileName.
  199. void save(); // Save the data.
  200. void load(); // Load the data.
  201. GBUdbRecord addGood(unsigned int IP, int i = 1); // Count an IP as good.
  202. GBUdbRecord addBad(unsigned int IP, int i = 1); // Count an IP as bad.
  203. GBUdbRecord setGood(unsigned int IP); // Set the flag to Good for this IP.
  204. GBUdbRecord setBad(unsigned int IP); // Set the flag to Bad for this IP.
  205. GBUdbRecord setUgly(unsigned int IP); // Set the flag to Ugly for this IP.
  206. GBUdbRecord setIgnore(unsigned int IP); // Set the flag to Ignore for this IP.
  207. bool dropRecord(unsigned int IP); // Drop an IP record. (true if we did)
  208. GBUdbRecord getRecord(unsigned int IP); // Retrieve an IP record.
  209. GBUdbRecord setRecord(unsigned int IP, GBUdbRecord& R); // Store an IP record.
  210. GBUdbRecord adjustCounts(unsigned int IP, GBUdbRecord& R); // Adds counts from R to record for IP.
  211. void doForAllRecords(GBUdbOperator& O, GBUdbLocking L = Dataset); // Call the Operator w/ All records.
  212. void saveSnapshot(); // Saves a snapshot of the current db.
  213. void reduce(); // Reduce all counts by half.
  214. void compress(); // Remove any unknown records (reduced to zero).
  215. int readIgnoreList(const char* FileName = "GBUdbIgnoreList.txt"); // setIgnore for a list of IPs
  216. void GetAlerts(std::list<GBUdbAlert>& ListToFill); // Get all current alerts & clear.
  217. void ImportAlerts(std::list<GBUdbAlert>& PeerAlerts); // Default log2 alert import function.
  218. int IPCount(); // Number of IPs stored.
  219. int Size(); // Size of GBUdb in bytes.
  220. double Utilization(); // Utilization (percent).
  221. int Posts(); // Number of posts since last save.
  222. };
  223. // End of GBUdb Include Only Once
  224. ////////////////////////////////////////////////////////////////////////////////