You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

GBUdb.hpp 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. // GBUdb.hpp
  2. //
  3. // (C) Copyright 2006 - 2009 ARM Research Labs, LLC
  4. // See www.armresearch.com for the copyright terms.
  5. //
  6. // Good, Bad, Ugly, Ignore IP database engine.
  7. ////////////////////////////////////////////////////////////////////////////////
  8. // Include M_GBUdb Only Once
  9. #pragma once
  10. #include "../CodeDweller/faults.hpp"
  11. #include "../CodeDweller/threading.hpp"
  12. #include <cmath>
  13. #include <cctype>
  14. #include <string>
  15. #include <sstream>
  16. #include <list>
  17. #include <cstdlib>
  18. #include <ctime>
  19. namespace cd = codedweller;
  // Bit layout of a GBUdbRecord's raw 32-bit value when it is read as data:
  //
  //   bits 31-30 : flag   (Ugly = 00, Bad = 01, Good = 10, Ignore = 11)
  //   bits 29-15 : good count
  //   bits 14-0  : bad count

  const unsigned int GBUdbFlagsMask = 0xC0000000;       // Top 2 bits are the flag.
  const unsigned int GBUdbIgnore    = 0xC0000000;       // Ignore is the 11 flag.
  const unsigned int GBUdbUgly      = 0x00000000;       // Ugly/Unknown is the 00 flag.
  const unsigned int GBUdbGood      = 0x80000000;       // Good is the 10 flag.
  const unsigned int GBUdbBad       = 0x40000000;       // Bad is the 01 flag.

  const unsigned int GBUdbGoodMask  = 0x3FFF8000;       // The good count is masked in this range.
  const unsigned int GBUdbBadMask   = 0x00007FFF;       // The bad count is masked here.
  const unsigned int GBUdbLimit     = GBUdbBadMask;     // When a count hits this, normalize in half.
  const unsigned int GBUdbGoodShift = 15;               // Shift good counts this many bits.

  // Bit layout of a raw value when it is read as a Match entry instead:
  // the same top two bits carry the match markers, the remaining 30 bits
  // carry the IP match data.

  const unsigned int GBUdbMatchEntryBit  = 0x80000000;  // Match entry Index bit.
  const unsigned int GBUdbMatchUnusedBit = 0x40000000;  // Unallocated Match entry Index bit.
  const unsigned int GBUdbMatchDataMask  = 0x3fffffff;  // IP Match data mask.
  32. enum GBUdbFlag { // A type for the GBUdb flag.
  33. Ignore = GBUdbIgnore, // Ignore
  34. Ugly = GBUdbUgly, // Ugly
  35. Good = GBUdbGood, // Good
  36. Bad = GBUdbBad // Bad
  37. };
  //// GBUdbLocking semantics
  //// When doForAllRecords() is called at the GBUdb level, we need to know how
  //// the GBUdb mutex should be handled. The caller picks one of these.
  enum GBUdbLocking {
      Dataset,    // Hold the lock through the entire operation.
      Record,     // Lock and unlock for each record.
      None        // Do not lock.
  };
  // Index values stored in records are plain unsigned ints; the typedef
  // marks the places where a raw value is being used as an index.
  typedef unsigned int GBUdbIndex;

  const GBUdbIndex GBUdbUnknown = 0x00000000;           // The unknown address.

  // Sizing of the record array. The array is organised in nodes of
  // GBUdbRecordsPerNode records each.
  const int GBUdbRecordsPerNode   = 256;                // Records per node.
  const int GBUdbDefaultGrowNodes = 8192;               // Default Nodes to grow.
  const int GBUdbDefaultArraySize =
      GBUdbRecordsPerNode * GBUdbDefaultGrowNodes;      // Default initial Array size.
  const int GBUdbRootNodeOffset   = 256;                // First indexing node after node 0.
  const int GBUdbGrowthThreshold  = 4;                  // Time to grow at this # free nodes.
  //// Node 0 is the go-nowhere node for when things fall off the index so it
  //// is coded to all GBUdbUnknown.

  //// The last node in the array is used for global statistics & allocation
  //// tables. The offsets below are negative because they are measured back
  //// from the end of the data.

  const int GBUdbControlNodeOffset  = -256;                         // Offset from end of data for control node.
  const int GBUdbNextFreeNodeOffset = GBUdbControlNodeOffset + 0;   // Offset for next free node index.
  const int GBUdbMatchListOffset    = GBUdbControlNodeOffset + 1;   // Offset for Match record allocation root.
  const int GBUdbIPCountOffset      = GBUdbControlNodeOffset + 2;   // Offset for count of IPs in GBUdb.
  61. // GBUdbRecord converts an ordinary unsigned long integer into a wealth of
  62. // useful information just by adding a collection of useful tools.
  63. class GBUdbRecord { // A GBUdb record is really just a
  64. public: // long integer, but it can be interpreted
  65. // lots of ways.
  66. unsigned int RawData; // The raw unsigned int goes here.
  67. GBUdbRecord(); // Initialize to zero.
  68. GBUdbFlag Flag(); // This returns the flag.
  69. GBUdbFlag Flag(GBUdbFlag f); // This sets and returns the flag.
  70. unsigned int Good(); // This returns the good count.
  71. unsigned int Good(unsigned int g); // This sets and returns the good count.
  72. unsigned int Bad(); // This returns the bad count.
  73. unsigned int Bad(unsigned int b); // This sets and returns the bad count.
  74. unsigned int addGood(unsigned int g = 1); // This increments the good count.
  75. unsigned int addBad(unsigned int b = 1); // This increments the bad count.
  76. GBUdbRecord& integrate(GBUdbRecord& A, int LocalWeight, int RemoteWeight); // This integrates another record.
  77. GBUdbIndex Index(); // This returns the record as an Index.
  78. GBUdbIndex Index(GBUdbIndex i); // This sets the record as an index.
  79. double Probability(); // Return +(bad) or -(good) probability.
  80. double Confidence(); // Return the confidence based on samples.
  81. };
  82. // Special events need to be recorded. For that job we have GBUdbAlerts
  83. const int UTCBufferSize = 16; // C string buffer size for UTC stamp.
  84. class GBUdbAlert {
  85. public:
  86. GBUdbAlert(); // Constructor sets timestamp & nulls.
  87. char UTC[UTCBufferSize]; // Time stamp for this alert.
  88. unsigned int IP; // IP for this alert.
  89. GBUdbRecord R; // GBUdbRecord for this alert.
  90. std::string toXML(); // Convert to an xml representation.
  91. };
  92. // Mass update kinds of operations are handled by providing a functor
  93. // of the type GBUdbOperator to the method doForAllRecords(). The functor is
  94. // called with every record in the GBUdb.
  95. //// Here is the virtual GBUdb Operator class.
  96. class GBUdbOperator {
  97. public:
  98. virtual GBUdbRecord& operator()(unsigned int IP, GBUdbRecord& R) = 0;
  99. };
  100. // GBUdbDataset manages a large array of GBUdb records and nodes. Nodes are
  101. // simulated data structures -- essentially arrays of GBUdbRecords that are
  102. // interpreted as Indexes so that each byte of a particular IP can be used
  103. // to follow the index through the tree to the final record that actually
  104. // represents the IPs data.
  105. // The last few records in the array are used to keep track of some basic
  106. // statistics including where the next node will come from. As with the GBUdb
  107. // record itself, it's all in how the data is interpreted. Using this strategy
  108. // of converting plain-old integers into various data types on the fly allows
  109. // us to allocate the entire structure as a single block and avoid much
  110. // page swapping behind the scenes.
  111. class GBUdbDataset {
  112. private:
  113. GBUdbRecord* DataArray; // Array of GBUdbRecords, nodes, etc.
  114. int MyArraySize; // The size of the array in records.
  115. std::string MyFileName; // CString for the file name.
  116. GBUdbIndex ixIPCount(); // Index of the IP count for this db.
  117. GBUdbIndex ixNextFreeNode(); // Index of the Next Free Node Index.
  118. GBUdbIndex ixMatchListRoot(); // Index of the Match List Root Index.
  119. GBUdbIndex newMatchRecord(unsigned int IP); // Allocate a new Match record for IP.
  120. GBUdbIndex newMatchNodeRoot(); // Allocate a new Match node.
  121. GBUdbIndex newNodeRoot(); // Allocates a new node, returns offset.
  122. void deleteMatchAt(GBUdbIndex I); // Recall match record at I for reuse.
  123. // invokeAt() Handles invocation at each node/octet using and managing MatchRecords as needed.
  124. GBUdbIndex invokeAt(GBUdbRecord& R, unsigned int IP, int Octet, bool ExtendMatches);
  125. int increaseIPCount(); // When we add an IP to the db.
  126. int decreaseIPCount(); // When we drop an IP from the db.
  127. void increaseIPCountIfNew(GBUdbRecord& R); // If R is GBUdbUnknown, IncreaseIPCount.
  128. bool isMatch(GBUdbIndex I); // True if record at I is a match record.
  129. bool isMatch(GBUdbIndex I, unsigned int IP); // True if record at I is a match for IP.
  130. GBUdbRecord& MatchedData(GBUdbIndex I); // Returns the data for the match at I.
  131. unsigned int EncodedMatch(unsigned int IP); // Returns encoded raw dat for a Match.
  132. //// In order to support binmodal indexing we must make sure that
  133. //// no octet3 data is mapped to the root record in an octet3 node. If
  134. //// it were so mapped then an octet2 evaluation might misinterpret the
  135. //// GBUdbFlag fields as a MatchRecord indicator and cause the data to
  136. //// become corrupted. To solve this problem, any time an octet2 node
  137. //// maps to an octet3 node and NOT a MatchRecord, the 0 record in the
  138. //// octet3 node must have no flags. Since x.x.x.0 is presumed to be the
  139. //// network address, and x.x.x.255 is presumed to be a broadcast address
  140. //// we cause both to map to a single record (the 255 record) where the
  141. //// Class C, B, or A data can be recorded and modified in safety. Since
  142. //// there is no need to track the brodcast and network address cases.
  143. //// separately there is no inherent conflict in this approach. The
  144. //// remapIP00toFF method performs this transform as needed in the
  145. //// readRecord() and invokeRecord() methods.
  146. unsigned int remapIP00toFF(unsigned int IP); // Remaps final octet 00 to FF if needed.
  147. GBUdbRecord MySafeUnknownRecord; // Safe unknown record to return.
  148. GBUdbRecord& SafeUnknownRecord(); // Clears and returns the Safe record.
  149. // doForAllNodes does its job by launching a recursive search algorythm
  150. // which is embodied in doAllAtNode(). The doAllAtNode() method is called
  151. // for the root node by doForAllRecords and searches through the tree depth
  152. // first to locate each active record in the GBUdb and call the Operator.
  153. // updateWorkingIP() uses progressive input from eacn level to determine
  154. // the effective IP for the node under test.
  155. void updateWorkingIP(unsigned int& WIP, int OctetValue, int Level);
  156. void doAllAtNode(GBUdbIndex I, GBUdbOperator& O, int NodeLevel, unsigned int WorkingIP);
  157. public:
  158. ~GBUdbDataset(); // Flush & shutdown a dataset.
  159. GBUdbDataset(const char* SetFileName); // Create with a name or no name (NULL).
  160. GBUdbDataset(GBUdbDataset& Original); // Copy constructor.
  161. class CouldNotGrow {}; // Thrown when grow() fails.
  162. class NoFreeNodes {}; // Thrown when newNodeRoot() fails.
  163. class MatchAllocationCorrupted {}; // Thrown when newMatchRecord() fails.
  164. GBUdbRecord& readRecord(unsigned int IP); // Read only - find a GBUdb record.
  165. GBUdbRecord& invokeRecord(unsigned int IP); // Create and/or Find a GBUdb record.
  166. bool dropRecord(unsigned int IP); // Drop an IP record. (true if we did)
  167. int ArraySize(); // Array size.
  168. int FreeNodes(); // Number of free nodes remaining.
  169. int IPCount(); // Number of IPs stored.
  170. const char* FileName(const char* NewName); // Set new file name w/ cstring.
  171. const char* FileName(); // Return the name.
  172. void grow(int HowManyNodes = GBUdbDefaultGrowNodes); // Grow (by number of nodes).
  173. void save(); // Flush the dataset to disk.
  174. void load(); // Read the dataset from disk.
  175. void doForAllRecords(GBUdbOperator& O); // Calls O(IP, Record) W/ every record.
  176. };
  177. // The GBUdb ojbect manages access to the GBUdb. For example, it will grow the
  178. // dataset when that is required, report new events, and generally serve as the
  179. // main access point for a given GBUdb. It even serializes multiple threads.
  180. //// Here is the actual GBUdb class.
  181. class GBUdb {
  182. private:
  183. cd::Mutex MyMutex; // Data sync mutex.
  184. cd::Mutex AlertsMutex; // Mutex for the alerts list.
  185. GBUdbDataset* MyDataset; // Array of records.
  186. int PostsCounter; // Counts good/bad posts.
  187. std::list<GBUdbAlert> MyAlerts; // Allerts list.
  188. void recordAlertFor(unsigned int IP, GBUdbRecord& R, unsigned int C); // Append an alert record if needed.
  189. public:
  190. GBUdb(); // Open/Create w/ no name.
  191. GBUdb(const char* FileName); // Open/Create w/ cstring or NULL.
  192. ~GBUdb(); // Shutdown
  193. const char* FileName(const char* NewName); // Set/Change the file name.
  194. const char* FileName(); // Return the FileName.
  195. void save(); // Save the data.
  196. void load(); // Load the data.
  197. GBUdbRecord addGood(unsigned int IP, int i = 1); // Count an IP as good.
  198. GBUdbRecord addBad(unsigned int IP, int i = 1); // Count an IP as bad.
  199. GBUdbRecord setGood(unsigned int IP); // Set the flag to Good for this IP.
  200. GBUdbRecord setBad(unsigned int IP); // Set the flag to Bad for this IP.
  201. GBUdbRecord setUgly(unsigned int IP); // Set the flag to Ugly for this IP.
  202. GBUdbRecord setIgnore(unsigned int IP); // Set the flag to Ignore for this IP.
  203. bool dropRecord(unsigned int IP); // Drop an IP record. (true if we did)
  204. GBUdbRecord getRecord(unsigned int IP); // Retrieve an IP record.
  205. GBUdbRecord setRecord(unsigned int IP, GBUdbRecord& R); // Store an IP record.
  206. GBUdbRecord adjustCounts(unsigned int IP, GBUdbRecord& R); // Adds counts from R to record for IP.
  207. void doForAllRecords(GBUdbOperator& O, GBUdbLocking L = Dataset); // Call the Operator w/ All records.
  208. void saveSnapshot(); // Saves a snapshot of the current db.
  209. void reduce(); // Reduce all counts by half.
  210. void compress(); // Remove any unknown records (reduced to zero).
  211. int readIgnoreList(const char* FileName = "GBUdbIgnoreList.txt"); // setIgnore for a list of IPs
  212. void GetAlerts(std::list<GBUdbAlert>& ListToFill); // Get all current alerts & clear.
  213. void ImportAlerts(std::list<GBUdbAlert>& PeerAlerts); // Default log2 alert import function.
  214. int IPCount(); // Number of IPs stored.
  215. int Size(); // Size of GBUdb in bytes.
  216. double Utilization(); // Utilization (percent).
  217. int Posts(); // Number of posts since last save.
  218. };
  219. // End of GBUdb Include Only Once
  220. ////////////////////////////////////////////////////////////////////////////////