// GBUdb.hpp // // (C) Copyright 2006 - 2009 ARM Research Labs, LLC // See www.armresearch.com for the copyright terms. // // Good, Bad, Ugly, Ignore IP database engine. //////////////////////////////////////////////////////////////////////////////// // Include M_GBUdb Only Once #ifndef M_GBUdb #define M_GBUdb #include "../CodeDweller/threading.hpp" #include #include #include #include #include #include using namespace std; const unsigned int GBUdbFlagsMask = 0xC0000000; // Top 2 bits are the flag. const unsigned int GBUdbIgnore = 0xC0000000; // Ignore is the 11 flag. const unsigned int GBUdbUgly = 0x00000000; // Ugly/Unknown is the 00 flag. const unsigned int GBUdbGood = 0x80000000; // Good is the 10 flag. const unsigned int GBUdbBad = 0x40000000; // Bad is the 01 flag. const unsigned int GBUdbGoodMask = 0x3FFF8000; // The good count is masked in this range. const unsigned int GBUdbBadMask = 0x00007FFF; // Tha bad count is masked here. const unsigned int GBUdbLimit = GBUdbBadMask; // When a count hits this, normalize in half. const unsigned int GBUdbGoodShift = 15; // Shift good counts this many bits. const unsigned int GBUdbMatchEntryBit = 0x80000000; // Match entry Index bit. const unsigned int GBUdbMatchUnusedBit = 0x40000000; // Unalocated Match entry Index bit. const unsigned int GBUdbMatchDataMask = 0x3fffffff; // IP Match data mask. enum GBUdbFlag { // A type for the GBUdb flag. Ignore = GBUdbIgnore, // Ignore Ugly = GBUdbUgly, // Ugly Good = GBUdbGood, // Good Bad = GBUdbBad // Bad }; //// GBUdbLocking semantics //// When doForAllRecords() is called at the GBUdb level, we need to know how //// the GBUdb mutex should be handled. enum GBUdbLocking { // A type that describes locking semantics. Dataset, // Lock the through the entire operation. Record, // Lock and unlock for each record. None // Do not lock. }; typedef unsigned int GBUdbIndex; // A type for Index values from records. const GBUdbIndex GBUdbUnknown = 0x00000000; // The unknown address. const int GBUdbRecordsPerNode = 256; // Records per node. const int GBUdbDefaultGrowNodes = 8192; // Default Nodes to grow. const int GBUdbDefaultArraySize = GBUdbRecordsPerNode * GBUdbDefaultGrowNodes; // Default initial Array size. const int GBUdbRootNodeOffset = 256; // First indexing node after node 0. const int GBUdbGrowthThreshold = 4; // Time to grow at this # free nodes. //// Node 0 is the go-nowhere node for when things fall off the index so it //// is coded to all GBUdbUnknown. //// The last node in the array is used for global statistics & allocation //// tables. const int GBUdbControlNodeOffset = -256; // Offset from end of data for control node. const int GBUdbNextFreeNodeOffset = GBUdbControlNodeOffset + 0; // Offset for next free node index. const int GBUdbMatchListOffset = GBUdbControlNodeOffset +1; // Offset for Match record allocation root. const int GBUdbIPCountOffset = GBUdbControlNodeOffset + 2; // Offset for count of IPs in GBUdb. // GBUdbRecord converts an ordinary unsigned long integer into a wealth of // useful information just by adding a collection of useful tools. class GBUdbRecord { // A GBUdb record is really just a public: // long integer, but it can be interpreted // lots of ways. unsigned int RawData; // The raw unsigned int goes here. GBUdbRecord(); // Initialize to zero. GBUdbFlag Flag(); // This returns the flag. GBUdbFlag Flag(GBUdbFlag f); // This sets and returns the flag. unsigned int Good(); // This returns the good count. unsigned int Good(unsigned int g); // This sets and returns the good count. unsigned int Bad(); // This returns the bad count. unsigned int Bad(unsigned int b); // This sets and returns the bad count. unsigned int addGood(unsigned int g = 1); // This increments the good count. unsigned int addBad(unsigned int b = 1); // This increments the bad count. GBUdbRecord& integrate(GBUdbRecord& A, int LocalWeight, int RemoteWeight); // This integrates another record. GBUdbIndex Index(); // This returns the record as an Index. GBUdbIndex Index(GBUdbIndex i); // This sets the record as an index. double Probability(); // Return +(bad) or -(good) probability. double Confidence(); // Return the confidence based on samples. }; // Special events need to be recorded. For that job we have GBUdbAlerts const int UTCBufferSize = 16; // C string buffer size for UTC stamp. class GBUdbAlert { public: GBUdbAlert(); // Constructor sets timestamp & nulls. char UTC[UTCBufferSize]; // Time stamp for this alert. unsigned int IP; // IP for this alert. GBUdbRecord R; // GBUdbRecord for this alert. string toXML(); // Convert to an xml representation. }; // Mass update kinds of operations are handled by providing a functor // of the type GBUdbOperator to the method doForAllRecords(). The functor is // called with every record in the GBUdb. //// Here is the virtual GBUdb Operator class. class GBUdbOperator { public: virtual GBUdbRecord& operator()(unsigned int IP, GBUdbRecord& R) = 0; }; // GBUdbDataset manages a large array of GBUdb records and nodes. Nodes are // simulated data structures -- essentially arrays of GBUdbRecords that are // interpreted as Indexes so that each byte of a particular IP can be used // to follow the index through the tree to the final record that actually // represents the IPs data. // The last few records in the array are used to keep track of some basic // statistics including where the next node will come from. As with the GBUdb // record itself, it's all in how the data is interpreted. Using this strategy // of converting plain-old integers into various data types on the fly allows // us to allocate the entire structure as a single block and avoid much // page swapping behind the scenes. class GBUdbDataset { private: GBUdbRecord* DataArray; // Array of GBUdbRecords, nodes, etc. int MyArraySize; // The size of the array in records. string MyFileName; // CString for the file name. GBUdbIndex ixIPCount(); // Index of the IP count for this db. GBUdbIndex ixNextFreeNode(); // Index of the Next Free Node Index. GBUdbIndex ixMatchListRoot(); // Index of the Match List Root Index. GBUdbIndex newMatchRecord(unsigned int IP); // Allocate a new Match record for IP. GBUdbIndex newMatchNodeRoot(); // Allocate a new Match node. GBUdbIndex newNodeRoot(); // Allocates a new node, returns offset. void deleteMatchAt(GBUdbIndex I); // Recall match record at I for reuse. // invokeAt() Handles invocation at each node/octet using and managing MatchRecords as needed. GBUdbIndex invokeAt(GBUdbRecord& R, unsigned int IP, int Octet, bool ExtendMatches); int increaseIPCount(); // When we add an IP to the db. int decreaseIPCount(); // When we drop an IP from the db. void increaseIPCountIfNew(GBUdbRecord& R); // If R is GBUdbUnknown, IncreaseIPCount. bool isMatch(GBUdbIndex I); // True if record at I is a match record. bool isMatch(GBUdbIndex I, unsigned int IP); // True if record at I is a match for IP. GBUdbRecord& MatchedData(GBUdbIndex I); // Returns the data for the match at I. unsigned int EncodedMatch(unsigned int IP); // Returns encoded raw dat for a Match. //// In order to support binmodal indexing we must make sure that //// no octet3 data is mapped to the root record in an octet3 node. If //// it were so mapped then an octet2 evaluation might misinterpret the //// GBUdbFlag fields as a MatchRecord indicator and cause the data to //// become corrupted. To solve this problem, any time an octet2 node //// maps to an octet3 node and NOT a MatchRecord, the 0 record in the //// octet3 node must have no flags. Since x.x.x.0 is presumed to be the //// network address, and x.x.x.255 is presumed to be a broadcast address //// we cause both to map to a single record (the 255 record) where the //// Class C, B, or A data can be recorded and modified in safety. Since //// there is no need to track the brodcast and network address cases. //// separately there is no inherent conflict in this approach. The //// remapIP00toFF method performs this transform as needed in the //// readRecord() and invokeRecord() methods. unsigned int remapIP00toFF(unsigned int IP); // Remaps final octet 00 to FF if needed. GBUdbRecord MySafeUnknownRecord; // Safe unknown record to return. GBUdbRecord& SafeUnknownRecord(); // Clears and returns the Safe record. // doForAllNodes does its job by launching a recursive search algorythm // which is embodied in doAllAtNode(). The doAllAtNode() method is called // for the root node by doForAllRecords and searches through the tree depth // first to locate each active record in the GBUdb and call the Operator. // updateWorkingIP() uses progressive input from eacn level to determine // the effective IP for the node under test. void updateWorkingIP(unsigned int& WIP, int OctetValue, int Level); void doAllAtNode(GBUdbIndex I, GBUdbOperator& O, int NodeLevel, unsigned int WorkingIP); public: ~GBUdbDataset(); // Flush & shutdown a dataset. GBUdbDataset(const char* SetFileName); // Create with a name or no name (NULL). GBUdbDataset(GBUdbDataset& Original); // Copy constructor. class CouldNotGrow {}; // Thrown when grow() fails. class NoFreeNodes {}; // Thrown when newNodeRoot() fails. class MatchAllocationCorrupted {}; // Thrown when newMatchRecord() fails. GBUdbRecord& readRecord(unsigned int IP); // Read only - find a GBUdb record. GBUdbRecord& invokeRecord(unsigned int IP); // Create and/or Find a GBUdb record. bool dropRecord(unsigned int IP); // Drop an IP record. (true if we did) int ArraySize(); // Array size. int FreeNodes(); // Number of free nodes remaining. int IPCount(); // Number of IPs stored. const char* FileName(const char* NewName); // Set new file name w/ cstring. const char* FileName(); // Return the name. void grow(int HowManyNodes = GBUdbDefaultGrowNodes); // Grow (by number of nodes). void save(); // Flush the dataset to disk. void load(); // Read the dataset from disk. void doForAllRecords(GBUdbOperator& O); // Calls O(IP, Record) W/ every record. }; // The GBUdb ojbect manages access to the GBUdb. For example, it will grow the // dataset when that is required, report new events, and generally serve as the // main access point for a given GBUdb. It even serializes multiple threads. //// Here is the actual GBUdb class. class GBUdb { private: Mutex MyMutex; // Data sync mutex. Mutex AlertsMutex; // Mutex for the alerts list. GBUdbDataset* MyDataset; // Array of records. int PostsCounter; // Counts good/bad posts. list MyAlerts; // Allerts list. void recordAlertFor(unsigned int IP, GBUdbRecord& R, unsigned int C); // Append an alert record if needed. public: GBUdb(); // Open/Create w/ no name. GBUdb(const char* FileName); // Open/Create w/ cstring or NULL. ~GBUdb(); // Shutdown const char* FileName(const char* NewName); // Set/Change the file name. const char* FileName(); // Return the FileName. void save(); // Save the data. void load(); // Load the data. GBUdbRecord addGood(unsigned int IP, int i = 1); // Count an IP as good. GBUdbRecord addBad(unsigned int IP, int i = 1); // Count an IP as bad. GBUdbRecord setGood(unsigned int IP); // Set the flag to Good for this IP. GBUdbRecord setBad(unsigned int IP); // Set the flag to Bad for this IP. GBUdbRecord setUgly(unsigned int IP); // Set the flag to Ugly for this IP. GBUdbRecord setIgnore(unsigned int IP); // Set the flag to Ignore for this IP. bool dropRecord(unsigned int IP); // Drop an IP record. (true if we did) GBUdbRecord getRecord(unsigned int IP); // Retrieve an IP record. GBUdbRecord setRecord(unsigned int IP, GBUdbRecord& R); // Store an IP record. GBUdbRecord adjustCounts(unsigned int IP, GBUdbRecord& R); // Adds counts from R to record for IP. void doForAllRecords(GBUdbOperator& O, GBUdbLocking L = Dataset); // Call the Operator w/ All records. void saveSnapshot(); // Saves a snapshot of the current db. void reduce(); // Reduce all counts by half. void compress(); // Remove any unknown records (reduced to zero). int readIgnoreList(const char* FileName = "GBUdbIgnoreList.txt"); // setIgnore for a list of IPs void GetAlerts(list& ListToFill); // Get all current alerts & clear. void ImportAlerts(list& PeerAlerts); // Default log2 alert import function. int IPCount(); // Number of IPs stored. int Size(); // Size of GBUdb in bytes. double Utilization(); // Utilization (percent). int Posts(); // Number of posts since last save. }; //// Include inline method definitions ///////////////////////////////////////// #include "GBUdb.inline.hpp" #endif // End of GBUdb Include Only Once ////////////////////////////////////////////////////////////////////////////////