// GBUdb.cpp // // (C) Copyright 2006 - 2020 ARM Research Labs, LLC // See www.armresearch.com for the copyright terms. // // See GBUdb.hpp for details. #include #include #include #include #include "GBUdb.hpp" namespace cd = codedweller; //// Handy utilities... //// GBUdbRecord Implementations /////////////////////////////////////////////// GBUdbRecord::GBUdbRecord() : // Initialize a new GBUdbRecord RawData(0) { // to ZERO. } GBUdbFlag GBUdbRecord::Flag() { // Return the flags. return (GBUdbFlag) (RawData & GBUdbFlagsMask); // Isolate the flags from the data & return. } GBUdbFlag GBUdbRecord::Flag(GBUdbFlag f) { // Set the flags. RawData = RawData & (~GBUdbFlagsMask); // Strip the current flags from RawData. RawData = RawData | f; // Put the new flags into RawData. return (GBUdbFlag) (RawData & GBUdbFlagsMask); // Return the flags now in RawData. } unsigned int GBUdbRecord::Good() { // Return the Good count. return ((RawData & GBUdbGoodMask) >> GBUdbGoodShift); // Isolate & shift the good count, return. } unsigned int GBUdbRecord::Good(unsigned int g) { // Set the good count. RawData = RawData & (~GBUdbGoodMask); // Strip the current good count. g = g & GBUdbLimit; // Make g safe (within bitfield limit). RawData = RawData | (g << GBUdbGoodShift); // Shift & combine g with RawData. return g; // Return the safe g value. } unsigned int GBUdbRecord::Bad() { // Get the bad count. return (RawData & GBUdbBadMask); // Isolate the bad data and return. } unsigned int GBUdbRecord::Bad(unsigned int b) { // Set the bad count. RawData = RawData & (~GBUdbBadMask); // Strip out the current bad count. b = b & GBUdbLimit; // Make b safe (strip any extra bits). RawData = RawData | b; // Combine RawData with the safe b. return b; // return the safe b. } unsigned int GBUdbRecord::addGood(unsigned int g) { // Add to the good count & normalize. unsigned int G = Good(); // Get the good. unsigned int B = Bad(); // Get the bad. G = G + g; // Add the new g to the good. while(G > GBUdbLimit) { // If normalization is required G = G >> 1; // then reduce the new good B = B >> 1; // and bad counts by half } // until things are normalized. Good(G); // Then go ahead and set the Bad(B); // new value(s) into place. return G; // Return the new good count. } unsigned int GBUdbRecord::addBad(unsigned int b) { // Add to the bad count & normalize. unsigned int G = Good(); // Get the good. unsigned int B = Bad(); // Get the bad. B = B + b; // Add the new b to the bad. while(B > GBUdbLimit) { // If normalization is required G = G >> 1; // then reduce the new good B = B >> 1; // and bad counts by half } // until things are normalized. Good(G); // Then go ahead and set the Bad(B); // new value(s) into place. return B; // Return the new good count. } GBUdbRecord& GBUdbRecord::integrate(GBUdbRecord& A, int LocalWeight, int RemoteWeight) { // Integrate A unsigned int Gl = Good(); // Get the good and unsigned int Bl = Bad(); // bad counts from unsigned int Gr = A.Good(); // the local and unsigned int Br = A.Bad(); // remote records. Gl = (Gl * LocalWeight) + (Gr * RemoteWeight); // Combine the Good and Bl = (Bl * LocalWeight) + (Br * RemoteWeight); // bad counts using the weights. while(Gl > GBUdbLimit || Bl > GBUdbLimit) { // Normalize the counts by Gl = Gl >> 1; // dividing both in half until Bl = Bl >> 1; // they are both within limits. } Good(Gl); // Then set the new Good Bad(Bl); // and bad values and return return *this; // this object. } GBUdbIndex GBUdbRecord::Index() { // Read the record as an index. return (GBUdbIndex) RawData; } GBUdbIndex GBUdbRecord::Index(GBUdbIndex i) { // Write the index value of the record. RawData = (unsigned int) i; return (GBUdbIndex) RawData; } // Probability is about the ratio of a given event to the total events. // In this case, positive probabilities indicate a tendency toward spam and // negative probabilities indicate a tendency toward ham. double GBUdbRecord::Probability() { // Calculate the probability of spam unsigned int G = Good(); // Get the good and unsigned int B = Bad(); // bad counts and double P = 0.0; // grab a double to hold P. if(0 == B + G) { // If we have no counts yet return P; // then return a zero probability. } // If we have counts lets do the math. P = ((double) B - (double) G) / ((double) B + (double) G); // Calculate the differential return P; // probability and return it. } // The confidence we have in a probability is related to the number of samples // that are present. We calculate the confidence on a logarithmic scale between // one sample and half the maximum number by category (good or bad) because // during condensation all counts may be reduced by half. That is, a 100% // confidence is achieved when a record contains a total of half the maximum // number of counts for a single category. double GBUdbRecord::Confidence() { // Calculate our confidence in prob. unsigned int Total = Good() + Bad(); // What is our total count of samples. if(0 == Total) return 0.0; // No samples is no confidence. double Confidence = (log((double)Total) / log((double)(GBUdbLimit/2))); // Calculate on a log scale. if(1.0 < Confidence) Confidence = 1.0; // Max confidence is 1.0. return Confidence; // Return the result. } //// GBUdbDataSet Inline Methods /////////////////////////////////////////////// GBUdbIndex GBUdbDataset::ixIPCount() { // Index of the IP count for this db. return MyArraySize + GBUdbIPCountOffset; // Return the offest from the end. } GBUdbIndex GBUdbDataset::ixNextFreeNode() { // Index of the Next Free Node. return MyArraySize + GBUdbNextFreeNodeOffset; // Return the offset from the end. } GBUdbIndex GBUdbDataset::newNodeRoot() { // Allocates a new node, returns offset. if(0 >= FreeNodes()) { // Check that we have free nodes to throw NoFreeNodes(); // allocate. If we don't then throw! } GBUdbIndex NewNode = DataArray[ixNextFreeNode()].Index(); // Grab the next new node index. DataArray[ixNextFreeNode()].Index(NewNode + GBUdbRecordsPerNode); // Move the allocator up a node. return NewNode; // Return the allocated node. } int GBUdbDataset::ArraySize() { // Return the current Array Size. return MyArraySize; } int GBUdbDataset::FreeNodes() { // Return the number of free nodes. int FreeRecords = MyArraySize - DataArray[ixNextFreeNode()].RawData; // Find the number of records left. int FreeNodes = (FreeRecords / GBUdbRecordsPerNode) - 1; // Convert to nodes and subtract the return FreeNodes; // control node, the return the value. } int GBUdbDataset::IPCount() { // Return the IP count. return DataArray[ixIPCount()].RawData; } int GBUdbDataset::increaseIPCount() { // When we add an IP to the db. return DataArray[ixIPCount()].RawData++; // Increment and return the IP count. } int GBUdbDataset::decreaseIPCount() { // When we drop an IP from the db. return DataArray[ixIPCount()].RawData--; // Decrement and return the IP count. } const char* GBUdbDataset::FileName() { // get the file name. return MyFileName.c_str(); } unsigned int GBUdbDataset::EncodedMatch(unsigned int IP) { // Encode an IP as a MatchRecord header. return GBUdbMatchEntryBit | (IP & GBUdbMatchDataMask); // Use the MatchEntery bit and as much } // of the remaining IP data as possible. bool GBUdbDataset::isMatch(GBUdbIndex I) { // True if record at I is a match record. return (0 != (DataArray[I].RawData & GBUdbMatchEntryBit)); // Get the raw data and check for the bit. } bool GBUdbDataset::isMatch(GBUdbIndex I, unsigned int IP) { // True if record at I is a match for IP. return (DataArray[I].RawData == EncodedMatch(IP)); } GBUdbRecord& GBUdbDataset::MatchedData(GBUdbIndex I) { // Returns the data for the match at I. return DataArray[I + 1]; // Since I points to the match record we } // return the record immedately after it. GBUdbRecord& GBUdbDataset::SafeUnknownRecord() { // Clears and returns the Safe record. MySafeUnknownRecord.RawData = GBUdbUnknown; // Clear the SafeUnknownRecord and return MySafeUnknownRecord; // return it as the result. } GBUdbIndex GBUdbDataset::ixMatchListRoot() { // Index of the Match List Root Index. return MyArraySize + GBUdbMatchListOffset; } void GBUdbDataset::increaseIPCountIfNew(GBUdbRecord& R) { // If R is GBUdbUnknown, IncreaseIPCount. if(GBUdbUnknown == R.RawData) { increaseIPCount(); } // If new, increase the IP count. } unsigned int GBUdbDataset::remapIP00toFF(unsigned int IP) { // Remaps final octet 00 to FF if needed. const int LowOctetMask = 0x000000FF; // Mask for seeing the low octet. if(0 == (IP & LowOctetMask)) { // If the lowest octet is 00 then return (IP | LowOctetMask); // change it to FF and return. } // If the lowest octet is something else return IP; // then return the IP as is. } void GBUdbDataset::deleteMatchAt(GBUdbIndex I) { // Recalls MatchRecord at I for reuse. GBUdbIndex Next = DataArray[ixMatchListRoot()].Index(); // Find the current allocation list root. DataArray[I].RawData = (Next | GBUdbMatchUnusedBit); // Point the current match to that root. DataArray[I+1].RawData = GBUdbUnknown; // Clean out any data the match had. DataArray[ixMatchListRoot()].Index(I); // Make this record the list root. } //// GBUdb Implementations ///////////////////////////////////////////////////// GBUdb::GBUdb() : // Construct the db as new. PostsCounter(0) { // No posts yet. MyDataset = new GBUdbDataset(NULL); // Construct with no file name. } GBUdb::GBUdb(const char* FileName) : // Construct the db from a file. PostsCounter(0) { // No Posts yet. MyDataset = new GBUdbDataset(FileName); // Load the data set by name. } GBUdb::~GBUdb() { // Destroy the db object. if(NULL != MyDataset) { // Save first if we can. MyDataset->save(); delete MyDataset; } } const char* GBUdb::FileName() { // Return the file name. return MyDataset->FileName(); } const char* GBUdb::FileName(const char* NewName) { // Set/Change the file name. return MyDataset->FileName(NewName); } void GBUdb::save() { // Save the data. cd::ScopeMutex JustMe(MyMutex); // Lock the mutex during this operation. MyDataset->save(); // Save the dataset. PostsCounter = 0; // Reset the posts counter. } void GBUdb::load() { // Load the data. cd::ScopeMutex JustMe(MyMutex); // Lock the mutex during this operation. MyDataset->load(); // Load the dataset. } GBUdbRecord GBUdb::addGood(unsigned int IP, int i) { // Count an IP as good. cd::ScopeMutex JustMe(MyMutex); // Lock the mutex during this operation. ++PostsCounter; // Count this as a post. GBUdbRecord& X = MyDataset->invokeRecord(IP); // Invoke the record. unsigned int C = X.addGood(i); // Add a count to the good side. recordAlertFor(IP, X ,C); // Record an alert if required. return X; // Return a copy for analysis. } GBUdbRecord GBUdb::addBad(unsigned int IP, int i) { // Count an IP as bad. cd::ScopeMutex JustMe(MyMutex); // Lock the mutex during this operation. ++PostsCounter; // Count this as a post. GBUdbRecord& X = MyDataset->invokeRecord(IP); // Invoke the reocrd. unsigned int C = X.addBad(i); // Add a count to the bad side. recordAlertFor(IP, X, C); // Record an alert if required. return X; // Return a copy for analysis. } GBUdbRecord GBUdb::setGood(unsigned int IP) { // Set the flag to Good for this IP. cd::ScopeMutex JustMe(MyMutex); // Lock the mutex during this operation. GBUdbRecord& X = MyDataset->invokeRecord(IP); // Invoke the reocrd. X.Flag(Good); // Set the Good flag. return X; // Return a copy for analysis. } GBUdbRecord GBUdb::setBad(unsigned int IP) { // Set the flag to Bad for this IP. cd::ScopeMutex JustMe(MyMutex); // Lock the mutex during this operation. GBUdbRecord& X = MyDataset->invokeRecord(IP); // Invoke the reocrd. X.Flag(Bad); // Set the Bad flag. return X; // Return a copy for analysis. } GBUdbRecord GBUdb::setUgly(unsigned int IP) { // Set the flag to Ugly for this IP. cd::ScopeMutex JustMe(MyMutex); // Lock the mutex during this operation. GBUdbRecord& X = MyDataset->invokeRecord(IP); // Invoke the reocrd. X.Flag(Ugly); // Set the Ugly flag. return X; // Return a copy for analysis. } GBUdbRecord GBUdb::setIgnore(unsigned int IP) { // Set the flag to Ignore for this IP. cd::ScopeMutex JustMe(MyMutex); // Lock the mutex during this operation. GBUdbRecord& X = MyDataset->invokeRecord(IP); // Invoke the reocrd. X.Flag(Ignore); // Set the Ignore flag. return X; // Return a copy for analysis. } GBUdbRecord GBUdb::getRecord(unsigned int IP) { // Retrieve an IP record. cd::ScopeMutex JustMe(MyMutex); // Lock the mutex during this operation. GBUdbRecord& X = MyDataset->readRecord(IP); // ReadOnly the reocrd. return X; // Return a copy for analysis. } GBUdbRecord GBUdb::setRecord(unsigned int IP, GBUdbRecord& R) { // Store an IP record. cd::ScopeMutex JustMe(MyMutex); // Lock the mutex during this operation. GBUdbRecord& X = MyDataset->invokeRecord(IP); // Invoke the reocrd. X = R; // Overwrite X with R. return X; // Return a copy for analysis. } GBUdbRecord GBUdb::adjustCounts(unsigned int IP, GBUdbRecord& R) { // Adds counts from R to record for IP. cd::ScopeMutex JustMe(MyMutex); // Lock the data for this operation. GBUdbRecord& X = MyDataset->invokeRecord(IP); // Locate the record in the data. X.Bad(X.Bad() + R.Bad()); // Add the reflected adjustments X.Good(X.Good() + R.Good()); // to the good and bad counts. return X; // Return a copy for analysis. } bool GBUdb::dropRecord(unsigned int IP) { // Drop an IP record. cd::ScopeMutex JustMe(MyMutex); // Lock the mutex during this operation. return MyDataset->dropRecord(IP); // Pass on this call to our dataset. } int GBUdb::IPCount() { // Number of IPs stored. cd::ScopeMutex JustMe(MyMutex); return MyDataset->IPCount(); } int GBUdb::Size() { // Size of GBUdb in bytes. cd::ScopeMutex JustMe(MyMutex); // Lock the mutex during this operation. return MyDataset->ArraySize() * sizeof(GBUdbRecord); // Total records converted to bytes. } double GBUdb::Utilization() { // Utilization (percent). cd::ScopeMutex JustMe(MyMutex); // Lock the mutex during this operation. int TotalRecords = MyDataset->ArraySize(); // Calculate the total number of records. int FreeRecords = MyDataset->FreeNodes() * GBUdbRecordsPerNode; // Calculate the number of unused records. int UsedRecords = TotalRecords - FreeRecords; // Calcualte the number of used records. return // Calculate and return as double... ((double) UsedRecords) * 100.0 / // (Used Records * 100) / (TotalRecords) ((double) TotalRecords); } int GBUdb::Posts() { // Number of posts since last snapshot. int CurrentCount = PostsCounter; // Grab the current posts count. return CurrentCount; // Return the count we had. } //// GBUdbDataset implementations ////////////////////////////////////////////// GBUdbDataset::~GBUdbDataset() { // Shutdown a dataset. if(NULL != DataArray) { // If the DataArray was allocated delete[] DataArray; // be sure to delete it and DataArray = NULL; // NULL it's pointer. } MyArraySize = 0; // For safety set the size to zero MyFileName = ""; // and "" the name. } GBUdbDataset::GBUdbDataset(const char* SetFileName) : // Open/Create a dataset. DataArray(NULL), // The array pointer starts as NULL. MyArraySize(0) { // And the size is zero. FileName(SetFileName); // Set the file name if provided. if(0 != MyFileName.length() && (0 == access(MyFileName.c_str(),F_OK))) { // If a file name was provided and exists load(); // then read the file from disk. } else { // If the file name was not provided DataArray = new GBUdbRecord[GBUdbDefaultArraySize]; // then allocate a new Array of MyArraySize = GBUdbDefaultArraySize; // the default size. DataArray[ixNextFreeNode()].RawData = // The first new node is the one GBUdbRootNodeOffset + GBUdbRecordsPerNode; // right after the root node. DataArray[ixMatchListRoot()].RawData = // Once that's up we can use it to newMatchNodeRoot(); // allocate the first MatchNode. } } GBUdbDataset::GBUdbDataset(GBUdbDataset& Original) : // Copy constructor. DataArray(NULL), // The array pointer starts as NULL. MyArraySize(Original.MyArraySize), // Copy the ArraySize MyFileName(Original.MyFileName) { // Copy the name pointer. DataArray = new GBUdbRecord[MyArraySize]; // Allocate a new Array. memcpy(DataArray, Original.DataArray, sizeof(GBUdbRecord) * MyArraySize); // Copy the data wholesale. } const char* GBUdbDataset::FileName(const char* NewName) { // (Re) Set the file name. MyFileName = ""; // Delete any previous file name. if(NULL != NewName) { // If we've been given a non-null cstring MyFileName = NewName; // capture it as our file name. } return MyFileName.c_str(); // Return our new FileName. } //// During the read, it is safe to plow through the array without //// checking because any unknown entry points to the zero node and //// all zero node entries point to the zero node. The read-only //// method does not add new nodes. GBUdbRecord& GBUdbDataset::readRecord(unsigned int IP) { // Read a record. IP = remapIP00toFF(IP); // Make the IP safe for consumption. int a0, a1, a2, a3; // We will break the IP into 4 octets. unsigned int xIP = IP; // Grab a copy of IP to maniuplate. const int LowOctetMask = 0x000000FF; // Mask for seeing the low octet. const int BitsInOneOctet = 8; // Number of bits to shift per octet. a3 = xIP & LowOctetMask; xIP >>= BitsInOneOctet; // Grab the a3 octet and shift the IP. a2 = xIP & LowOctetMask; xIP >>= BitsInOneOctet; // Grab the a2 octet and shift the IP. a1 = xIP & LowOctetMask; xIP >>= BitsInOneOctet; // Grab the a1 octet and shift the IP. a0 = xIP & LowOctetMask; // Grab the final octet. GBUdbIndex RecordIndex = GBUdbRootNodeOffset; // Starting at the root node, follow... RecordIndex = DataArray[RecordIndex + a0].Index(); // Follow the node then if(isMatch(RecordIndex)) { // Check for a shortcut (match record). if(isMatch(RecordIndex, IP)) { return MatchedData(RecordIndex); } // If we have an exact match we're done! else { return SafeUnknownRecord(); } // If we have a mismatch we are lost... } RecordIndex = DataArray[RecordIndex + a1].Index(); // Follow the node then if(isMatch(RecordIndex)) { // Check for a shortcut (match record). if(isMatch(RecordIndex, IP)) { return MatchedData(RecordIndex); } // If we have an exact match we're done! else { return SafeUnknownRecord(); } // If we have a mismatch we are lost... } RecordIndex = DataArray[RecordIndex + a2].Index(); // Follow the node. No more match checks. if(isMatch(RecordIndex)) { // Check for a shortcut (match record). if(isMatch(RecordIndex, IP)) { return MatchedData(RecordIndex); } // If we have an exact match we're done! else { return SafeUnknownRecord(); } // If we have a mismatch we are lost... } return DataArray[RecordIndex + a3]; // Final node has our data :-) } //// dropRecord() //// This code is essentially a hack of the readRecord() code. If it finds //// the record it will return true, mark the record as GBUdbUnknown, reduce //// the IP count, and de-allocate the Match record. Records stored in nodes //// are set to GBUdbUnknown and the node is left in place - otherwise repeated //// add and drop operations would lead to leaking all nodes into the match //// record allocation space. (Node allocation is not a linked list ;-) bool GBUdbDataset::dropRecord(unsigned int IP) { // Drop an IP record. IP = remapIP00toFF(IP); // Make the IP safe for consumption. int a0, a1, a2, a3; // We will break the IP into 4 octets. unsigned int xIP = IP; // Grab a copy of IP to maniuplate. const int LowOctetMask = 0x000000FF; // Mask for seeing the low octet. const int BitsInOneOctet = 8; // Number of bits to shift per octet. a3 = xIP & LowOctetMask; xIP >>= BitsInOneOctet; // Grab the a3 octet and shift the IP. a2 = xIP & LowOctetMask; xIP >>= BitsInOneOctet; // Grab the a2 octet and shift the IP. a1 = xIP & LowOctetMask; xIP >>= BitsInOneOctet; // Grab the a1 octet and shift the IP. a0 = xIP & LowOctetMask; // Grab the final octet. GBUdbIndex RecordIndex = GBUdbRootNodeOffset; // Starting at the root node, follow... GBUdbIndex Node0Index = GBUdbRootNodeOffset; // Keep track of our previous nodes. GBUdbIndex Node1Index = 0; // This node not set yet. GBUdbIndex Node2Index = 0; // This node not set yet. GBUdbIndex Node3Index = 0; // This node not set yet. RecordIndex = DataArray[Node0Index + a0].Index(); // Follow the node then if(isMatch(RecordIndex)) { // Check for a shortcut (match record). if(isMatch(RecordIndex, IP)) { // If we have an exact match we proceed: MatchedData(RecordIndex).RawData = GBUdbUnknown; // Set the data in the match to unknown. DataArray[Node0Index + a0].Index(GBUdbUnknown); // Remove the reference to the match record. deleteMatchAt(RecordIndex); // Reclaim the match record for re-use. decreaseIPCount(); // Reduce the IP count. return true; // Return that we were successful. } else { return false; } // If we have a mismatch we cannot delete. } else { // If this was a Node link then Node1Index = RecordIndex; // capture the node root and get ready } // to follow the next node. RecordIndex = DataArray[Node1Index + a1].Index(); // Follow the node then if(isMatch(RecordIndex)) { // Check for a shortcut (match record). if(isMatch(RecordIndex, IP)) { // If we have an exact match we proceed: MatchedData(RecordIndex).RawData = GBUdbUnknown; // Set the data in the match to unknown. DataArray[Node1Index + a1].Index(GBUdbUnknown); // Remove the reference to the match record. deleteMatchAt(RecordIndex); // Reclaim the match record for re-use. decreaseIPCount(); // Reduce the IP count. return true; // Return that we were successful. } else { return false; } // If we have a mismatch we cannot delete. } else { // If this was a Node link then Node2Index = RecordIndex; // capture the node root and get ready } // to follow the next node. RecordIndex = DataArray[Node2Index + a2].Index(); // Follow the node then if(isMatch(RecordIndex)) { // Check for a shortcut (match record). if(isMatch(RecordIndex, IP)) { // If we have an exact match we proceed: MatchedData(RecordIndex).RawData = GBUdbUnknown; // Set the data in the match to unknown. DataArray[Node2Index + a2].Index(GBUdbUnknown); // Remove the reference to the match record. deleteMatchAt(RecordIndex); // Reclaim the match record for re-use. decreaseIPCount(); // Reduce the IP count. return true; // Return that we were successful. } else { return false; } // If we have a mismatch we cannot delete. } else { // If this was a Node link then Node3Index = RecordIndex; // capture the node root and get ready } // to follow the next node. RecordIndex = Node3Index + a3; // Follow the node. if(GBUdbUnknown != DataArray[RecordIndex].RawData) { // If there is data there then DataArray[RecordIndex].RawData = GBUdbUnknown; // mark the entry as unknown, decreaseIPCount(); // decrease the IP count return true; // and return true. } // If we got all the way to the end and return false; // didn't find a match then return false. } /* Ahhh, the simple life. In a single mode lightning index, each key ** octet lives in a node, so when you grow a new path you either follow ** existing nodes or make new ones. We're not doing that here, but as ** a reference here is how that is usually handled: ** GBUdbIndex GBUdbDataset::invokeAt(GBUdbRecord& R) { // Invoke at Record. if(GBUdbUnknown == R.RawData) { // If the record does not point to a R.Index(newNodeRoot()); // node then give it a new node. } // If the record already has a node return R.Index(); // or we gave it one, then follow it. } */ //// Little helper function for invokeAt() int getOctet(int Octet, unsigned int IP) { // Returns Octet number Octet from IP. const int BitsInOneOctet = 8; // Number of bits to shift per octet. const int LowOctetMask = 0x000000FF; // Mask for seeing the low octet. int BitsToShift = 0; // Assume we want a3 but switch(Octet) { // If we don't, use this handy switch. case 0: { BitsToShift = 3 * BitsInOneOctet; break; } // For octet 0, shift out 3 octets. case 1: { BitsToShift = 2 * BitsInOneOctet; break; } // For octet 1, shift out 2 octets. case 2: { BitsToShift = 1 * BitsInOneOctet; break; } // For octet 2, shift out 1 octets. } // For octet 3, shift none more octets. if(0 < BitsToShift) { // If we have bits to shift then IP >>= BitsToShift; // shift them. } return (IP & LowOctetMask); // Exctract the octet at the bottom. } //// invokeAt() is a helper function that encapsulates the work of growing new //// pathways. There are several cases to handle in a bimodal indexing scheme //// since sometimes you extend new nodes (as commented out above), and some- //// times you create MatchRecords, and sometimes you have collisions and //// have to extend previous matches.... or not. All of that will become clear //// shortly ;-) The good news is that at least invokeAt() is always supposed //// to return the next place to go --- that is, you never get lost because if //// the next step in the path does not exist yet then you create it. GBUdbIndex GBUdbDataset::invokeAt(GBUdbRecord& R, unsigned int IP, int Octet, bool ExtendMatches) { // R is either known (goes somewhere) or unknown (we would be lost). // IF R is UNNKOWN then we ... //// create a match and return it. (No conflict, no extension, no extra node :-) //**** We got out of that one so we're back at the root level. if(GBUdbUnknown == R.RawData) { R.Index(newMatchRecord(IP)); return R.Index(); } // ELSE R is KNOWN then it either points to a MatchRecord or a Node. //// IF R points to a Node then we will simply follow it. //**** We got out of that one so we're back at the root level. if(!isMatch(R.Index())) { return R.Index(); } // ELSE R points to a MatchRecord then we get more complex. //// IF the MatchRecord matches our IP then we simply follow it. //**** We got out of that one so we're back at the root level. if(isMatch(R.Index(),IP)) { return R.Index(); } // ELSE the MatchRecord does not match then we get more complex again... //// IF we are Extending Matches then we... ////// create a new node ////// push the existing match onto the new node ////// and create a new match for the new IP on that node. ////// since we already have the solution we return the new match node index (skip a step). //**** We got out of that one so we're back at the root level. if(ExtendMatches) { // If we are extending matches GBUdbIndex I = newNodeRoot(); // we create a new node. int NewSlotForCurrentMatch = // Locate the slot in that node where getOctet( // the current match should reside Octet + 1, // based on the octet after this one DataArray[R.Index()] // by extracting that octet from .RawData); // the MatchReord header. // Then we put the current match into DataArray[I + NewSlotForCurrentMatch].Index(R.Index()); // the correct slot on the new node, return R.Index(I); // point the current slot to that node } // and return the node to be followed. // ELSE we are NOT Extending Matches then we... // ** KNOW that we are adding node a3 and dealing with the final octet ** //// create a new node //// map the existing match data into the new node. //// delete the existing match (for reallocation). deleteMatchAt(GBUdbIndex I) //// map the new IP into the new node. GBUdbIndex I = newNodeRoot(); // Create a new node. int NewSlotForCurrentMatch = // Locate the slot in that node where getOctet( // the current match should reside Octet + 1, // based on the octet after this one DataArray[R.Index()] // by extracting that octet from .RawData); // the MatchReord header. if(ExtendMatches) { // If we are extending matches... // then we put the current match into DataArray[I + NewSlotForCurrentMatch].Index(R.Index()); // the correct slot on the new node. } else { // If we are not extending matches... // then we must be at the end node so DataArray[I + NewSlotForCurrentMatch].RawData = // we copy in the data from MatchedData(R.Index()).RawData; // the current MatchRecord, deleteMatchAt(R.Index()); // and return the MatchRecord for re-use. } return R.Index(I); // Point the current slot to new node } // and return that node index to follow. //// The "invoke" method creates all of the needed nodes starting //// at any point where an "unwknown" entry is found. GBUdbRecord& GBUdbDataset::invokeRecord(unsigned int IP) { // Invoke a record. if(FreeNodes() < GBUdbGrowthThreshold) grow(); // If we need more space, make more. IP = remapIP00toFF(IP); // Make the IP safe for consumption. int a0, a1, a2, a3; // We will break the IP into 4 octets. unsigned int xIP = IP; // Grab a copy of IP to maniuplate. const int LowOctetMask = 0x000000FF; // Mask for seeing the low octet. const bool Extend = true; // Magic number for extending Matches. const bool DoNotExtend = false; // Magic number for NOT extending them. const int BitsInOneOctet = 8; // Number of bits to shift per octet. a3 = xIP & LowOctetMask; xIP >>= BitsInOneOctet; // Grab the a3 octet and shift the IP. a2 = xIP & LowOctetMask; xIP >>= BitsInOneOctet; // Grab the a2 octet and shift the IP. a1 = xIP & LowOctetMask; xIP >>= BitsInOneOctet; // Grab the a1 octet and shift the IP. a0 = xIP & LowOctetMask; // Grab the final octet. GBUdbIndex RecordIndex = GBUdbRootNodeOffset; // Starting at the root node, RecordIndex = invokeAt(DataArray[RecordIndex + a0], IP, 0, Extend); // Invoke w/ possible match outcome. if(isMatch(RecordIndex, IP)) { // If this resulted in a match GBUdbRecord& Result = MatchedData(RecordIndex); // then we will grab the match data increaseIPCountIfNew(Result); // and increase the IP count if it's new. return Result; // Then we return the result. Done! } RecordIndex = invokeAt(DataArray[RecordIndex + a1], IP, 1, Extend); // Invode w/ possible match outcome. if(isMatch(RecordIndex, IP)) { // If this resulted in a match GBUdbRecord& Result = MatchedData(RecordIndex); // then we will grab the match data increaseIPCountIfNew(Result); // and increase the IP count if it's new. return Result; // Then we return the result. Done! } RecordIndex = invokeAt(DataArray[RecordIndex + a2], IP, 2, DoNotExtend); // Invode w/ possible match outcome. if(isMatch(RecordIndex, IP)) { // If this resulted in a match GBUdbRecord& Result = MatchedData(RecordIndex); // then we will grab the match data increaseIPCountIfNew(Result); // and increase the IP count if it's new. return Result; // Then we return the result. Done! } GBUdbRecord& Result = DataArray[RecordIndex + a3]; // Grab the record at the final node. increaseIPCountIfNew(Result); // If new, increase the IP count. return Result; // Return the record. } void GBUdbDataset::save() { // Flush the GBUdb to disk. std::string TempFileName = MyFileName + ".tmp"; // Calculate temp and std::string BackFileName = MyFileName + ".bak"; // backup file names. std::ofstream dbFile; // Grab a file for writing. dbFile.open(TempFileName.c_str(), // Open the file in binary mode std::ios::out | std::ios::binary | std::ios::trunc); // and truncate if present. dbFile.write((char*)DataArray, sizeof(GBUdbRecord) * MyArraySize); // Write our array into the file. bool AllOK = dbFile.good(); // Are we happy with this? dbFile.close(); // Close the file when done to be nice. if(AllOK) { // If everything appears to be ok unlink(BackFileName.c_str()); // Delete any old backup file we have rename(MyFileName.c_str(), BackFileName.c_str()); // and make the current file a backup. rename(TempFileName.c_str(), MyFileName.c_str()); // Then make our new file current. } } const cd::RuntimeCheck SaneFileSizeCheck("GBUdbDataset::load():SaneFileSizeCheck(SaneGBUdbFileSizeLimit <= FileSize)"); void GBUdbDataset::load() { // Read the GBUdb from disk. std::ifstream dbFile; // Grab a file for reading. dbFile.open(MyFileName.c_str(), std::ios::in | std::ios::binary); // Open the file with the name we have. dbFile.seekg(0, std::ios::end); // Go to the end of the int FileSize = dbFile.tellg(); // file and back so we can dbFile.seekg(0, std::ios::beg); // determine it's size. int SaneGBUdbFileSizeLimit = (GBUdbDefaultArraySize * sizeof(GBUdbRecord)); // What is a sane size limit? SaneFileSizeCheck(SaneGBUdbFileSizeLimit <= FileSize); // File size sanity check. int NewArraySize = FileSize / sizeof(GBUdbRecord); // How many records in this file? if(NULL != DataArray) { // If we have an array loaded then delete[] DataArray; // delete the array, DataArray = NULL; // NULL it's pointer, MyArraySize = 0; // and zero it's size. } DataArray = new GBUdbRecord[NewArraySize]; // Allocate an array of the proper size MyArraySize = NewArraySize; // set the local size variable dbFile.read((char*)DataArray,FileSize); // and read the file into the array. dbFile.close(); // Close when done to be nice. } void GBUdbDataset::grow(int HowManyNodes) { // Grow the DataArray. int NewArraySize = MyArraySize + (HowManyNodes * GBUdbRecordsPerNode); // Calcualte the new array size. GBUdbRecord* NewDataArray = new GBUdbRecord[NewArraySize]; // Allocate the new array. int OldArrayLessControl = MyArraySize + GBUdbControlNodeOffset; // Include all records but no control. memcpy(NewDataArray, DataArray, sizeof(GBUdbRecord) * OldArrayLessControl); // Copy the old data to the new array. for( // Loop through the control nodes... int o = MyArraySize + GBUdbControlNodeOffset, // o = old node index n = NewArraySize + GBUdbControlNodeOffset, // n = new node index c = GBUdbRecordsPerNode; // c = the record count (how many to do). c > 0; // For until we run out of records, c--) { // decrementing the count each time, NewDataArray[n].RawData = DataArray[o].RawData;n++;o++; // Copy the old control data. } delete[] DataArray; // Delete the old data array. DataArray = NewDataArray; // Swap in the new data array. MyArraySize = NewArraySize; // Correct the size value. } GBUdbIndex GBUdbDataset::newMatchRecord(unsigned int IP) { // Allocate a new Match record for IP. GBUdbIndex I = DataArray[ixMatchListRoot()].RawData; // Grab the root unused Match Record index. GBUdbRecord& R = DataArray[I]; // Grab the record itself and inspect it. if((R.RawData & GBUdbFlagsMask) != GBUdbMatchUnusedBit) { // Check that this looks like an throw MatchAllocationCorrupted(); // unused match record and if not throw! } // If all is well then lets proceed. //// First, let's heal the linked list for future allocations. if(GBUdbMatchUnusedBit == R.RawData) { // If the match record we are on is DataArray[ixMatchListRoot()].RawData = // the last in the list then allocate newMatchNodeRoot(); // a new MatchListNode for the next } else { // allocation. However, if there are DataArray[ixMatchListRoot()].RawData = // more records left in the list then (R.RawData & GBUdbMatchDataMask); // set up the next node for the next } // allocation. //// Once that's done we can use the record we have for real data. R.RawData = EncodedMatch(IP); // Encode the match record for the IP. return I; // Return the match record's index. } GBUdbIndex GBUdbDataset::newMatchNodeRoot() { // Allocate a new Match node. GBUdbIndex I = newNodeRoot(); // Grab a new node to convert. int iLastMatch = GBUdbRecordsPerNode - 2; // Calc the localized i for last match. for(int i = 0; i < iLastMatch; i+=2) { // Loop through the node DataArray[I+i].RawData = GBUdbMatchUnusedBit | (I+i+2); // Build a linked list of Unused Match DataArray[I+i+1].RawData = GBUdbUnknown; // records with empty data. } DataArray[I+iLastMatch].RawData = GBUdbMatchUnusedBit; // The last record gets a NULL index DataArray[I+iLastMatch+1].RawData = GBUdbUnknown; // and null data to terminate the list. return I; // Return the root index. } // doForAllRecords() // This method uses a recursive call to doAllAtNode() // doAllAtNode sweeps through each record in a node and processes any // node entries through the next level (calling itself) or directly if // the node is node3, or if it's pointing to a match record. void GBUdbDataset::updateWorkingIP(unsigned int& WIP, int OctetValue, int Level) { // Update the Working IP (WIP) at octet Level switch(Level) { case 0: { // For the node zero address, WIP = WIP & 0x00FFFFFF; // Mask out the node zero bits. OctetValue = OctetValue << 24; // Shift the octet value into position. WIP = WIP | OctetValue; // Or the octet value bits into place. break; } case 1: { WIP = WIP & 0xFF00FFFF; // Mask out the node zero bits. OctetValue = OctetValue << 16; // Shift the octet value into position. WIP = WIP | OctetValue; // Or the octet value bits into place. break; } case 2: { WIP = WIP & 0xFFFF00FF; // Mask out the node zero bits. OctetValue = OctetValue << 8; // Shift the octet value into position. WIP = WIP | OctetValue; // Or the octet value bits into place. break; } case 3: { WIP = WIP & 0xFFFFFF00; // Mask out the node zero bits. WIP = WIP | OctetValue; // Or the octet value bits into place. break; } } } //// Note about doAllAtNode(). The x.x.x.0 address is skipped on purpose. This //// is because all x.x.x.0 addresses are mapped to x.x.x.255. By skipping this //// address and starting at x.x.x.1 in any search, we do not need to check for //// x.x.x.0 ips that were remapped. They will simply appear at x.x.x.255. void GBUdbDataset::doAllAtNode( // Recursively call O with all valid records. GBUdbIndex I, // Input the node index. GBUdbOperator& O, // Input the Operator to call. int NodeLevel, // Input the NodeLevel. unsigned int WIP // Input the working IP. ) { int FirstI = (3 > NodeLevel) ? 0 : 1; // Skip any x.x.x.0 addresses. for(int i = FirstI; i < GBUdbRecordsPerNode; i++) { // Loop through the slots in this node. GBUdbIndex RecordIndex = DataArray[I + i].Index(); // Get the record index for this slot. if(GBUdbUnknown != RecordIndex) { // Check that this slot is not empty. updateWorkingIP(WIP, i, NodeLevel); // If we've got something then update the WIP. if(3 > NodeLevel) { // If we are working in rootward nodes: if(isMatch(RecordIndex)) { // Check for a match record. If we have one then unsigned int MatchIP = WIP & 0xFF000000; // build the IP for the match from the root MatchIP |= (DataArray[RecordIndex].RawData & 0x00FFFFFF); // of the WIP and the match IP data. O(MatchIP, MatchedData(RecordIndex)); // Then call the operator with the matched data. // If this slot is not a match record } else { // then it is a node address so we will doAllAtNode(RecordIndex, O, NodeLevel+1, WIP); // recurse to that node at a deeper level. } } else { // If we are working in the last node then O(WIP, DataArray[I + i]); // call the Operator with this IP & Record. } // All known data values in the last node are } // actual data records after all. } } void GBUdbDataset::doForAllRecords(GBUdbOperator& O) { // Call O for every valid record. unsigned int WorkingIP = 0; // A working IP for all levels to use. int NodeLevel = 0; // The Node level where we start. doAllAtNode(GBUdbRootNodeOffset, O, NodeLevel, WorkingIP); // Start at the root node, level 0. } //// GBUdb Implementations ///////////////////////////////////////////////////// bool AlertFor(int count) { // True if an alert is needed. return ( // We want an alert whenever a count 0x00000001 == count || // hits any of these thresholds. Each 0x00000002 == count || // threshold is a new bit position 0x00000004 == count || // indicating that the count has 0x00000008 == count || // achieved a new power of 2. This 0x00000010 == count || // mechanism insures that newer IPs 0x00000020 == count || // get lots of attention while long 0x00000040 == count || // standing IPs still get visited 0x00000080 == count || // from time to time as their activity 0x00000100 == count || // continues. 0x00000200 == count || 0x00000400 == count || 0x00000800 == count || 0x00001000 == count || 0x00002000 == count || 0x00004000 == count ); } cd::RuntimeCheck GoodTimestampLength("GBUdb.cpp:getTimestamp snprintf(...) == CorrectTimestampLength"); char* getTimestamp(char* TimestampBfr) { // Creates an ISO GMT timestamp. time_t rawtime; // Get a timer and tm * gmt; // a time structure. time(&rawtime); // Grab the current time and gmt=gmtime(&rawtime); // convert it to GMT. size_t l = snprintf(TimestampBfr,UTCBufferSize, "%04d%02d%02d%02d%02d%02d", // Format yyyymmddhhmmss gmt->tm_year+1900, gmt->tm_mon+1, gmt->tm_mday, gmt->tm_hour, gmt->tm_min, gmt->tm_sec ); const size_t CorrectTimestampLength = 4+2+2+2+2+2; GoodTimestampLength(l == CorrectTimestampLength); return TimestampBfr; } char* getIPString(unsigned int IP, char* bfr) { // Converts an IP to a string. int a0, a1, a2, a3; // We will break the IP into 4 octets. const int LowOctetMask = 0x000000FF; // Mask for seeing the low octet. const int BitsInOneOctet = 8; // Number of bits to shift per octet. a3 = IP & LowOctetMask; IP >>= BitsInOneOctet; // Grab the a3 octet and shift the IP. a2 = IP & LowOctetMask; IP >>= BitsInOneOctet; // Grab the a2 octet and shift the IP. a1 = IP & LowOctetMask; IP >>= BitsInOneOctet; // Grab the a1 octet and shift the IP. a0 = IP & LowOctetMask; // Grab the final octet. sprintf(bfr,"%d.%d.%d.%d",a0,a1,a2,a3); return bfr; } void GBUdb::recordAlertFor(unsigned int IP, GBUdbRecord& R, unsigned int C) { // Record an alert event for R if needed. if(AlertFor(C)) { // If an alert is needed at this level... GBUdbAlert NewAlert; // Create a new alert record. NewAlert.IP = IP; // Assign the IP. NewAlert.R = R; // Assign the Record. cd::ScopeMutex JustMe(AlertsMutex); // Lock the alerts list mutex. MyAlerts.push_back(NewAlert); // Add our new alert to the list. } } GBUdbAlert::GBUdbAlert() : // Default constructor gets timestamp. IP(0) { // IP to zero, R will init to zero getTimestamp(UTC); // on it's own... Get timestamp. } std::string GBUdbAlert::toXML() { // Convert this alert to XML text std::stringstream Alert; // We'll use a stringstream. const char* FlagName = "ERROR"; // We will want the Flag as text. switch(R.Flag()) { // Switch on the Flag() value. case Good: { FlagName = "Good"; break; } // Convert each value to it's name. case Bad: { FlagName = "Bad"; break; } case Ugly: { FlagName = "Ugly"; break; } case Ignore: { FlagName = "Ignore"; break; } } char IPStringBfr[20]; // We need a buffer for our IP. Alert << ""; // That's the end. return Alert.str(); // Return the string. } //// Alert import and export - for sharing data between nodes. void GBUdb::GetAlerts(std::list& ListToFill) { // Get all current alerts & clear; ListToFill.clear(); // Clear out the list to fill. cd::ScopeMutex JustMe(AlertsMutex); // Lock for a moment. ListToFill = MyAlerts; // Copy our alerts to the new list. MyAlerts.clear(); // Clear our alerts. } // In order to allow gbudb nodes to interact without swamping their individuality, // the default mode for integrating thier data is to represent the remote peer's // influence on a logarithmic scale. unsigned int rescaleGBUdbCount(unsigned int C) { // Rescale count C for integration. if(C < 0x00000001) { return 0; } else // Log2, really, .. the short way. if(C < 0x00000002) { return 1; } else // How many significant bits are in if(C < 0x00000004) { return 2; } else // the number. Put another way, what if(C < 0x00000008) { return 3; } else // power of 2 is required to for if(C < 0x00000010) { return 4; } else // this number. if(C < 0x00000020) { return 5; } else if(C < 0x00000040) { return 6; } else if(C < 0x00000080) { return 7; } else if(C < 0x00000100) { return 8; } else if(C < 0x00000200) { return 9; } else if(C < 0x00000400) { return 10; } else if(C < 0x00000800) { return 11; } else if(C < 0x00001000) { return 12; } else if(C < 0x00002000) { return 13; } else if(C < 0x00004000) { return 14; } else return 15; } void GBUdb::ImportAlerts(std::list& PeerAlerts) { // Integrate peer alerts using log2. std::list::iterator iA; for(iA = PeerAlerts.begin(); iA != PeerAlerts.end(); iA++) { // Go through the list of PeerAlerts. GBUdbRecord R = (*iA).R; // Grab the Record in this alert. R.Bad(rescaleGBUdbCount(R.Bad())); // Adjust the bad and good counts R.Good(rescaleGBUdbCount(R.Good())); // for integration. adjustCounts((*iA).IP, R); // Adjust the local counts w/ R. } } //// doForAllRecords //// This method handles GBUdbOperators and their locking semantics. //// For full dataset locking the mutex is acquired before calling the //// dataset's doForAllRecords(). For record locking, the O passed to //// this method is wrapped in a record locking shim (below) and that is //// passed to the dataset. If None is selected then the Operator is //// passed to the dataset as is -- assuming that the Operator will handle //// it's own locking as needed. class GBUdbRecordLockingShim : public GBUdbOperator { // Record locking shim for doForAllRecords. private: GBUdbOperator& MyOperator; // Reference the Operator we will be servicing. cd::Mutex& MyMutex; // Reference the Mutex for the GBUdb we are in. public: GBUdbRecordLockingShim(GBUdbOperator& O, cd::Mutex& M) : // On construction we grab our critical pieces. MyOperator(O), MyMutex(M) { } GBUdbRecord& operator()(unsigned int IP, GBUdbRecord& R) { // When our operator() is called cd::ScopeMutex JustMe(MyMutex); // we lock the mutex in scope and return MyOperator(IP, R); // call the Operator we're servicing. } // When we leave scope we unlock (see above). }; void GBUdb::doForAllRecords(GBUdbOperator& O, GBUdbLocking L) { // Calls O(IP, Record) w/Every record. if(Dataset == L) { // If we are locking for the Dataset, then cd::ScopeMutex JustMe(MyMutex); // we will lock the mutex during this MyDataset->doForAllRecords(O); // entire operation. } else if(Record == L) { // If we are locking per record then GBUdbRecordLockingShim X(O, MyMutex); // we create a record locking shim instance MyDataset->doForAllRecords(X); // and call O() through that. } else { // If locking is NOT enabled, then MyDataset->doForAllRecords(O); // we will call O() without any locking. } } //// The saveSnapshot() method allows us to save a snapshot of our dataset //// while keeping the mutex locked for as short a time as possible: Just long //// enough to make a copy of the dataset in RAM. void GBUdb::saveSnapshot() { // Saves a snapshot of the current db. GBUdbDataset* Snapshot = NULL; // We need a pointer for our snapshot. if(NULL == MyDataset) { // If we do not have a dataset to copy return; // then we simply return. } else { // If we do have a Dataset to copy... cd::ScopeMutex JustMe(MyMutex); // Lock the mutex and Snapshot = new GBUdbDataset(*MyDataset); // make a copy in memory. } // Then we can unlock the mutex. Snapshot->save(); // Then outside the mutex we can save. delete Snapshot; // Once saved we can delete the snapshot. PostsCounter = 0; // Reset the posts counter. } //// reduce() //// Using the doForAllRecords() functionality, this method reduces all counts //// by 2 thus renormalizing all records at lower count values. Unknown flagged //// records who's counts drop to zero will achieve the state GBUdbUnknown. As //// such, those values would not be carried over in a compress() operation. class ReduceAll : public GBUdbOperator { // To reduce the good and bad counts. public: GBUdbRecord& operator()(unsigned int IP, GBUdbRecord& R) { // Given each record, R.Good(R.Good() >> 1); // Reduce the Good count by half. R.Bad(R.Bad() >> 1); // Reduce the Bad count by half. return R; // Return the record. } } ReduceAllOperator; void GBUdb::reduce() { // Reduce all counts by half. doForAllRecords(ReduceAllOperator); // Call do for all records with the } // ReduceAllOperator. //// compress() //// Using the doForAllRecords() functionality, this method creates a temporary //// dataset, copies the existing data into that dataset except where the data //// is GBUdbUnknown, and then swaps the new dataset in place of the old. class CompressAll : public GBUdbOperator { private: GBUdbDataset* MyOldDataset; // Where do we find the old dataset. GBUdbDataset* MyNewDataset; // Where do we store our new dataset. int CountConverted; int CountDropped; public: // Note - There is no destructor. It is expected that the calling function // will extract the NewDataset and replace the OldDataset when the operation // has been successful. CompressAll(GBUdbDataset* OldDataset) : // Startup by MyOldDataset(OldDataset), // Grabbing the old dataset, MyNewDataset(NULL), // The new one isn't there yet. CountConverted(0), // Converted and Dropped CountDropped(0) { // Counts are zero. MyNewDataset = new GBUdbDataset(NULL); // Allocate a new Dataset. MyNewDataset->FileName(OldDataset->FileName()); // Set it's name the same as the old. } // We don't want to Load() it that way ;-) GBUdbRecord& operator()(unsigned int IP, GBUdbRecord& R) { // The ForAll Operator goes like this... if(GBUdbUnknown != R.RawData) { // If the record is not GBUdbUnknown then MyNewDataset->invokeRecord(IP).RawData = R.RawData; // invoke it and copy it's data. ++CountConverted; // Increment the converted count. } else { // If the record is GBUdbUnknown then ++CountDropped; // count it as dropped and forget it. } return R; // Return the record reference. } GBUdbDataset* Old() {return MyOldDataset;} // Here we can get our OldDataset pointer. GBUdbDataset* New() {return MyNewDataset;} // Here we can get our NewDataset pointer. int Converted() {return CountConverted;} // Here we can get the converted count. int Dropped() {return CountDropped;} // Here we can get the dropped count. }; void GBUdb::compress() { // Remove any unknown records (reduced to zero). CompressAll BuildCompressedDataset(MyDataset); // Create a CompressAll operator for this dataset. cd::ScopeMutex Freeze(MyMutex); // Lock the mutex for the rest of this operation. MyDataset->doForAllRecords(BuildCompressedDataset); // Copy all of the active data records. MyDataset = BuildCompressedDataset.New(); // Put the new dataset in place. delete BuildCompressedDataset.Old(); // Delete the old dataset. } // All done, so we're unlocked. int GBUdb::readIgnoreList(const char* FileName) { // setIgnore for a list of IPs int IPCount = 0; // Keep track of the IPs we read. try { // Capture any exceptions. char IPLineBuffer[256]; // Create a line buffer. const int SafeBufferSize = sizeof(IPLineBuffer) - 1; // Safe size always leaves a NULL on the end. std::ifstream ListFile(FileName, std::ios::in); // Open up the list file. while(ListFile.good()) { // While we've got a good file (not eof) memset(IPLineBuffer, 0, sizeof(IPLineBuffer)); // Clear the buffer. ListFile.getline(IPLineBuffer, SafeBufferSize); // Read the line. (safely NULL terminated) // Now we have an IP on a line (in theory). We will parse // the ip and process any that parse correctly. // First eat anything that's not a digit. unsigned long IP = 0L; // We need an IP buffer. char* cursor = IPLineBuffer; // Start on the first byte. if('#' == *cursor) continue; // Lines that start with # are comments. while(0 < *cursor && isspace(*cursor)) ++cursor; // Eat any leading spaces. // First octet. if(!isdigit(*cursor)) continue; // If it's not a digit skip this line. if(255 < atoi(cursor)) continue; // If the octet is out of range skip! IP += atoi(cursor); IP <<= 8; // Grab the first int and shift it. while(isdigit(*cursor)) ++cursor; // Eat those digits. if('.'!=(*cursor)) continue; // If we don't find a dot skip this line. ++cursor; // If we do, skip the dot. // Second octet. if(!isdigit(*cursor)) continue; // If we're not at digit skip this line. if(255 < atoi(cursor)) continue; // If the octet is out of range skip! IP += atoi(cursor); IP <<= 8; // Grab the octet and shift things left. while(isdigit(*cursor)) ++cursor; // Eat those digits. if('.'!=(*cursor)) continue; // If we don't find a dot skip this line. ++cursor; // If we do, skip the dot. // Third octet. if(!isdigit(*cursor)) continue; // If we're not at digit skip this line. if(255 < atoi(cursor)) continue; // If the octet is out of range skip! IP += atoi(cursor); IP <<= 8; // Grab the octet and shift things left. while(isdigit(*cursor)) ++cursor; // Eat those digits. if('.'!=(*cursor)) continue; // If we don't find a dot skip this line. ++cursor; // If we do, skip the dot. // Last octet. if(!isdigit(*cursor)) continue; // If we're not at a digit skip this line. if(255 < atoi(cursor)) continue; // If the octet is out of range skip! IP += atoi(cursor); // Grab the octet. IP finished! setIgnore(IP); // Set the IP to Ignore. ++IPCount; // Bump the IP count. } ListFile.close(); } catch(...) { } // If we have an exception we stop. return IPCount; // Always return the number of lines read. }