// snf_HeaderFinder.cpp // Copyright (C) 2007 - 2009 ARM Research Labs, LLC. // See www.armresearch.com for the copyright terms. // // See snf_HeaderFinder.hpp for details #include "snf_HeaderFinder.hpp" #include "snfLOGmgr.hpp" #include "snfCFGmgr.hpp" const int NumberOfByteValues = 256; // Number of possible byte values. HeaderFinder::HeaderFinder( // To construct one of these: snfScanData* EngineScanData, // -- Scanner control data ptr. const HeaderDirectiveSet& Patterns, // -- this is the set of patterns. const unsigned char* MessageBuffer, // -- this is the message buffer. const int MessageLength // -- this is the length of the buffer. ) : ScanData(EngineScanData), // Grab the scan control block. HeaderDirectives(Patterns), // Grab the Directives and Bfr(MessageBuffer), // the message buffer. Len(MessageLength), ImpossibleBytes(NumberOfByteValues, false), // Clear the impossible bytes cache. Directives(0) { // Zero the composite result. UnfoldHeaders(); // Unfold the headers. } void HeaderFinder::CheckContent(string& Header, const HeaderFinderPattern& P) { // Check for a match in the header. if(string::npos != Header.find(P.Contains, P.Header.length())) { // If we find the required contents: /*** if/else laddar - too complex for switch ***/ if( HeaderDirectiveBypass == P.Directive || // If this is a bypass directive or HeaderDirectiveWhite == P.Directive // a white header directive: ) { Directives |= P.Directive; // Add the flags to our output. } else if(HeaderDirectiveDrillDown == P.Directive) { // If this is a DrillDown rule ScanData->drillPastOrdinal(P.Ordinal); // mark the IP DrillDown flag. Directives |= P.Directive; // Add the flags to our output. } else if(HeaderDirectiveContext == P.Directive) { // If this is a context activation ActivatedContexts.insert(P.Context); // header then activate the context. Directives |= P.Directive; // Add the flags to our output. } else if( // Are we forcing the message source? HeaderDirectiveSource == P.Directive && // If we matched a source directive and false == ScanData->FoundSourceIP() && // the source is not already set and ActivatedContexts.end() != ActivatedContexts.find(P.Context) // and the source context is active then ) { // we set the source from this header. // Extract the IP from the header. const string digits = "0123456789"; // These are valid digits. int IPStart = Header.find_first_of(digits, P.Header.length()); // Find the first digit in the header. if(string::npos == IPStart) return; // If we don't find it we're done. const string ipchars = ".0123456789"; // These are valid IP characters. int IPEnd = Header.find_first_not_of(ipchars, IPStart); // Find the end of the IP. if(string::npos == IPEnd) IPEnd = Header.length(); // Correct for end of string cases. ScanData->HeaderDirectiveSourceIP( // Extract the IP from the header and Header.substr(IPStart, (IPEnd - IPStart)) // expose it to the calling scanner. ); Directives |= P.Directive; // Add the flags to our output. } } } void HeaderFinder::MatchHeaders(string& Header) { // Check that the header matches. if(0 >= Header.length()) return; // If there's nothing to look at, done! HeaderFinderPattern Key; // We will need a handy key. Key.Header.push_back(Header.at(0)); // Set up a minimal header string. HeaderDirectiveIterator iK = HeaderDirectives.lower_bound(Key); // Locate the lower bound. // At this point we have found a reasonable starting place for the // header directives that might match this header. We will scan through // them looking for a match. Since all matches should be grouped together // in the set we will set a flag so that on the first non-match after that // we can stop looking. int CurrentOrdinal = 0; // Keep the current ordinal in scope. bool FoundFirstMatch = false; // Have we found our first match? for(;iK != HeaderDirectives.end();iK++) { // Scan through the directives. const HeaderFinderPattern& P = (*iK); // Make a handy handle. if(0 == Header.compare(0, P.Header.length(), P.Header)) { // Check for a matching header. if(false == FoundFirstMatch) { // If this is our first match FoundFirstMatch = true; // then set our first match flag CurrentOrdinal = Ordinals[P.Header]; // and get the Ordinal. Then increment Ordinals[P.Header] = CurrentOrdinal + 1; // the Ordinal for next time. } if(CurrentOrdinal == P.Ordinal) { // If the Ordinal matches our Directive CheckContent(Header, P); // then check the content of the header. } else if(CurrentOrdinal < P.Ordinal) { // If we're into Directives bigger than return; // our Ordinal then we're done. } } else { // If the header doesn't match and we if(FoundFirstMatch) return; // were matching before then we're done. if(Header.at(0)!=P.Header.at(0)) return; // If first bytes don't match, so done! } } // Move on to the next directive. } bool HeaderFinder::ByteIsImpossible(unsigned char b) { // Is b not first byte of any pattern? if(ImpossibleBytes[b]) return true; // Don't look if we already know. HeaderFinderPattern Key; // We will need a handy key. Key.Header.push_back(b); // Set up a minimal header string. HeaderDirectiveIterator iK = HeaderDirectives.lower_bound(Key); // Locate the lower bound. if(iK == HeaderDirectives.end()) return (ImpossibleBytes[b] = true); // If we find nothing or the first byte if((*iK).Header.at(0) != b) return (ImpossibleBytes[b] = true); // byte doesn't match it's impossible. return false; // Otherwise we might find it ;-) } bool TrimToNextHeader(int& Pos, const unsigned char* Bfr, const int Len) { // Move Pos & check for EOH. for(;(Pos < (Len-2));Pos++) { // Scan through the Bfr (stay in range). switch(Bfr[Pos]) { // React to the byte at hand: case '\t': case '\r': case ' ': { // Ordinary spaces and \r we skip. break; } case '\n': { // On Newlines we check to see if if( // this is the end of the headers. ('\r' == Bfr[Pos+1] && '\n' == Bfr[Pos+2]) || // Either \n\r\n or ('\n' == Bfr[Pos+1] ) // \n\n means EOH. ) { return false; // If EOH, no more headers, send false. } break; // If not EOH then keep going. } default: { // Any other byte and we are done. return true; // We have another header, send true. } } } // If we run out of bytes then we return false; // are also out of headers, send false. } void eatThisHeader(int& Pos, const unsigned char* Bfr, const int Len) { // Eat up to the next header. for(;(Pos < (Len-1));Pos++) { // Scan through this header. if('\n' == Bfr[Pos]) { // When we get to a new line check if(' ' == Bfr[Pos+1] || '\t' == Bfr[Pos+1]) continue; // for and skip any folding. Anything return; // other than folding and we're done. } } } void eatOrdinarySpace(int& Pos, const unsigned char* Bfr, const int Len) { // Eat all spaces (dedup, unfold, etc) for(;Pos < Len;Pos++) { // Scan through the buffer. switch(Bfr[Pos]) { // React to each byte. case ' ': // Simply skip all ordinary spaces case '\t': { // or tabs. break; } default: { // At the first other byte return; // we are done. } } } } void captureThisHeader( // Capture the header and move pos. string& Output, // Here is the output string. int& Pos, // Here is the current position. const unsigned char* Bfr, // Here is the buffer pointer. const int Len // Here is the length of the buffer. ) { Output.clear(); // Clear the output. for(;(Pos < (Len-1)); Pos++) { // Scan through the header. switch(Bfr[Pos]) { // React to each byte. case '\r': { // If we find a ignore it. break; } case '\n': { // If we find a check for folding. if(' ' == Bfr[Pos+1] || '\t' == Bfr[Pos+1]) { // If we find folding then ++Pos; // move to the space eatOrdinarySpace(Pos, Bfr, Len); // and gobble it up. Output.push_back(' '); // output a single ordinary space --Pos; // and drop back one for the loop's ++. } else { // If the wasn't part of a fold return; // then we are done with this header. } break; // Skip the rest of the switch. } case '\t': // When we come across a tab or case ' ': { // a space then we will eat them eatOrdinarySpace(Pos, Bfr, Len); // and any extras so they are converted Output.push_back(' '); // into a single ordinary space. --Pos; // Drop back one for the loop's ++. break; } default: { // For all ordinary bytes we simply Output.push_back(Bfr[Pos]); // add the byte to the string. break; } } } } void HeaderFinder::UnfoldHeaders() { // Unfold and check headers. if(0 >= HeaderDirectives.size()) return; // Skip this if we have no patterns. if(0 >= Len) return; // Skip if we have no message. string TestHeader; // The header under test. int Position = 0; // Position in Bfr. for(;;) { // Scan through all of the headers. // Skip any leading or leftover whitespace. Be sure to exit when we // reach a blank new line. The capture routine later on will not eat // the white space - that way we can check for the EOH in this one spot. if(false == TrimToNextHeader(Position, Bfr, Len)) return; // If no more headers then we're done. // Skip Impossible Headers -- no such first character. if(ByteIsImpossible(Bfr[Position])) { // If we have no patterns for this eatThisHeader(Position, Bfr, Len); // header then skip it and continue on continue; // to the next one. } // Capture and unfold the header to test. captureThisHeader(TestHeader, Position, Bfr, Len); // Unfold the header into TestHeader. // Test the header. MatchHeaders(TestHeader); // Match and activate header directives. } }