// snf_HeaderFinder.cpp // Copyright (C) 2007 - 2009 ARM Research Labs, LLC. // See www.armresearch.com for the copyright terms. // // See snf_HeaderFinder.hpp for details #include "snf_HeaderFinder.hpp" #include "snfLOGmgr.hpp" #include "snfCFGmgr.hpp" const int NumberOfByteValues = 256; // Number of possible byte values. HeaderFinder::HeaderFinder( // To construct one of these: snfScanData* EngineScanData, // -- Scanner control data ptr. const HeaderDirectiveSet& Patterns, // -- this is the set of patterns. const unsigned char* MessageBuffer, // -- this is the message buffer. const int MessageLength // -- this is the length of the buffer. ) : ScanData(EngineScanData), // Grab the scan control block. HeaderDirectives(Patterns), // Grab the Directives and Bfr(MessageBuffer), // the message buffer. Len(MessageLength), ImpossibleBytes(NumberOfByteValues, false), // Clear the impossible bytes cache. Directives(0) { // Zero the composite result. UnfoldHeaders(); // Unfold the headers. } IP4Address extractIPFromSourceHeader(string& Header) { // Return first IP found in header. const string Digits = "0123456789"; unsigned int EndOfName = Header.find_first_of(":"); unsigned int StartOfIP = Header.find_first_of(Digits, EndOfName); const string IPCharacters = ".0123456789"; unsigned int EndOfIP = Header.find_first_not_of(IPCharacters, StartOfIP); bool NoExtraCharactersAfterIP = (string::npos == EndOfIP); if(NoExtraCharactersAfterIP) EndOfIP = Header.length(); unsigned int IPLength = EndOfIP - StartOfIP; IP4Address ExtractedIP = Header.substr(StartOfIP, IPLength); return ExtractedIP; } void HeaderFinder::CheckContent(string& Header, const HeaderFinderPattern& P) { // Check for a match in the header. bool HeaderContainsFinderPattern = ( string::npos != Header.find(P.Contains, P.Header.length()) ); if(HeaderContainsFinderPattern) { switch(P.Directive) { case HeaderDirectiveBypass: case HeaderDirectiveWhite: { Directives |= P.Directive; // Add the flags to our output. break; } case HeaderDirectiveDrillDown: { ScanData->drillPastOrdinal(P.Ordinal); // Mark the IP DrillDown flag. Directives |= P.Directive; // Add the flags to our output. break; } case HeaderDirectiveContext: { ActivatedContexts.insert(P.Context); // Activate the context. Directives |= P.Directive; // Add the flags to our output. break; } case HeaderDirectiveSource: { bool HeaderDirectiveSourceIPNotSet = ( 0UL == ScanData->HeaderDirectiveSourceIP() ); bool SourceContextActive = ( ActivatedContexts.end() != ActivatedContexts.find(P.Context) ); if(HeaderDirectiveSourceIPNotSet && SourceContextActive) { ScanData->HeaderDirectiveSourceIP( extractIPFromSourceHeader(Header) ); Directives |= P.Directive; // Add the flags to our output. } break; } } } } void HeaderFinder::MatchHeaders(string& Header) { // Check that the header matches. if(0 >= Header.length()) return; // If there's nothing to look at, done! HeaderFinderPattern Key; // We will need a handy key. Key.Header.push_back(Header.at(0)); // Set up a minimal header string. HeaderDirectiveIterator iK = HeaderDirectives.lower_bound(Key); // Locate the lower bound. // At this point we have found a reasonable starting place for the // header directives that might match this header. We will scan through // them looking for a match. Since all matches should be grouped together // in the set we will set a flag so that on the first non-match after that // we can stop looking. int CurrentOrdinal = 0; // Keep the current ordinal in scope. bool FoundFirstMatch = false; // Have we found our first match? for(;iK != HeaderDirectives.end();iK++) { // Scan through the directives. const HeaderFinderPattern& P = (*iK); // Make a handy handle. if(0 == Header.compare(0, P.Header.length(), P.Header)) { // Check for a matching header. if(false == FoundFirstMatch) { // If this is our first match FoundFirstMatch = true; // then set our first match flag CurrentOrdinal = Ordinals[P.Header]; // and get the Ordinal. Then increment Ordinals[P.Header] = CurrentOrdinal + 1; // the Ordinal for next time. } if(CurrentOrdinal == P.Ordinal) { // If the Ordinal matches our Directive CheckContent(Header, P); // then check the content of the header. } else if(CurrentOrdinal < P.Ordinal) { // If we're into Directives bigger than return; // our Ordinal then we're done. } } else { // If the header doesn't match and we if(FoundFirstMatch) return; // were matching before then we're done. if(Header.at(0)!=P.Header.at(0)) return; // If first bytes don't match, so done! } } // Move on to the next directive. } bool HeaderFinder::ByteIsImpossible(unsigned char b) { // Is b not first byte of any pattern? if(ImpossibleBytes[b]) return true; // Don't look if we already know. HeaderFinderPattern Key; // We will need a handy key. Key.Header.push_back(b); // Set up a minimal header string. HeaderDirectiveIterator iK = HeaderDirectives.lower_bound(Key); // Locate the lower bound. if(iK == HeaderDirectives.end()) return (ImpossibleBytes[b] = true); // If we find nothing or the first byte if((*iK).Header.at(0) != b) return (ImpossibleBytes[b] = true); // byte doesn't match it's impossible. return false; // Otherwise we might find it ;-) } bool TrimToNextHeader(int& Pos, const unsigned char* Bfr, const int Len) { // Move Pos & check for EOH. for(;(Pos < (Len-2));Pos++) { // Scan through the Bfr (stay in range). switch(Bfr[Pos]) { // React to the byte at hand: case '\t': case '\r': case ' ': { // Ordinary spaces and \r we skip. break; } case '\n': { // On Newlines we check to see if if( // this is the end of the headers. ('\r' == Bfr[Pos+1] && '\n' == Bfr[Pos+2]) || // Either \n\r\n or ('\n' == Bfr[Pos+1] ) // \n\n means EOH. ) { return false; // If EOH, no more headers, send false. } break; // If not EOH then keep going. } default: { // Any other byte and we are done. return true; // We have another header, send true. } } } // If we run out of bytes then we return false; // are also out of headers, send false. } void eatThisHeader(int& Pos, const unsigned char* Bfr, const int Len) { // Eat up to the next header. for(;(Pos < (Len-1));Pos++) { // Scan through this header. if('\n' == Bfr[Pos]) { // When we get to a new line check if(' ' == Bfr[Pos+1] || '\t' == Bfr[Pos+1]) continue; // for and skip any folding. Anything return; // other than folding and we're done. } } } void eatOrdinarySpace(int& Pos, const unsigned char* Bfr, const int Len) { // Eat all spaces (dedup, unfold, etc) for(;Pos < Len;Pos++) { // Scan through the buffer. switch(Bfr[Pos]) { // React to each byte. case ' ': // Simply skip all ordinary spaces case '\t': { // or tabs. break; } default: { // At the first other byte return; // we are done. } } } } void captureThisHeader( // Capture the header and move pos. string& Output, // Here is the output string. int& Pos, // Here is the current position. const unsigned char* Bfr, // Here is the buffer pointer. const int Len // Here is the length of the buffer. ) { Output.clear(); // Clear the output. for(;(Pos < (Len-1)); Pos++) { // Scan through the header. switch(Bfr[Pos]) { // React to each byte. case '\r': { // If we find a ignore it. break; } case '\n': { // If we find a check for folding. if(' ' == Bfr[Pos+1] || '\t' == Bfr[Pos+1]) { // If we find folding then ++Pos; // move to the space eatOrdinarySpace(Pos, Bfr, Len); // and gobble it up. Output.push_back(' '); // output a single ordinary space --Pos; // and drop back one for the loop's ++. } else { // If the wasn't part of a fold return; // then we are done with this header. } break; // Skip the rest of the switch. } case '\t': // When we come across a tab or case ' ': { // a space then we will eat them eatOrdinarySpace(Pos, Bfr, Len); // and any extras so they are converted Output.push_back(' '); // into a single ordinary space. --Pos; // Drop back one for the loop's ++. break; } default: { // For all ordinary bytes we simply Output.push_back(Bfr[Pos]); // add the byte to the string. break; } } } } void HeaderFinder::UnfoldHeaders() { // Unfold and check headers. if(0 >= HeaderDirectives.size()) return; // Skip this if we have no patterns. if(0 >= Len) return; // Skip if we have no message. string TestHeader; // The header under test. int Position = 0; // Position in Bfr. for(;;) { // Scan through all of the headers. // Skip any leading or leftover whitespace. Be sure to exit when we // reach a blank new line. The capture routine later on will not eat // the white space - that way we can check for the EOH in this one spot. if(false == TrimToNextHeader(Position, Bfr, Len)) return; // If no more headers then we're done. // Skip Impossible Headers -- no such first character. if(ByteIsImpossible(Bfr[Position])) { // If we have no patterns for this eatThisHeader(Position, Bfr, Len); // header then skip it and continue on continue; // to the next one. } // Capture and unfold the header to test. captureThisHeader(TestHeader, Position, Bfr, Len); // Unfold the header into TestHeader. // Test the header. MatchHeaders(TestHeader); // Match and activate header directives. } }