123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305 |
- // snf_HeaderFinder.cpp
- // Copyright (C) 2007 - 2020 ARM Research Labs, LLC.
- // See www.armresearch.com for the copyright terms.
- //
- // See snf_HeaderFinder.hpp for details
-
- #include "snf_HeaderFinder.hpp"
-
- #include "snfLOGmgr.hpp"
- #include "snfCFGmgr.hpp"
-
- namespace cd = codedweller;
-
- const int NumberOfByteValues = 256; // Count of distinct byte values (0..255); sizes the ImpossibleBytes cache, which is indexed by an unsigned char.
-
- const bool HeaderFinderPattern::operator<(const HeaderFinderPattern& R) const { // Comparator for set<> living.
- if(Header < R.Header) { // If the Header name is < then true!
- return true;
- } else
- if(Header == R.Header) { // If the Header name is == then
- if(Ordinal < R.Ordinal) { // check the Ordinal. If it's < then
- return true; // true!
- } else
- if(Ordinal == R.Ordinal) { // If the Ordinal == then
- if(Contains < R.Contains) { // check the Contains. If it is < then
- return true; // true!
- } else
- if(Context < R.Context) {
- return true;
- }
- }
- }
- return false; // In all other cases this is not < R
- }
-
- HeaderFinderPattern::HeaderFinderPattern(const HeaderFinderPattern& P) { // Copy constructor.
- Header = P.Header;
- Ordinal = P.Ordinal;
- Context = P.Context;
- Directive = P.Directive;
- Contains = P.Contains;
- }
-
- void HeaderFinderPattern::clear() { // Do this to make fresh and clean.
- Header.clear();
- Ordinal = Context = Directive = 0;
- Contains.clear();
- }
-
- HeaderFinderPattern&
- HeaderFinderPattern::operator=(const HeaderFinderPattern& R) { // Assignment operator.
- Header = R.Header;
- Ordinal = R.Ordinal;
- Context = R.Context;
- Directive = R.Directive;
- Contains = R.Contains;
- return *this;
- }
-
-
- const unsigned long int HeaderFinder::operator()() const { // Return the Directives.
- return Directives;
- }
-
- HeaderFinder::HeaderFinder( // To construct one of these:
- snfScanData* EngineScanData, // -- Scanner control data ptr.
- const HeaderDirectiveSet& Patterns, // -- this is the set of patterns.
- const unsigned char* MessageBuffer, // -- this is the message buffer.
- const int MessageLength // -- this is the length of the buffer.
- ) :
- ScanData(EngineScanData), // Grab the scan control block.
- HeaderDirectives(Patterns), // Grab the Directives and
- Bfr(MessageBuffer), // the message buffer.
- Len(MessageLength),
- ImpossibleBytes(NumberOfByteValues, false), // Clear the impossible bytes cache.
- Directives(0) { // Zero the composite result.
- UnfoldHeaders(); // Unfold the headers.
- }
-
- cd::IP4Address extractIPFromSourceHeader(std::string& Header) { // Return first IP found in header.
- const std::string Digits = "0123456789";
- unsigned int EndOfName = Header.find_first_of(":");
-
- unsigned int StartOfIP = Header.find_first_of(Digits, EndOfName);
- const std::string IPCharacters = ".0123456789";
- unsigned int EndOfIP = Header.find_first_not_of(IPCharacters, StartOfIP);
- bool NoExtraCharactersAfterIP = (std::string::npos == EndOfIP);
- if(NoExtraCharactersAfterIP) EndOfIP = Header.length();
- unsigned int IPLength = EndOfIP - StartOfIP;
- cd::IP4Address ExtractedIP = Header.substr(StartOfIP, IPLength);
-
- return ExtractedIP;
- }
-
- void HeaderFinder::CheckContent(std::string& Header, const HeaderFinderPattern& P) { // Check for a match in the header.
- bool HeaderContainsFinderPattern = (
- std::string::npos != Header.find(P.Contains, P.Header.length())
- );
-
- if(HeaderContainsFinderPattern) {
-
- switch(P.Directive) {
- case HeaderDirectiveBypass:
- case HeaderDirectiveWhite: {
- Directives |= P.Directive; // Add the flags to our output.
- break;
- }
-
- case HeaderDirectiveDrillDown: {
- ScanData->drillPastOrdinal(P.Ordinal); // Mark the IP DrillDown flag.
- Directives |= P.Directive; // Add the flags to our output.
- break;
- }
-
- case HeaderDirectiveContext: {
- ActivatedContexts.insert(P.Context); // Activate the context.
- Directives |= P.Directive; // Add the flags to our output.
- break;
- }
-
- case HeaderDirectiveSource: {
-
- bool HeaderDirectiveSourceIPNotSet = (
- 0UL == ScanData->HeaderDirectiveSourceIP()
- );
-
- bool SourceContextActive = (
- ActivatedContexts.end() != ActivatedContexts.find(P.Context)
- );
-
- if(HeaderDirectiveSourceIPNotSet && SourceContextActive) {
- ScanData->HeaderDirectiveSourceIP(
- extractIPFromSourceHeader(Header)
- );
- Directives |= P.Directive; // Add the flags to our output.
- }
- break;
- }
- }
- }
- }
-
-
- void HeaderFinder::MatchHeaders(std::string& Header) { // Check that the header matches.
- if(0 >= Header.length()) return; // If there's nothing to look at, done!
- HeaderFinderPattern Key; // We will need a handy key.
- Key.Header.push_back(Header.at(0)); // Set up a minimal header string.
- HeaderDirectiveIterator iK = HeaderDirectives.lower_bound(Key); // Locate the lower bound.
-
- // At this point we have found a reasonable starting place for the
- // header directives that might match this header. We will scan through
- // them looking for a match. Since all matches should be grouped together
- // in the set we will set a flag so that on the first non-match after that
- // we can stop looking.
-
- int CurrentOrdinal = 0; // Keep the current ordinal in scope.
- bool FoundFirstMatch = false; // Have we found our first match?
- for(;iK != HeaderDirectives.end();iK++) { // Scan through the directives.
- const HeaderFinderPattern& P = (*iK); // Make a handy handle.
- if(0 == Header.compare(0, P.Header.length(), P.Header)) { // Check for a matching header.
- if(false == FoundFirstMatch) { // If this is our first match
- FoundFirstMatch = true; // then set our first match flag
- CurrentOrdinal = Ordinals[P.Header]; // and get the Ordinal. Then increment
- Ordinals[P.Header] = CurrentOrdinal + 1; // the Ordinal for next time.
- }
- if(CurrentOrdinal == P.Ordinal) { // If the Ordinal matches our Directive
- CheckContent(Header, P); // then check the content of the header.
- } else
- if(CurrentOrdinal < P.Ordinal) { // If we're into Directives bigger than
- return; // our Ordinal then we're done.
- }
- } else { // If the header doesn't match and we
- if(FoundFirstMatch) return; // were matching before then we're done.
- if(Header.at(0)!=P.Header.at(0)) return; // If first bytes don't match, so done!
- }
- } // Move on to the next directive.
- }
-
- bool HeaderFinder::ByteIsImpossible(unsigned char b) { // Is b not first byte of any pattern?
- if(ImpossibleBytes[b]) return true; // Don't look if we already know.
- HeaderFinderPattern Key; // We will need a handy key.
- Key.Header.push_back(b); // Set up a minimal header string.
- HeaderDirectiveIterator iK = HeaderDirectives.lower_bound(Key); // Locate the lower bound.
- if(iK == HeaderDirectives.end()) return (ImpossibleBytes[b] = true); // If we find nothing or the first byte
- if((*iK).Header.at(0) != b) return (ImpossibleBytes[b] = true); // byte doesn't match it's impossible.
- return false; // Otherwise we might find it ;-)
- }
-
// Advance Pos over white space to the start of the next header.
//
//   Pos -- in/out position in Bfr.
//   Bfr -- the message bytes.
//   Len -- number of bytes in Bfr.
//
// Returns true with Pos on the first byte that is not a space, tab, <cr> or
// <nl>. Returns false at the end of the headers -- a <nl> followed by either
// <nl> or <cr><nl>, with Pos left on that first <nl> -- or when fewer than
// three bytes remain at Pos. Stopping two bytes early keeps the look-ahead
// reads inside the buffer.
bool TrimToNextHeader(int& Pos, const unsigned char* Bfr, const int Len) {
    const int Limit = Len - 2;                                                  // Leave room to peek two bytes ahead.
    while(Pos < Limit) {
        const unsigned char Byte = Bfr[Pos];
        if('\n' == Byte) {
            const bool BareBlankLine = ('\n' == Bfr[Pos+1]);                    // \n\n
            const bool CrLfBlankLine = ('\r' == Bfr[Pos+1] && '\n' == Bfr[Pos+2]); // \n\r\n
            if(BareBlankLine || CrLfBlankLine) return false;                    // End of headers.
        } else if(' ' != Byte && '\t' != Byte && '\r' != Byte) {
            return true;                                                        // Found the start of a header.
        }
        ++Pos;                                                                  // White space: keep going.
    }
    return false;                                                               // Out of bytes means out of headers.
}
-
// Skip the rest of the current header, including any folded continuation
// lines.
//
// Pos is advanced until it rests on a <nl> that is not followed by a space
// or tab, or until it reaches Len-1. The last byte of the buffer is never
// inspected so the one-byte look-ahead stays in range.
void eatThisHeader(int& Pos, const unsigned char* Bfr, const int Len) {
    const int Limit = Len - 1;                                                  // Leave room to peek one byte ahead.
    while(Pos < Limit) {
        if('\n' == Bfr[Pos]) {
            const unsigned char Next = Bfr[Pos+1];
            const bool Folded = (' ' == Next || '\t' == Next);
            if(!Folded) return;                                                 // Real end of this header.
        }
        ++Pos;                                                                  // Ordinary byte or a fold: move on.
    }
}
-
// Advance Pos past a run of spaces and tabs.
//
// On return Pos is either on the first byte that is neither ' ' nor '\t',
// or equal to Len if the run reached the end of the buffer.
void eatOrdinarySpace(int& Pos, const unsigned char* Bfr, const int Len) {
    while(Pos < Len && (' ' == Bfr[Pos] || '\t' == Bfr[Pos])) {
        ++Pos;
    }
}
-
- void captureThisHeader( // Capture the header and move pos.
- std::string& Output, // Here is the output string.
- int& Pos, // Here is the current position.
- const unsigned char* Bfr, // Here is the buffer pointer.
- const int Len // Here is the length of the buffer.
- ) {
- Output.clear(); // Clear the output.
- for(;(Pos < (Len-1)); Pos++) { // Scan through the header.
- switch(Bfr[Pos]) { // React to each byte.
- case '\r': { // If we find a <cr> ignore it.
- break;
- }
- case '\n': { // If we find a <nl> check for folding.
- if(' ' == Bfr[Pos+1] || '\t' == Bfr[Pos+1]) { // If we find folding then
- ++Pos; // move to the space
- eatOrdinarySpace(Pos, Bfr, Len); // and gobble it up.
- Output.push_back(' '); // output a single ordinary space
- --Pos; // and drop back one for the loop's ++.
- } else { // If the <nl> wasn't part of a fold
- return; // then we are done with this header.
- }
- break; // Skip the rest of the switch.
- }
- case '\t': // When we come across a tab or
- case ' ': { // a space then we will eat them
- eatOrdinarySpace(Pos, Bfr, Len); // and any extras so they are converted
- Output.push_back(' '); // into a single ordinary space.
- --Pos; // Drop back one for the loop's ++.
- break;
- }
- default: { // For all ordinary bytes we simply
- Output.push_back(Bfr[Pos]); // add the byte to the string.
- break;
- }
- }
- }
- }
-
- void HeaderFinder::UnfoldHeaders() { // Unfold and check headers.
- if(0 >= HeaderDirectives.size()) return; // Skip this if we have no patterns.
- if(0 >= Len) return; // Skip if we have no message.
- std::string TestHeader; // The header under test.
-
- int Position = 0; // Position in Bfr.
- for(;;) { // Scan through all of the headers.
-
- // Skip any leading or leftover whitespace. Be sure to exit when we
- // reach a blank new line. The capture routine later on will not eat
- // the white space - that way we can check for the EOH in this one spot.
-
- if(false == TrimToNextHeader(Position, Bfr, Len)) return; // If no more headers then we're done.
-
- // Skip Impossible Headers -- no such first character.
-
- if(ByteIsImpossible(Bfr[Position])) { // If we have no patterns for this
- eatThisHeader(Position, Bfr, Len); // header then skip it and continue on
- continue; // to the next one.
- }
-
- // Capture and unfold the header to test.
-
- captureThisHeader(TestHeader, Position, Bfr, Len); // Unfold the header into TestHeader.
-
- // Test the header.
-
- MatchHeaders(TestHeader); // Match and activate header directives.
- }
- }
|