// snf_HeaderFinder.cpp
// Copyright (C) 2007 - 2009 ARM Research Labs, LLC.
// See www.armresearch.com for the copyright terms.
//
// See snf_HeaderFinder.hpp for details

#include "snf_HeaderFinder.hpp"

#include "snfLOGmgr.hpp"
#include "snfCFGmgr.hpp"

// Size of the ImpossibleBytes cache built by the HeaderFinder constructor.
// That cache is indexed directly by an unsigned char value.
const int NumberOfByteValues = 256;                                             // Number of possible byte values.

// Construct a HeaderFinder for one message and scan its headers right away.
//
//   EngineScanData -- scanner control data; matching directives report into it.
//   Patterns       -- the set of header directives to look for.
//   MessageBuffer  -- the message bytes to scan.
//   MessageLength  -- the number of bytes available at MessageBuffer.
//
// All of the scanning work happens here: once the members are set up the
// headers are unfolded and matched before the constructor returns.
HeaderFinder::HeaderFinder(
      snfScanData* EngineScanData,
      const HeaderDirectiveSet& Patterns,
      const unsigned char* MessageBuffer,
      const int MessageLength
) :
  ScanData(EngineScanData),                                                     // Where directive results are posted.
  HeaderDirectives(Patterns),                                                   // What we are looking for.
  Bfr(MessageBuffer),                                                           // Where we are looking,
  Len(MessageLength),                                                           // and how far we may look.
  ImpossibleBytes(NumberOfByteValues, false),                                   // No byte is known impossible yet.
  Directives(0) {                                                               // No directive flags collected yet.
    this->UnfoldHeaders();                                                      // Scan the headers immediately.
}

// Return the first IP found in the content of a header.
//
//   Header -- the complete (unfolded) header, name included. It is only read.
//
// The search starts at the first ':' in Header, moves to the first decimal
// digit at or after it, and takes the unbroken run of digits and dots that
// begins there. That text is converted to an IP4Address and returned.
//
// NOTE(review): if Header has no ':' or no digit after it, the start position
// is string::npos and substr() throws std::out_of_range. Confirm that the
// callers are prepared for that.
IP4Address extractIPFromSourceHeader(string& Header) {                          // Return first IP found in header.
    const string Digits = "0123456789";
    const string IPCharacters = ".0123456789";

    // Positions are held as string::size_type so that string::npos survives
    // intact. A narrower unsigned int truncates npos on 64-bit builds, which
    // makes the "ran to the end of the string" test below always false.

    const string::size_type EndOfName = Header.find_first_of(":");              // The header name ends at the colon.
    const string::size_type StartOfIP =                                         // The IP starts at the first digit
      Header.find_first_of(Digits, EndOfName);                                  // we find from there on.

    string::size_type EndOfIP =                                                 // The IP ends at the first byte that
      Header.find_first_not_of(IPCharacters, StartOfIP);                        // is neither a digit nor a dot.
    const bool NoExtraCharactersAfterIP = (string::npos == EndOfIP);
    if(NoExtraCharactersAfterIP) EndOfIP = Header.length();                     // No such byte? The IP ends the header.

    const string::size_type IPLength = EndOfIP - StartOfIP;
    IP4Address ExtractedIP = Header.substr(StartOfIP, IPLength);                // Convert the text into an IP.

    return ExtractedIP;
}

// Apply directive P to Header when the header's content matches.
//
//   Header -- the unfolded header under test.
//   P      -- the directive whose header name has already matched Header.
//
// The content matches when P.Contains is found in Header at or after offset
// P.Header.length(). On a match the directive's side effect (if any) is
// carried out and its flag is merged into Directives. A Source directive is
// the exception: it only takes effect when no source IP has been set yet
// and its context has been activated; otherwise nothing is recorded.
void HeaderFinder::CheckContent(string& Header, const HeaderFinderPattern& P) {
    const bool ContentIsMissing = (
      string::npos == Header.find(P.Contains, P.Header.length())
    );
    if(ContentIsMissing) return;                                                // No match means nothing to do.

    bool DirectiveApplies = true;                                               // Assume the directive takes effect.

    switch(P.Directive) {
        case HeaderDirectiveBypass:
        case HeaderDirectiveWhite: {                                            // These only contribute their flag.
            break;
        }

        case HeaderDirectiveDrillDown: {
            ScanData->drillPastOrdinal(P.Ordinal);                              // Mark the IP DrillDown flag.
            break;
        }

        case HeaderDirectiveContext: {
            ActivatedContexts.insert(P.Context);                                // Activate the context.
            break;
        }

        case HeaderDirectiveSource: {
            const bool SourceIPIsUnset = (
              0UL == ScanData->HeaderDirectiveSourceIP()
            );
            const bool ContextIsActive = (
              ActivatedContexts.end() != ActivatedContexts.find(P.Context)
            );

            if(SourceIPIsUnset && ContextIsActive) {                            // The first qualifying source header
                ScanData->HeaderDirectiveSourceIP(                              // provides the source IP.
                  extractIPFromSourceHeader(Header)
                );
            } else {
                DirectiveApplies = false;                                       // Otherwise this one is ignored.
            }
            break;
        }

        default: {                                                              // Any other directive value has
            DirectiveApplies = false;                                           // no effect at all.
            break;
        }
    }

    if(DirectiveApplies) Directives |= P.Directive;                             // Add the flags to our output.
}


// Find the directives whose header name matches Header and check each of
// them against the header's content.
//
//   Header -- one unfolded header; an empty string is ignored.
//
// The directive set is entered at the lower bound of a key holding only the
// header's first byte and is walked forward from there. Matching directives
// are expected to sit next to each other, so the walk stops at the first
// mismatch after a match, or as soon as a directive starts with a different
// first byte. Ordinals tracks, per header name, which occurrence this is;
// only directives whose Ordinal equals that occurrence are checked.
void HeaderFinder::MatchHeaders(string& Header) {
    if(Header.empty()) return;                                                  // Nothing to look at, nothing to do.

    const char FirstByte = Header.at(0);
    HeaderFinderPattern Probe;                                                  // A minimal key: just the first byte
    Probe.Header.push_back(FirstByte);                                          // of the header name.
    HeaderDirectiveIterator iDirective = HeaderDirectives.lower_bound(Probe);   // Start at the lower bound.

    int OrdinalOfThisHeader = 0;                                                // Which occurrence of the name is this?
    bool MatchingHasBegun = false;                                              // Have we matched a directive yet?

    while(iDirective != HeaderDirectives.end()) {                               // Walk the candidate directives.
        const HeaderFinderPattern& Candidate = (*iDirective);
        const bool NameMatches = (
          0 == Header.compare(0, Candidate.Header.length(), Candidate.Header)
        );

        if(!NameMatches) {
            if(MatchingHasBegun) return;                                        // Past the group of matches: done.
            if(FirstByte != Candidate.Header.at(0)) return;                     // Past our first byte: done.
        } else {
            if(!MatchingHasBegun) {                                             // On the first match fetch the ordinal
                MatchingHasBegun = true;                                        // for this header name and bump the
                OrdinalOfThisHeader = Ordinals[Candidate.Header];               // stored value for the next time the
                Ordinals[Candidate.Header] = OrdinalOfThisHeader + 1;           // name shows up.
            }
            if(OrdinalOfThisHeader == Candidate.Ordinal) {                      // A directive for this occurrence:
                CheckContent(Header, Candidate);                                // check the header's content.
            } else if(OrdinalOfThisHeader < Candidate.Ordinal) {                // Directives for later occurrences
                return;                                                         // only: we're done.
            }
        }

        ++iDirective;                                                           // Move on to the next directive.
    }
}

// Report whether b can be ruled out as the first byte of a directive's
// header name.
//
//   b -- the first byte of a header found in the message.
//
// Returns true when the lookup finds no directive whose header name starts
// with b, and false when one might match. A true answer is remembered in
// ImpossibleBytes so the directive set is searched at most once per
// impossible byte value.
bool HeaderFinder::ByteIsImpossible(unsigned char b) {                          // Is b not first byte of any pattern?
    if(ImpossibleBytes[b]) return true;                                         // Don't look if we already know.

    HeaderFinderPattern Key;                                                    // We will need a handy key.
    Key.Header.push_back(b);                                                    // Set up a minimal header string.
    HeaderDirectiveIterator iK = HeaderDirectives.lower_bound(Key);             // Locate the lower bound.

    // The stored first byte must be compared as an unsigned char. Where plain
    // char is signed, a byte of 0x80 or more promotes to a negative int and
    // would never equal b, so such bytes would always be ruled out.

    const bool NothingStartsWithB = (
      iK == HeaderDirectives.end() ||                                           // Either we found nothing at all
      static_cast<unsigned char>((*iK).Header.at(0)) != b                       // or it starts with some other byte.
    );

    if(NothingStartsWithB) {
        ImpossibleBytes[b] = true;                                              // Remember that for next time.
        return true;
    }
    return false;                                                               // Otherwise we might find it ;-)
}

// Advance Pos over whitespace to the start of the next header.
//
//   Pos -- in/out: the current position in Bfr.
//   Bfr -- the message buffer.
//   Len -- the number of bytes in Bfr.
//
// Returns true with Pos on the first byte that is not a space, tab, <cr> or
// <nl>. Returns false at the end of the headers, which is a <nl> followed by
// either <nl> or <cr><nl>; Pos is then left on that first <nl>. Also returns
// false when fewer than three bytes remain, since the look-ahead needs them.
bool TrimToNextHeader(int& Pos, const unsigned char* Bfr, const int Len) {
    const int ScanLimit = Len - 2;                                              // Leave room to look two bytes ahead.
    while(Pos < ScanLimit) {
        const unsigned char Byte = Bfr[Pos];
        if('\n' == Byte) {                                                      // A new line might begin a blank line.
            const bool BlankLineLF = ('\n' == Bfr[Pos+1]);
            const bool BlankLineCRLF = ('\r' == Bfr[Pos+1] && '\n' == Bfr[Pos+2]);
            if(BlankLineCRLF || BlankLineLF) return false;                      // Blank line: end of headers.
        } else if(' ' != Byte && '\t' != Byte && '\r' != Byte) {
            return true;                                                        // Anything else starts a header.
        }
        ++Pos;                                                                  // Skip the white space.
    }
    return false;                                                               // Out of bytes means out of headers.
}

// Skip the rest of the current header, folded continuation lines included.
//
//   Pos -- in/out: the current position in Bfr.
//   Bfr -- the message buffer.
//   Len -- the number of bytes in Bfr.
//
// Pos is left on the first <nl> that is not followed by a space or a tab.
// If no such <nl> is found, Pos stops at the last byte of the buffer.
void eatThisHeader(int& Pos, const unsigned char* Bfr, const int Len) {
    const int LastIndex = Len - 1;                                              // Leave room to look one byte ahead.
    while(Pos < LastIndex) {
        if('\n' == Bfr[Pos]) {                                                  // At a new line look at what follows.
            const unsigned char Next = Bfr[Pos+1];
            const bool LineIsFolded = (' ' == Next || '\t' == Next);
            if(!LineIsFolded) return;                                           // No folding: the header ends here.
        }
        ++Pos;                                                                  // Otherwise keep on eating.
    }
}

// Advance Pos past a run of ordinary spaces and tabs.
//
//   Pos -- in/out: the current position in Bfr.
//   Bfr -- the message buffer.
//   Len -- the number of bytes in Bfr.
//
// Pos is left on the first byte that is neither a space nor a tab, or at
// Len when the run reaches the end of the buffer.
void eatOrdinarySpace(int& Pos, const unsigned char* Bfr, const int Len) {
    while(Pos < Len && (' ' == Bfr[Pos] || '\t' == Bfr[Pos])) {                 // While we're looking at white space
        ++Pos;                                                                  // step over it.
    }
}

// Capture one header into Output, unfolded and with white space normalized.
//
//   Output -- receives the header text; cleared first.
//   Pos    -- in/out: the current position in Bfr.
//   Bfr    -- the message buffer.
//   Len    -- the number of bytes in Bfr.
//
// Every <cr> is dropped. Each run of spaces and tabs, and each fold (a <nl>
// followed by a space or tab, plus the white space after it), becomes one
// ordinary space. The capture ends at the first <nl> that does not begin a
// fold, with Pos left on that <nl>, or at the end of the buffer with Pos
// equal to Len.
//
// The scan runs all the way to Len so that the final byte of a buffer that
// ends in the middle of a header is still captured. The look-ahead for
// folding is bounds checked separately.
void captureThisHeader(                                                         // Capture the header and move pos.
  string& Output,                                                               // Here is the output string.
  int& Pos,                                                                     // Here is the current position.
  const unsigned char* Bfr,                                                     // Here is the buffer pointer.
  const int Len                                                                 // Here is the length of the buffer.
  ) {
    Output.clear();                                                             // Clear the output.
    for(;Pos < Len; Pos++) {                                                    // Scan through the header.
        switch(Bfr[Pos]) {                                                      // React to each byte.
            case '\r': {                                                        // If we find a <cr> ignore it.
                break;
            }
            case '\n': {                                                        // If we find a <nl> check for folding.
                const bool LineIsFolded = (
                  (Pos+1) < Len &&                                              // Only look ahead if a byte is there.
                  (' ' == Bfr[Pos+1] || '\t' == Bfr[Pos+1])
                );
                if(!LineIsFolded) return;                                       // No fold: done with this header.
                ++Pos;                                                          // Move to the folding space
                eatOrdinarySpace(Pos, Bfr, Len);                                // and gobble it up.
                Output.push_back(' ');                                          // Output a single ordinary space
                --Pos;                                                          // and drop back one for the loop's ++.
                break;
            }
            case '\t':                                                          // When we come across a tab or
            case ' ': {                                                         // a space then we will eat them
                eatOrdinarySpace(Pos, Bfr, Len);                                // and any extras so they are converted
                Output.push_back(' ');                                          // into a single ordinary space.
                --Pos;                                                          // Drop back one for the loop's ++.
                break;
            }
            default: {                                                          // For all ordinary bytes we simply
                Output.push_back(Bfr[Pos]);                                     // add the byte to the string.
                break;
            }
        }
    }
}

void HeaderFinder::UnfoldHeaders() {                                            // Unfold and check headers.
    if(0 >= HeaderDirectives.size()) return;                                    // Skip this if we have no patterns.
    if(0 >= Len) return;                                                        // Skip if we have no message.
    string TestHeader;                                                          // The header under test.

    int Position = 0;                                                           // Position in Bfr.
    for(;;) {                                                                   // Scan through all of the headers.

        // Skip any leading or leftover whitespace. Be sure to exit when we
        // reach a blank new line. The capture routine later on will not eat
        // the white space - that way we can check for the EOH in this one spot.

        if(false == TrimToNextHeader(Position, Bfr, Len)) return;               // If no more headers then we're done.

        // Skip Impossible Headers -- no such first character.

        if(ByteIsImpossible(Bfr[Position])) {                                   // If we have no patterns for this
            eatThisHeader(Position, Bfr, Len);                                  // header then skip it and continue on
            continue;                                                           // to the next one.
        }

        // Capture and unfold the header to test.

        captureThisHeader(TestHeader, Position, Bfr, Len);                      // Unfold the header into TestHeader.

        // Test the header.

        MatchHeaders(TestHeader);                                               // Match and activate header directives.
    }
}