You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

snf_HeaderFinder.cpp 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. // snf_HeaderFinder.cpp
  2. // Copyright (C) 2007 - 2020 ARM Research Labs, LLC.
  3. // See www.armresearch.com for the copyright terms.
  4. //
  5. // See snf_HeaderFinder.hpp for details
  6. #include "snf_HeaderFinder.hpp"
  7. #include "snfLOGmgr.hpp"
  8. #include "snfCFGmgr.hpp"
  9. namespace cd = codedweller;
  // One slot for every value an unsigned char can hold; used in this file to
  // size the per-byte ImpossibleBytes cache.
  const int NumberOfByteValues = 256;
  11. const bool HeaderFinderPattern::operator<(const HeaderFinderPattern& R) const { // Comparator for set<> living.
  12. if(Header < R.Header) { // If the Header name is < then true!
  13. return true;
  14. } else
  15. if(Header == R.Header) { // If the Header name is == then
  16. if(Ordinal < R.Ordinal) { // check the Ordinal. If it's < then
  17. return true; // true!
  18. } else
  19. if(Ordinal == R.Ordinal) { // If the Ordinal == then
  20. if(Contains < R.Contains) { // check the Contains. If it is < then
  21. return true; // true!
  22. } else
  23. if(Context < R.Context) {
  24. return true;
  25. }
  26. }
  27. }
  28. return false; // In all other cases this is not < R
  29. }
  30. HeaderFinderPattern::HeaderFinderPattern(const HeaderFinderPattern& P) { // Copy constructor.
  31. Header = P.Header;
  32. Ordinal = P.Ordinal;
  33. Context = P.Context;
  34. Directive = P.Directive;
  35. Contains = P.Contains;
  36. }
  37. void HeaderFinderPattern::clear() { // Do this to make fresh and clean.
  38. Header.clear();
  39. Ordinal = Context = Directive = 0;
  40. Contains.clear();
  41. }
  42. HeaderFinderPattern&
  43. HeaderFinderPattern::operator=(const HeaderFinderPattern& R) { // Assignment operator.
  44. Header = R.Header;
  45. Ordinal = R.Ordinal;
  46. Context = R.Context;
  47. Directive = R.Directive;
  48. Contains = R.Contains;
  49. return *this;
  50. }
  51. const unsigned long int HeaderFinder::operator()() const { // Return the Directives.
  52. return Directives;
  53. }
  54. HeaderFinder::HeaderFinder( // To construct one of these:
  55. snfScanData* EngineScanData, // -- Scanner control data ptr.
  56. const HeaderDirectiveSet& Patterns, // -- this is the set of patterns.
  57. const unsigned char* MessageBuffer, // -- this is the message buffer.
  58. const int MessageLength // -- this is the length of the buffer.
  59. ) :
  60. ScanData(EngineScanData), // Grab the scan control block.
  61. HeaderDirectives(Patterns), // Grab the Directives and
  62. Bfr(MessageBuffer), // the message buffer.
  63. Len(MessageLength),
  64. ImpossibleBytes(NumberOfByteValues, false), // Clear the impossible bytes cache.
  65. Directives(0) { // Zero the composite result.
  66. UnfoldHeaders(); // Unfold the headers.
  67. }
  68. cd::IP4Address extractIPFromSourceHeader(std::string& Header) { // Return first IP found in header.
  69. const std::string Digits = "0123456789";
  70. unsigned int EndOfName = Header.find_first_of(":");
  71. unsigned int StartOfIP = Header.find_first_of(Digits, EndOfName);
  72. const std::string IPCharacters = ".0123456789";
  73. unsigned int EndOfIP = Header.find_first_not_of(IPCharacters, StartOfIP);
  74. bool NoExtraCharactersAfterIP = (std::string::npos == EndOfIP);
  75. if(NoExtraCharactersAfterIP) EndOfIP = Header.length();
  76. unsigned int IPLength = EndOfIP - StartOfIP;
  77. cd::IP4Address ExtractedIP = Header.substr(StartOfIP, IPLength);
  78. return ExtractedIP;
  79. }
  80. void HeaderFinder::CheckContent(std::string& Header, const HeaderFinderPattern& P) { // Check for a match in the header.
  81. bool HeaderContainsFinderPattern = (
  82. std::string::npos != Header.find(P.Contains, P.Header.length())
  83. );
  84. if(HeaderContainsFinderPattern) {
  85. switch(P.Directive) {
  86. case HeaderDirectiveBypass:
  87. case HeaderDirectiveWhite: {
  88. Directives |= P.Directive; // Add the flags to our output.
  89. break;
  90. }
  91. case HeaderDirectiveDrillDown: {
  92. ScanData->drillPastOrdinal(P.Ordinal); // Mark the IP DrillDown flag.
  93. Directives |= P.Directive; // Add the flags to our output.
  94. break;
  95. }
  96. case HeaderDirectiveContext: {
  97. ActivatedContexts.insert(P.Context); // Activate the context.
  98. Directives |= P.Directive; // Add the flags to our output.
  99. break;
  100. }
  101. case HeaderDirectiveSource: {
  102. bool HeaderDirectiveSourceIPNotSet = (
  103. 0UL == ScanData->HeaderDirectiveSourceIP()
  104. );
  105. bool SourceContextActive = (
  106. ActivatedContexts.end() != ActivatedContexts.find(P.Context)
  107. );
  108. if(HeaderDirectiveSourceIPNotSet && SourceContextActive) {
  109. ScanData->HeaderDirectiveSourceIP(
  110. extractIPFromSourceHeader(Header)
  111. );
  112. Directives |= P.Directive; // Add the flags to our output.
  113. }
  114. break;
  115. }
  116. }
  117. }
  118. }
  119. void HeaderFinder::MatchHeaders(std::string& Header) { // Check that the header matches.
  120. if(0 >= Header.length()) return; // If there's nothing to look at, done!
  121. HeaderFinderPattern Key; // We will need a handy key.
  122. Key.Header.push_back(Header.at(0)); // Set up a minimal header string.
  123. HeaderDirectiveIterator iK = HeaderDirectives.lower_bound(Key); // Locate the lower bound.
  124. // At this point we have found a reasonable starting place for the
  125. // header directives that might match this header. We will scan through
  126. // them looking for a match. Since all matches should be grouped together
  127. // in the set we will set a flag so that on the first non-match after that
  128. // we can stop looking.
  129. int CurrentOrdinal = 0; // Keep the current ordinal in scope.
  130. bool FoundFirstMatch = false; // Have we found our first match?
  131. for(;iK != HeaderDirectives.end();iK++) { // Scan through the directives.
  132. const HeaderFinderPattern& P = (*iK); // Make a handy handle.
  133. if(0 == Header.compare(0, P.Header.length(), P.Header)) { // Check for a matching header.
  134. if(false == FoundFirstMatch) { // If this is our first match
  135. FoundFirstMatch = true; // then set our first match flag
  136. CurrentOrdinal = Ordinals[P.Header]; // and get the Ordinal. Then increment
  137. Ordinals[P.Header] = CurrentOrdinal + 1; // the Ordinal for next time.
  138. }
  139. if(CurrentOrdinal == P.Ordinal) { // If the Ordinal matches our Directive
  140. CheckContent(Header, P); // then check the content of the header.
  141. } else
  142. if(CurrentOrdinal < P.Ordinal) { // If we're into Directives bigger than
  143. return; // our Ordinal then we're done.
  144. }
  145. } else { // If the header doesn't match and we
  146. if(FoundFirstMatch) return; // were matching before then we're done.
  147. if(Header.at(0)!=P.Header.at(0)) return; // If first bytes don't match, so done!
  148. }
  149. } // Move on to the next directive.
  150. }
  151. bool HeaderFinder::ByteIsImpossible(unsigned char b) { // Is b not first byte of any pattern?
  152. if(ImpossibleBytes[b]) return true; // Don't look if we already know.
  153. HeaderFinderPattern Key; // We will need a handy key.
  154. Key.Header.push_back(b); // Set up a minimal header string.
  155. HeaderDirectiveIterator iK = HeaderDirectives.lower_bound(Key); // Locate the lower bound.
  156. if(iK == HeaderDirectives.end()) return (ImpossibleBytes[b] = true); // If we find nothing or the first byte
  157. if((*iK).Header.at(0) != b) return (ImpossibleBytes[b] = true); // byte doesn't match it's impossible.
  158. return false; // Otherwise we might find it ;-)
  159. }
  // Advance Pos to the start of the next header and report whether one exists.
  //
  // Spaces, tabs and carriage returns are skipped. A newline followed by
  // "\r\n" or by another "\n" marks the end of the headers: the function
  // returns false and leaves Pos on that newline. Any other newline is
  // skipped. The first byte that is none of these starts a header: Pos is
  // left on it and true is returned. Only positions below Len-2 are examined
  // so the two-byte look-ahead stays inside the buffer; running out of such
  // positions also returns false.
  bool TrimToNextHeader(int& Pos, const unsigned char* Bfr, const int Len) {
      const int ScanLimit = Len - 2;                        // Keeps Bfr[Pos+2] in range.

      while(Pos < ScanLimit) {
          const unsigned char Byte = Bfr[Pos];

          if('\n' == Byte) {
              const bool BlankLineLf   = ('\n' == Bfr[Pos+1]);
              const bool BlankLineCrLf = ('\r' == Bfr[Pos+1] && '\n' == Bfr[Pos+2]);
              if(BlankLineCrLf || BlankLineLf) return false;    // End of headers.
          } else if(' ' != Byte && '\t' != Byte && '\r' != Byte) {
              return true;                                  // Found the start of a header.
          }

          ++Pos;                                            // Whitespace or lone newline: skip.
      }

      return false;                                         // Out of bytes, so out of headers.
  }
  // Skip over the header that starts at Pos without capturing it.
  //
  // Pos is advanced until it rests on a newline that is NOT followed by a
  // space or tab (a newline followed by either is a fold and belongs to the
  // same header). If no such newline is found, Pos stops at Len-1; the last
  // byte is never examined so the one-byte look-ahead stays in range.
  void eatThisHeader(int& Pos, const unsigned char* Bfr, const int Len) {
      const int LastIndex = Len - 1;

      while(Pos < LastIndex) {
          if('\n' == Bfr[Pos]) {
              const unsigned char Next = Bfr[Pos+1];
              const bool Folded = (' ' == Next || '\t' == Next);
              if(!Folded) return;                           // Real end of this header.
          }
          ++Pos;
      }
  }
  // Move Pos past a run of spaces and tabs.
  //
  // On return Pos rests on the first byte that is neither a space nor a tab,
  // or equals Len if the run reaches the end of the buffer.
  void eatOrdinarySpace(int& Pos, const unsigned char* Bfr, const int Len) {
      while(Pos < Len && (' ' == Bfr[Pos] || '\t' == Bfr[Pos])) {
          ++Pos;
      }
  }
  // Capture one header, unfolded, into Output and advance Pos.
  //
  //   Output -- cleared, then filled with the header text.
  //   Pos    -- in: first byte of the header. out: the newline that ends the
  //             header, or Len if the buffer ran out first.
  //   Bfr    -- the message buffer.
  //   Len    -- number of bytes in Bfr.
  //
  // While copying: carriage returns are dropped; each run of spaces/tabs
  // becomes one ordinary space; a newline followed by a space or tab is a
  // fold, and it together with the whitespace after it becomes one ordinary
  // space. A newline that is not a fold ends the header and is not consumed.
  //
  // The scan runs to the very end of the buffer. The earlier loop stopped one
  // byte short to protect the look-ahead, which silently dropped the final
  // byte when the buffer ended inside a header; the look-ahead is now
  // bounds-checked instead.
  void captureThisHeader(
      std::string& Output,
      int& Pos,
      const unsigned char* Bfr,
      const int Len
  ) {
      Output.clear();

      for(; Pos < Len; ++Pos) {
          const unsigned char Byte = Bfr[Pos];

          if('\r' == Byte) continue;                        // Ignore <cr>.

          const bool NextIsBlank =
              (Pos + 1 < Len) && (' ' == Bfr[Pos+1] || '\t' == Bfr[Pos+1]);

          if('\n' == Byte && !NextIsBlank) return;          // Unfolded <nl>: header is done.

          if('\n' == Byte || ' ' == Byte || '\t' == Byte) { // Fold or whitespace run:
              while(                                        // step to the last blank of the run
                  (Pos + 1 < Len) &&                        // so the loop's ++ lands on the
                  (' ' == Bfr[Pos+1] || '\t' == Bfr[Pos+1]) // first byte after it,
              ) {
                  ++Pos;
              }
              Output.push_back(' ');                        // and emit one ordinary space.
              continue;
          }

          Output.push_back(Byte);                           // Ordinary byte: copy it.
      }
  }
  242. void HeaderFinder::UnfoldHeaders() { // Unfold and check headers.
  243. if(0 >= HeaderDirectives.size()) return; // Skip this if we have no patterns.
  244. if(0 >= Len) return; // Skip if we have no message.
  245. std::string TestHeader; // The header under test.
  246. int Position = 0; // Position in Bfr.
  247. for(;;) { // Scan through all of the headers.
  248. // Skip any leading or leftover whitespace. Be sure to exit when we
  249. // reach a blank new line. The capture routine later on will not eat
  250. // the white space - that way we can check for the EOH in this one spot.
  251. if(false == TrimToNextHeader(Position, Bfr, Len)) return; // If no more headers then we're done.
  252. // Skip Impossible Headers -- no such first character.
  253. if(ByteIsImpossible(Bfr[Position])) { // If we have no patterns for this
  254. eatThisHeader(Position, Bfr, Len); // header then skip it and continue on
  255. continue; // to the next one.
  256. }
  257. // Capture and unfold the header to test.
  258. captureThisHeader(TestHeader, Position, Bfr, Len); // Unfold the header into TestHeader.
  259. // Test the header.
  260. MatchHeaders(TestHeader); // Match and activate header directives.
  261. }
  262. }