You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

snf_HeaderFinder.cpp 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. // snf_HeaderFinder.cpp
  2. // Copyright (C) 2007 - 2009 ARM Research Labs, LLC.
  3. // See www.armresearch.com for the copyright terms.
  4. //
  5. // See snf_HeaderFinder.hpp for details
  6. #include "SNFMulti/snf_HeaderFinder.hpp"
  7. #include "SNFMulti/snfLOGmgr.hpp"
  8. #include "SNFMulti/snfCFGmgr.hpp"
  9. using namespace std;
  10. using namespace CodeDweller;
  11. namespace SNFMulti {
  12. const int NumberOfByteValues = 256; // Number of possible byte values.
  13. HeaderFinder::HeaderFinder( // To construct one of these:
  14. snfScanData* EngineScanData, // -- Scanner control data ptr.
  15. const HeaderDirectiveSet& Patterns, // -- this is the set of patterns.
  16. const unsigned char* MessageBuffer, // -- this is the message buffer.
  17. const int MessageLength // -- this is the length of the buffer.
  18. ) :
  19. ScanData(EngineScanData), // Grab the scan control block.
  20. HeaderDirectives(Patterns), // Grab the Directives and
  21. Bfr(MessageBuffer), // the message buffer.
  22. Len(MessageLength),
  23. ImpossibleBytes(NumberOfByteValues, false), // Clear the impossible bytes cache.
  24. Directives(0) { // Zero the composite result.
  25. UnfoldHeaders(); // Unfold the headers.
  26. }
  27. IP4Address extractIPFromSourceHeader(string& Header) { // Return first IP found in header.
  28. const string Digits = "0123456789";
  29. unsigned int EndOfName = Header.find_first_of(":");
  30. unsigned int StartOfIP = Header.find_first_of(Digits, EndOfName);
  31. const string IPCharacters = ".0123456789";
  32. unsigned int EndOfIP = Header.find_first_not_of(IPCharacters, StartOfIP);
  33. bool NoExtraCharactersAfterIP = (string::npos == EndOfIP);
  34. if(NoExtraCharactersAfterIP) EndOfIP = Header.length();
  35. unsigned int IPLength = EndOfIP - StartOfIP;
  36. IP4Address ExtractedIP = Header.substr(StartOfIP, IPLength);
  37. return ExtractedIP;
  38. }
  39. void HeaderFinder::CheckContent(string& Header, const HeaderFinderPattern& P) { // Check for a match in the header.
  40. bool HeaderContainsFinderPattern = (
  41. string::npos != Header.find(P.Contains, P.Header.length())
  42. );
  43. if(HeaderContainsFinderPattern) {
  44. switch(P.Directive) {
  45. case HeaderDirectiveBypass:
  46. case HeaderDirectiveWhite: {
  47. Directives |= P.Directive; // Add the flags to our output.
  48. break;
  49. }
  50. case HeaderDirectiveDrillDown: {
  51. ScanData->drillPastOrdinal(P.Ordinal); // Mark the IP DrillDown flag.
  52. Directives |= P.Directive; // Add the flags to our output.
  53. break;
  54. }
  55. case HeaderDirectiveContext: {
  56. ActivatedContexts.insert(P.Context); // Activate the context.
  57. Directives |= P.Directive; // Add the flags to our output.
  58. break;
  59. }
  60. case HeaderDirectiveSource: {
  61. bool HeaderDirectiveSourceIPNotSet = (
  62. 0UL == ScanData->HeaderDirectiveSourceIP()
  63. );
  64. bool SourceContextActive = (
  65. ActivatedContexts.end() != ActivatedContexts.find(P.Context)
  66. );
  67. if(HeaderDirectiveSourceIPNotSet && SourceContextActive) {
  68. ScanData->HeaderDirectiveSourceIP(
  69. extractIPFromSourceHeader(Header)
  70. );
  71. Directives |= P.Directive; // Add the flags to our output.
  72. }
  73. break;
  74. }
  75. }
  76. }
  77. }
  78. void HeaderFinder::MatchHeaders(string& Header) { // Check that the header matches.
  79. if(0 >= Header.length()) return; // If there's nothing to look at, done!
  80. HeaderFinderPattern Key; // We will need a handy key.
  81. Key.Header.push_back(Header.at(0)); // Set up a minimal header string.
  82. HeaderDirectiveIterator iK = HeaderDirectives.lower_bound(Key); // Locate the lower bound.
  83. // At this point we have found a reasonable starting place for the
  84. // header directives that might match this header. We will scan through
  85. // them looking for a match. Since all matches should be grouped together
  86. // in the set we will set a flag so that on the first non-match after that
  87. // we can stop looking.
  88. int CurrentOrdinal = 0; // Keep the current ordinal in scope.
  89. bool FoundFirstMatch = false; // Have we found our first match?
  90. for(;iK != HeaderDirectives.end();iK++) { // Scan through the directives.
  91. const HeaderFinderPattern& P = (*iK); // Make a handy handle.
  92. if(0 == Header.compare(0, P.Header.length(), P.Header)) { // Check for a matching header.
  93. if(false == FoundFirstMatch) { // If this is our first match
  94. FoundFirstMatch = true; // then set our first match flag
  95. CurrentOrdinal = Ordinals[P.Header]; // and get the Ordinal. Then increment
  96. Ordinals[P.Header] = CurrentOrdinal + 1; // the Ordinal for next time.
  97. }
  98. if(CurrentOrdinal == P.Ordinal) { // If the Ordinal matches our Directive
  99. CheckContent(Header, P); // then check the content of the header.
  100. } else
  101. if(CurrentOrdinal < P.Ordinal) { // If we're into Directives bigger than
  102. return; // our Ordinal then we're done.
  103. }
  104. } else { // If the header doesn't match and we
  105. if(FoundFirstMatch) return; // were matching before then we're done.
  106. if(Header.at(0)!=P.Header.at(0)) return; // If first bytes don't match, so done!
  107. }
  108. } // Move on to the next directive.
  109. }
  110. bool HeaderFinder::ByteIsImpossible(unsigned char b) { // Is b not first byte of any pattern?
  111. if(ImpossibleBytes[b]) return true; // Don't look if we already know.
  112. HeaderFinderPattern Key; // We will need a handy key.
  113. Key.Header.push_back(b); // Set up a minimal header string.
  114. HeaderDirectiveIterator iK = HeaderDirectives.lower_bound(Key); // Locate the lower bound.
  115. if(iK == HeaderDirectives.end()) return (ImpossibleBytes[b] = true); // If we find nothing or the first byte
  116. if((*iK).Header.at(0) != b) return (ImpossibleBytes[b] = true); // byte doesn't match it's impossible.
  117. return false; // Otherwise we might find it ;-)
  118. }
  // Advance Pos to the first byte of the next header, if there is one.
  //
  //   Pos -- in/out index into Bfr; left on the byte that stopped the scan.
  //   Bfr -- the message buffer.
  //   Len -- number of bytes in Bfr.
  //
  // Spaces, tabs and carriage returns are skipped. A newline that is
  // followed by another newline, or by "\r\n", marks the end of the headers
  // and yields false with Pos left on that first newline. Any other newline
  // is skipped. The first byte that is none of these yields true with Pos
  // left on it. Only positions below Len-2 are examined, which keeps the
  // two-byte look-ahead inside the buffer; running out of such positions
  // also yields false.
  bool TrimToNextHeader(int& Pos, const unsigned char* Bfr, const int Len) {
      const int ScanLimit = Len - 2;                        // Exclusive bound for Pos.

      while (Pos < ScanLimit) {
          const unsigned char Byte = Bfr[Pos];

          if ('\n' == Byte) {
              const bool BlankLineLf = ('\n' == Bfr[Pos + 1]);
              const bool BlankLineCrLf =
                  ('\r' == Bfr[Pos + 1] && '\n' == Bfr[Pos + 2]);
              if (BlankLineCrLf || BlankLineLf) return false; // End of headers.
          } else if (' ' != Byte && '\t' != Byte && '\r' != Byte) {
              return true;                                  // Start of another header.
          }

          ++Pos;
      }

      return false;                                         // Out of bytes, out of headers.
  }
  // Skip over the current header, including any folded continuation lines.
  //
  //   Pos -- in/out index into Bfr.
  //   Bfr -- the message buffer.
  //   Len -- number of bytes in Bfr.
  //
  // Pos moves forward until it rests on a newline whose following byte is
  // neither a space nor a tab. A newline that is followed by a space or tab
  // is a fold and is passed over. Only positions below Len-1 are examined
  // so the one-byte look-ahead stays inside the buffer; if no terminating
  // newline is found Pos stops at that bound.
  void eatThisHeader(int& Pos, const unsigned char* Bfr, const int Len) {
      const int ScanLimit = Len - 1;                        // Exclusive bound for Pos.

      while (Pos < ScanLimit) {
          if ('\n' == Bfr[Pos]) {
              const unsigned char Following = Bfr[Pos + 1];
              const bool IsFold = (' ' == Following || '\t' == Following);
              if (!IsFold) return;                          // Real end of this header.
          }
          ++Pos;
      }
  }
  // Move Pos past a run of spaces and tabs.
  //
  //   Pos -- in/out index into Bfr.
  //   Bfr -- the message buffer.
  //   Len -- number of bytes in Bfr.
  //
  // Pos stops on the first byte that is neither ' ' nor '\t', or at Len if
  // the run reaches the end of the buffer. Carriage returns and newlines
  // are not consumed.
  void eatOrdinarySpace(int& Pos, const unsigned char* Bfr, const int Len) {
      // The bounds test comes first so Bfr[Pos] is only read in range.
      while (Pos < Len && (' ' == Bfr[Pos] || '\t' == Bfr[Pos])) {
          ++Pos;
      }
  }
  164. void captureThisHeader( // Capture the header and move pos.
  165. string& Output, // Here is the output string.
  166. int& Pos, // Here is the current position.
  167. const unsigned char* Bfr, // Here is the buffer pointer.
  168. const int Len // Here is the length of the buffer.
  169. ) {
  170. Output.clear(); // Clear the output.
  171. for(;(Pos < (Len-1)); Pos++) { // Scan through the header.
  172. switch(Bfr[Pos]) { // React to each byte.
  173. case '\r': { // If we find a <cr> ignore it.
  174. break;
  175. }
  176. case '\n': { // If we find a <nl> check for folding.
  177. if(' ' == Bfr[Pos+1] || '\t' == Bfr[Pos+1]) { // If we find folding then
  178. ++Pos; // move to the space
  179. eatOrdinarySpace(Pos, Bfr, Len); // and gobble it up.
  180. Output.push_back(' '); // output a single ordinary space
  181. --Pos; // and drop back one for the loop's ++.
  182. } else { // If the <nl> wasn't part of a fold
  183. return; // then we are done with this header.
  184. }
  185. break; // Skip the rest of the switch.
  186. }
  187. case '\t': // When we come across a tab or
  188. case ' ': { // a space then we will eat them
  189. eatOrdinarySpace(Pos, Bfr, Len); // and any extras so they are converted
  190. Output.push_back(' '); // into a single ordinary space.
  191. --Pos; // Drop back one for the loop's ++.
  192. break;
  193. }
  194. default: { // For all ordinary bytes we simply
  195. Output.push_back(Bfr[Pos]); // add the byte to the string.
  196. break;
  197. }
  198. }
  199. }
  200. }
  201. void HeaderFinder::UnfoldHeaders() { // Unfold and check headers.
  202. if(0 >= HeaderDirectives.size()) return; // Skip this if we have no patterns.
  203. if(0 >= Len) return; // Skip if we have no message.
  204. string TestHeader; // The header under test.
  205. int Position = 0; // Position in Bfr.
  206. for(;;) { // Scan through all of the headers.
  207. // Skip any leading or leftover whitespace. Be sure to exit when we
  208. // reach a blank new line. The capture routine later on will not eat
  209. // the white space - that way we can check for the EOH in this one spot.
  210. if(false == TrimToNextHeader(Position, Bfr, Len)) return; // If no more headers then we're done.
  211. // Skip Impossible Headers -- no such first character.
  212. if(ByteIsImpossible(Bfr[Position])) { // If we have no patterns for this
  213. eatThisHeader(Position, Bfr, Len); // header then skip it and continue on
  214. continue; // to the next one.
  215. }
  216. // Capture and unfold the header to test.
  217. captureThisHeader(TestHeader, Position, Bfr, Len); // Unfold the header into TestHeader.
  218. // Test the header.
  219. MatchHeaders(TestHeader); // Match and activate header directives.
  220. }
  221. }
  222. }