You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

snf_HeaderFinder.cpp 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234
  1. // snf_HeaderFinder.cpp
  2. // Copyright (C) 2007 - 2009 ARM Research Labs, LLC.
  3. // See www.armresearch.com for the copyright terms.
  4. //
  5. // See snf_HeaderFinder.hpp for details
  6. #include "snf_HeaderFinder.hpp"
  7. #include "snfLOGmgr.hpp"
  8. #include "snfCFGmgr.hpp"
  // Count of distinct values a single byte can take (0x00..0xFF). Used to
  // size the per-byte "impossible first byte" cache in HeaderFinder.
  const int NumberOfByteValues = 0x100;
  10. HeaderFinder::HeaderFinder( // To construct one of these:
  11. snfScanData* EngineScanData, // -- Scanner control data ptr.
  12. const HeaderDirectiveSet& Patterns, // -- this is the set of patterns.
  13. const unsigned char* MessageBuffer, // -- this is the message buffer.
  14. const int MessageLength // -- this is the length of the buffer.
  15. ) :
  16. ScanData(EngineScanData), // Grab the scan control block.
  17. HeaderDirectives(Patterns), // Grab the Directives and
  18. Bfr(MessageBuffer), // the message buffer.
  19. Len(MessageLength),
  20. ImpossibleBytes(NumberOfByteValues, false), // Clear the impossible bytes cache.
  21. Directives(0) { // Zero the composite result.
  22. UnfoldHeaders(); // Unfold the headers.
  23. }
  24. void HeaderFinder::CheckContent(string& Header, const HeaderFinderPattern& P) { // Check for a match in the header.
  25. if(string::npos != Header.find(P.Contains, P.Header.length())) { // If we find the required contents:
  26. /*** if/else laddar - too complex for switch ***/
  27. if(
  28. HeaderDirectiveBypass == P.Directive || // If this is a bypass directive or
  29. HeaderDirectiveWhite == P.Directive // a white header directive:
  30. ) {
  31. Directives |= P.Directive; // Add the flags to our output.
  32. } else
  33. if(HeaderDirectiveDrillDown == P.Directive) { // If this is a DrillDown rule
  34. ScanData->drillPastOrdinal(P.Ordinal); // mark the IP DrillDown flag.
  35. Directives |= P.Directive; // Add the flags to our output.
  36. } else
  37. if(HeaderDirectiveContext == P.Directive) { // If this is a context activation
  38. ActivatedContexts.insert(P.Context); // header then activate the context.
  39. Directives |= P.Directive; // Add the flags to our output.
  40. } else
  41. if( // Are we forcing the message source?
  42. HeaderDirectiveSource == P.Directive && // If we matched a source directive and
  43. false == ScanData->FoundSourceIP() && // the source is not already set and
  44. ActivatedContexts.end() != ActivatedContexts.find(P.Context) // and the source context is active then
  45. ) { // we set the source from this header.
  46. // Extract the IP from the header.
  47. const string digits = "0123456789"; // These are valid digits.
  48. unsigned int IPStart =
  49. Header.find_first_of(digits, P.Header.length()); // Find the first digit in the header.
  50. if(string::npos == IPStart) return; // If we don't find it we're done.
  51. const string ipchars = ".0123456789"; // These are valid IP characters.
  52. unsigned int IPEnd = Header.find_first_not_of(ipchars, IPStart); // Find the end of the IP.
  53. if(string::npos == IPEnd) IPEnd = Header.length(); // Correct for end of string cases.
  54. ScanData->HeaderDirectiveSourceIP( // Extract the IP from the header and
  55. Header.substr(IPStart, (IPEnd - IPStart)) // expose it to the calling scanner.
  56. );
  57. Directives |= P.Directive; // Add the flags to our output.
  58. }
  59. }
  60. }
  61. void HeaderFinder::MatchHeaders(string& Header) { // Check that the header matches.
  62. if(0 >= Header.length()) return; // If there's nothing to look at, done!
  63. HeaderFinderPattern Key; // We will need a handy key.
  64. Key.Header.push_back(Header.at(0)); // Set up a minimal header string.
  65. HeaderDirectiveIterator iK = HeaderDirectives.lower_bound(Key); // Locate the lower bound.
  66. // At this point we have found a reasonable starting place for the
  67. // header directives that might match this header. We will scan through
  68. // them looking for a match. Since all matches should be grouped together
  69. // in the set we will set a flag so that on the first non-match after that
  70. // we can stop looking.
  71. int CurrentOrdinal = 0; // Keep the current ordinal in scope.
  72. bool FoundFirstMatch = false; // Have we found our first match?
  73. for(;iK != HeaderDirectives.end();iK++) { // Scan through the directives.
  74. const HeaderFinderPattern& P = (*iK); // Make a handy handle.
  75. if(0 == Header.compare(0, P.Header.length(), P.Header)) { // Check for a matching header.
  76. if(false == FoundFirstMatch) { // If this is our first match
  77. FoundFirstMatch = true; // then set our first match flag
  78. CurrentOrdinal = Ordinals[P.Header]; // and get the Ordinal. Then increment
  79. Ordinals[P.Header] = CurrentOrdinal + 1; // the Ordinal for next time.
  80. }
  81. if(CurrentOrdinal == P.Ordinal) { // If the Ordinal matches our Directive
  82. CheckContent(Header, P); // then check the content of the header.
  83. } else
  84. if(CurrentOrdinal < P.Ordinal) { // If we're into Directives bigger than
  85. return; // our Ordinal then we're done.
  86. }
  87. } else { // If the header doesn't match and we
  88. if(FoundFirstMatch) return; // were matching before then we're done.
  89. if(Header.at(0)!=P.Header.at(0)) return; // If first bytes don't match, so done!
  90. }
  91. } // Move on to the next directive.
  92. }
  93. bool HeaderFinder::ByteIsImpossible(unsigned char b) { // Is b not first byte of any pattern?
  94. if(ImpossibleBytes[b]) return true; // Don't look if we already know.
  95. HeaderFinderPattern Key; // We will need a handy key.
  96. Key.Header.push_back(b); // Set up a minimal header string.
  97. HeaderDirectiveIterator iK = HeaderDirectives.lower_bound(Key); // Locate the lower bound.
  98. if(iK == HeaderDirectives.end()) return (ImpossibleBytes[b] = true); // If we find nothing or the first byte
  99. if((*iK).Header.at(0) != b) return (ImpossibleBytes[b] = true); // byte doesn't match it's impossible.
  100. return false; // Otherwise we might find it ;-)
  101. }
  // Advance Pos over whitespace to the start of the next header.
  //
  //   Pos -- current position in Bfr; updated in place.
  //   Bfr -- the message buffer.
  //   Len -- the number of bytes in Bfr.
  //
  // Spaces, tabs, carriage returns and single newlines are skipped. Returns
  // true with Pos on the first other byte. Returns false with Pos on the
  // newline when that newline is followed by "\n" or "\r\n" (end of headers),
  // and also when the scan reaches the last two bytes of the buffer without
  // finding a header byte.
  bool TrimToNextHeader(int& Pos, const unsigned char* Bfr, const int Len) {
      const int Limit = Len - 2;                      // Keeps Pos+1 and Pos+2 readable.
      while(Pos < Limit) {
          const unsigned char Here = Bfr[Pos];
          if('\n' == Here) {
              const bool BlankAfterCRLF = ('\r' == Bfr[Pos+1] && '\n' == Bfr[Pos+2]);
              const bool BlankAfterLF = ('\n' == Bfr[Pos+1]);
              if(BlankAfterCRLF || BlankAfterLF) return false;
          } else if('\t' != Here && '\r' != Here && ' ' != Here) {
              return true;                            // A header starts here.
          }
          ++Pos;
      }
      return false;                                   // Out of bytes, out of headers.
  }
  // Skip the remainder of the current header, folded continuation lines
  // included.
  //
  //   Pos -- current position in Bfr; updated in place.
  //   Bfr -- the message buffer.
  //   Len -- the number of bytes in Bfr.
  //
  // On return Pos rests on the first newline that is not followed by a space
  // or tab, or on Len-1 if no such newline was found before that point.
  void eatThisHeader(int& Pos, const unsigned char* Bfr, const int Len) {
      const int Last = Len - 1;                       // Keeps Pos+1 readable.
      while(Pos < Last) {
          if('\n' == Bfr[Pos]) {
              const unsigned char Next = Bfr[Pos+1];
              const bool Folded = (' ' == Next || '\t' == Next);
              if(!Folded) return;                     // A real line end: header is over.
          }
          ++Pos;
      }
  }
  // Advance Pos past a run of ordinary spaces and tabs.
  //
  //   Pos -- current position in Bfr; updated in place.
  //   Bfr -- the message buffer.
  //   Len -- the number of bytes in Bfr.
  //
  // On return Pos rests on the first byte that is neither ' ' nor '\t', or
  // equals Len if the run extends to the end of the buffer.
  void eatOrdinarySpace(int& Pos, const unsigned char* Bfr, const int Len) {
      while(Pos < Len && (' ' == Bfr[Pos] || '\t' == Bfr[Pos])) {
          ++Pos;
      }
  }
  147. void captureThisHeader( // Capture the header and move pos.
  148. string& Output, // Here is the output string.
  149. int& Pos, // Here is the current position.
  150. const unsigned char* Bfr, // Here is the buffer pointer.
  151. const int Len // Here is the length of the buffer.
  152. ) {
  153. Output.clear(); // Clear the output.
  154. for(;(Pos < (Len-1)); Pos++) { // Scan through the header.
  155. switch(Bfr[Pos]) { // React to each byte.
  156. case '\r': { // If we find a <cr> ignore it.
  157. break;
  158. }
  159. case '\n': { // If we find a <nl> check for folding.
  160. if(' ' == Bfr[Pos+1] || '\t' == Bfr[Pos+1]) { // If we find folding then
  161. ++Pos; // move to the space
  162. eatOrdinarySpace(Pos, Bfr, Len); // and gobble it up.
  163. Output.push_back(' '); // output a single ordinary space
  164. --Pos; // and drop back one for the loop's ++.
  165. } else { // If the <nl> wasn't part of a fold
  166. return; // then we are done with this header.
  167. }
  168. break; // Skip the rest of the switch.
  169. }
  170. case '\t': // When we come across a tab or
  171. case ' ': { // a space then we will eat them
  172. eatOrdinarySpace(Pos, Bfr, Len); // and any extras so they are converted
  173. Output.push_back(' '); // into a single ordinary space.
  174. --Pos; // Drop back one for the loop's ++.
  175. break;
  176. }
  177. default: { // For all ordinary bytes we simply
  178. Output.push_back(Bfr[Pos]); // add the byte to the string.
  179. break;
  180. }
  181. }
  182. }
  183. }
  184. void HeaderFinder::UnfoldHeaders() { // Unfold and check headers.
  185. if(0 >= HeaderDirectives.size()) return; // Skip this if we have no patterns.
  186. if(0 >= Len) return; // Skip if we have no message.
  187. string TestHeader; // The header under test.
  188. int Position = 0; // Position in Bfr.
  189. for(;;) { // Scan through all of the headers.
  190. // Skip any leading or leftover whitespace. Be sure to exit when we
  191. // reach a blank new line. The capture routine later on will not eat
  192. // the white space - that way we can check for the EOH in this one spot.
  193. if(false == TrimToNextHeader(Position, Bfr, Len)) return; // If no more headers then we're done.
  194. // Skip Impossible Headers -- no such first character.
  195. if(ByteIsImpossible(Bfr[Position])) { // If we have no patterns for this
  196. eatThisHeader(Position, Bfr, Len); // header then skip it and continue on
  197. continue; // to the next one.
  198. }
  199. // Capture and unfold the header to test.
  200. captureThisHeader(TestHeader, Position, Bfr, Len); // Unfold the header into TestHeader.
  201. // Test the header.
  202. MatchHeaders(TestHeader); // Match and activate header directives.
  203. }
  204. }