You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

scanner.cpp 5.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. // scanner.cpp
  2. //
  3. // (C) 2002-2009 MicroNeil Research Corporation
  4. // 20041117 _M - Included new improved Filter Chain module UrlDecode. This module
  5. // scans each anchor or image tag for URL encoded characters and converts them to
  6. // their single-byte counterparts. If a character is converted then the decoded
  7. // anchor tag is injected into the scan stream immediately after the source link.
  8. // 20041114 _M - Included new Filter Chain module: Defunker. The Defunker re-emits
  9. // the message to the scanner with all of the HTML and some coding removed. This
  10. // allows HTML obfuscated patterns to be recognized by the scanning engine.
  11. // 20040113 _M - New Reset() method used in ScanMessage() to keep things nice and
  12. // tidy. Also, modified ScanText() to create a new evaluation matrix if it is
  13. // needed, and to append to the existing one if there is one.
  14. // 20030928 _M - Moving toward the peer-server architecture and V3. The message
  15. // scanning component has been moved into its own object called "scanner". From
  16. // now on, a message, or text will be passed to the scanner and the scanner will
  17. // return an evaluation matrix. As always, if something goes wrong it will throw.
  18. // This allows us to separate the creation of a scanner, and its use, from any
  19. // other nifty logic. So, if I'm in a server mode, I can take my scanner and throw
  20. // messages at it as often as I like. Each message I pump in one side comes out the
  21. // other side as an evaluation matrix. This will work well for SMTP based engines
  22. // as well as peer-server, or any other "service pipeline".
  23. //
  24. // Note that the scanner object has two ways it will accept data. One way is as a
  25. // message via .ScanMessage(c_str). This method employs the filter chain system and
  26. // expects to see an SMTP message. The second way is as plain text via .ScanText(c_str).
  27. // This method is useful for "internal" purposes such as secondary scans used to
  28. // locate compound rules or parameter scans used to pick up tuning data from the
  29. // rulebase.
  #include "SNFMulti/scanner.hpp"

  #include <new>
  31. using namespace std;
  32. namespace SNFMulti {
  33. // Scanner::LoadRuleBase(RuleFileName, SecurityKey)
  34. void Scanner::LoadRuleBase(string& RuleFileName, string& SecurityKey) {
  35. RuleBase.Load(RuleFileName); // Load the rulebase file.
  36. RuleBase.Validate(SecurityKey); // Validate the rulebase file.
  37. }
  38. // Scanner::ScanMessage(MessageBuffer)
  39. EvaluationMatrix* Scanner::ScanMessage(unsigned char* MessageBuffer) { // Scan with the filter chain.
  40. FilterChainCString IV(MessageBuffer); // Set up the filter chain.
  41. FilterChainBase64 IW(&IV); // Include Base64 decoding.
  42. FilterChainQuotedPrintable IX(&IW); // Include Quoted Printable decoding.
  43. FilterChainUrlDecode IY(&IX); // Include URL decoder.
  44. FilterChainDefunker IZ(&IY); // Include Defunking.
  45. // Reset and create a new EvaluationMatrix object to use for this scan.
  46. // ScanMessage is always called with a new message.
  47. Reset(); // Reset for the new message.
  48. myEvaluationMatrix = // Allocate a new evaluation matrix
  49. new EvaluationMatrix(&RuleBase); // using the current rulebase.
  50. if(!myEvaluationMatrix) // If the allocation fails then
  51. throw BadMatrixAllocation(); // throw an appropriate exception.
  52. try {
  53. // Message header rules in earlier versions occasionally failed because there was not
  54. // a new-line character in front of the very first header. So, now we insert one :-)
  55. // This allows all header rules to start off with a ^ indicating the start of the line.
  56. myEvaluationMatrix->EvaluateThis('\n'); // Insert a newline ahead of each message.
  57. // Scan each byte in the file up to the horizon or the end of the message.
  58. // If something goes wrong, an exception will be thrown.
  59. while(myEvaluationMatrix->CountOfCharacters < ScanHorizon)
  60. myEvaluationMatrix->EvaluateThis(IZ.GetByte());
  61. }
  62. catch(FilterChain::Empty) { // We're expecting this so it's ok, but
  63. } // anything else will still be thrown!
  64. return myEvaluationMatrix; // Return our results.
  65. }
  66. // Scanner::ScanText(TextBuffer)
  67. EvaluationMatrix* Scanner::ScanText(unsigned char* TextBuffer) { // Scan without the filter chain.
  68. // If needed, create a new EvaluationMatrix object to use for this scan.
  69. // If not needed, we'll add this scanning to the existing matrix.
  70. if(!myEvaluationMatrix) {
  71. myEvaluationMatrix = // Allocate a new evaluation matrix
  72. new EvaluationMatrix(&RuleBase); // using the current rulebase.
  73. if(!myEvaluationMatrix) // If the allocation fails then
  74. throw BadMatrixAllocation(); // throw an appropriate exception.
  75. }
  76. int index=0; // Set up an index at zero...
  77. while( // For as long as we're
  78. TextBuffer[index]!=0 && // not yet terminated and
  79. myEvaluationMatrix->CountOfCharacters < ScanHorizon) // not at the horizon then
  80. myEvaluationMatrix->EvaluateThis(TextBuffer[index++]); // scan this byte & move.
  81. return myEvaluationMatrix; // Return our results.
  82. }
  83. }