You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. // scanner.cpp
  2. //
  3. // (C) 2002-2020 MicroNeil Research Corporation
  4. // 20041117 _M - Included new improved Filter Chain module UrlDecode. This module
  5. // scans each anchor or image tag for URL encoded characters and converts them to
  6. // their single byte counterparts. If a character is converted then the decoded
  7. // anchor tag is injected into the scan stream immediately after the source link.
  8. // 20041114 _M - Included new Filter Chain module: Defunker. The Defunker re-emits
  9. // the message to the scanner with all of the HTML and some coding removed. This
  10. // allows HTML obfuscated patterns to be recognized by the scanning engine.
  11. // 20040113 _M - New Reset() method used in ScanMessage() to keep things nice and
  12. // tidy. Also, modified ScanText() to create a new evaluation matrix if it is
  13. // needed, and to append to the existing one if there is one.
  14. // 20030928 _M - Moving toward the peer-server architecture and V3. The message
  15. // scanning component has been moved into its own object called "scanner". From
  16. // now on, a message, or text will be passed to the scanner and the scanner will
  17. // return an evaluation matrix. As always, if something goes wrong it will throw.
  18. // This allows us to separate the creation of a scanner, and its use, from any
  19. // other nifty logic. So, if I'm in a server mode, I can take my scanner and throw
  20. // messages at it as often as I like. Each message I pump in one side comes out the
  21. // other side as an evaluation matrix. This will work well for SMTP based engines
  22. // as well as peer-server, or any other "service pipeline".
  23. //
  24. // Note that the scanner object has two ways it will accept data. One way is as a
  25. // message via .ScanMessage(c_str). This method employs the filter chain system and
  26. // expects to see an SMTP message. The second way is as plain text via .ScanText(c_str).
  27. // This method is useful for "internal" purposes such as secondary scans used to
  28. // locate compound rules or parameter scans used to pick up tuning data from the
  29. // rulebase.
  #include "scanner.hpp"

  #include <new>  // std::nothrow
  31. // Scanner::LoadRuleBase(RuleFileName, SecurityKey)
  32. void Scanner::LoadRuleBase(std::string& RuleFileName, std::string& SecurityKey) {
  33. RuleBase.Load(RuleFileName); // Load the rulebase file.
  34. RuleBase.Validate(SecurityKey); // Validate the rulebase file.
  35. }
  36. // Scanner::ScanMessage(MessageBuffer)
  37. EvaluationMatrix* Scanner::ScanMessage(unsigned char* MessageBuffer) { // Scan with the filter chain.
  38. FilterChainCString IV(MessageBuffer); // Set up the filter chain.
  39. FilterChainBase64 IW(&IV); // Include Base64 decoding.
  40. FilterChainQuotedPrintable IX(&IW); // Include Quoted Printable decoding.
  41. FilterChainUrlDecode IY(&IX); // Include URL decoder.
  42. FilterChainDefunker IZ(&IY); // Include Defunking.
  43. // Reset and create a new EvaluationMatrix object to use for this scan.
  44. // ScanMessage is always called with a new message.
  45. Reset(); // Reset for the new message.
  46. myEvaluationMatrix = // Allocate a new evaluation matrix
  47. new EvaluationMatrix(&RuleBase); // using the current rulebase.
  48. if(!myEvaluationMatrix) // If the allocation fails then
  49. throw BadMatrixAllocation(); // throw an appropriate exception.
  50. try {
  51. // Message header rules in earlier versions occasionally failed because there was not
  52. // a new-line character in front of the very first header. So, now we insert one :-)
  53. // This allows all header rules to start off with a ^ indicating the start of the line.
  54. myEvaluationMatrix->EvaluateThis('\n'); // Insert a newline ahead of each message.
  55. // Scan each byte in the file up to the horizon or the end of the message.
  56. // If something goes wrong, an exception will be thrown.
  57. while(myEvaluationMatrix->CountOfCharacters < ScanHorizon)
  58. myEvaluationMatrix->EvaluateThis(IZ.GetByte());
  59. }
  60. catch(const FilterChain::Empty&) { // We're expecting this so it's ok, but
  61. } // anything else will still be thrown!
  62. return myEvaluationMatrix; // Return our results.
  63. }
  64. // Scanner::ScanText(TextBuffer)
  65. EvaluationMatrix* Scanner::ScanText(unsigned char* TextBuffer) { // Scan without the filter chain.
  66. // If needed, create a new EvaluationMatrix object to use for this scan.
  67. // If not needed, we'll add this scanning to the existing matrix.
  68. if(!myEvaluationMatrix) {
  69. myEvaluationMatrix = // Allocate a new evaluation matrix
  70. new EvaluationMatrix(&RuleBase); // using the current rulebase.
  71. if(!myEvaluationMatrix) // If the allocation fails then
  72. throw BadMatrixAllocation(); // throw an appropriate exception.
  73. }
  74. int index=0; // Set up an index at zero...
  75. while( // For as long as we're
  76. TextBuffer[index]!=0 && // not yet terminated and
  77. myEvaluationMatrix->CountOfCharacters < ScanHorizon) // not at the horizon then
  78. myEvaluationMatrix->EvaluateThis(TextBuffer[index++]); // scan this byte & move.
  79. return myEvaluationMatrix; // Return our results.
  80. }