123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112 |
- // scanner.cpp
- //
- // (C) 2002-2009 MicroNeil Research Corporation
-
- // 20041117 _M - Included new improved Filter Chain module UrlDecode. This module
- // scans each anchor or image tag for URL encoded characters and converts them to
- // their single byte counterparts. If a character is converted then the decoded
- // anchor tag is injected into the scan stream immediately after the source link.
-
- // 20041114 _M - Included new Filter Chain module: Defunker. The Defunker re-emits
- // the message to the scanner with all of the HTML and some coding removed. This
- // allows HTML obfuscated patterns to be recognized by the scanning engine.
-
- // 20040113 _M - New Reset() method used in ScanMessage() to keep things nice and
- // tidy. Also, modified ScanText() to create a new evaluation matrix if it is
- // needed, and to append to the existing one if there is one.
-
- // 20030928 _M - Moving toward the peer-server architecture and V3. The message
- // scanning component has been moved into its own object called "scanner". From
- // now on, a message, or text will be passed to the scanner and the scanner will
- // return an evaluation matrix. As always, if something goes wrong it will throw.
- // This allows us to separate the creation of a scanner, and its use, from any
- // other nifty logic. So, if I'm in a server mode, I can take my scanner and throw
- // messages at it as often as I like. Each message I pump in one side comes out the
- // other side as an evaluation matrix. This will work well for SMTP based engines
- // as well as peer-server, or any other "service pipeline".
- //
- // Note that the scanner object has two ways it will accept data. One way is as a
- // message via .ScanMessage(c_str). This method employs the filter chain system and
- // expects to see an SMTP message. The second way is as plain text via .ScanText(c_str).
- // This method is useful for "internal" purposes such as secondary scans used to
- // locate compound rules or parameter scans used to pick up tuning data from the
- // rulebase.
-
#include "scanner.hpp"

#include <new>
-
- // Scanner::LoadRuleBase(RuleFileName, SecurityKey)
-
- void Scanner::LoadRuleBase(string& RuleFileName, string& SecurityKey) {
-
- RuleBase.Load(RuleFileName); // Load the rulebase file.
- RuleBase.Validate(SecurityKey); // Validate the rulebase file.
-
- }
-
- // Scanner::ScanMessage(MessageBuffer)
-
- EvaluationMatrix* Scanner::ScanMessage(unsigned char* MessageBuffer) { // Scan with the filter chain.
-
- FilterChainCString IV(MessageBuffer); // Set up the filter chain.
- FilterChainBase64 IW(&IV); // Include Base64 decoding.
- FilterChainQuotedPrintable IX(&IW); // Include Quoted Printable decoding.
- FilterChainUrlDecode IY(&IX); // Include URL decoder.
- FilterChainDefunker IZ(&IY); // Include Defunking.
-
- // Reset and create a new EvaluationMatrix object to use for this scan.
- // ScanMessage is always called with a new message.
-
- Reset(); // Reset for the new message.
-
- myEvaluationMatrix = // Allocate a new evaluation matrix
- new EvaluationMatrix(&RuleBase); // using the current rulebase.
-
- if(!myEvaluationMatrix) // If the allocation fails then
- throw BadMatrixAllocation(); // throw an appropriate exception.
-
- try {
-
- // Message header rules in earlier versions occasionally failed because there was not
- // a new-line character in front of the very first header. So, now we insert one :-)
- // This allows all header rules to start off with a ^ indicating the start of the line.
-
- myEvaluationMatrix->EvaluateThis('\n'); // Insert a newline ahead of each message.
-
- // Scan each byte in the file up to the horizon or the end of the message.
- // If something goes wrong, an exception will be thrown.
-
- while(myEvaluationMatrix->CountOfCharacters < ScanHorizon)
- myEvaluationMatrix->EvaluateThis(IZ.GetByte());
- }
-
- catch(FilterChain::Empty) { // We're expecting this so it's ok, but
- } // anything else will still be thrown!
-
- return myEvaluationMatrix; // Return our results.
- }
-
- // Scanner::ScanText(TextBuffer)
-
- EvaluationMatrix* Scanner::ScanText(unsigned char* TextBuffer) { // Scan without the filter chain.
-
- // If needed, create a new EvaluationMatrix object to use for this scan.
- // If not needed, we'll add this scanning to the existing matrix.
-
- if(!myEvaluationMatrix) {
- myEvaluationMatrix = // Allocate a new evaluation matrix
- new EvaluationMatrix(&RuleBase); // using the current rulebase.
-
- if(!myEvaluationMatrix) // If the allocation fails then
- throw BadMatrixAllocation(); // throw an appropriate exception.
- }
-
- int index=0; // Set up an index at zero...
-
- while( // For as long as we're
- TextBuffer[index]!=0 && // not yet terminated and
- myEvaluationMatrix->CountOfCharacters < ScanHorizon) // not at the horizon then
- myEvaluationMatrix->EvaluateThis(TextBuffer[index++]); // scan this byte & move.
-
- return myEvaluationMatrix; // Return our results.
-
- }
|