// scanner.cpp
//
// (C) 2002-2009 MicroNeil Research Corporation
// 20041117 _M - Included new improved Filter Chain module UrlDecode. This module
// scans each anchor or image tag for URL encoded characters and converts them to
// their single byte counterparts. If a character is converted then the decoded
// anchor tag is injected into the scan stream immediately after the source link.
// 20041114 _M - Included new Filter Chain module: Defunker. The Defunker re-emits
// the message to the scanner with all of the HTML and some coding removed. This
// allows HTML obfuscated patterns to be recognized by the scanning engine.
// 20040113 _M - New Reset() method used in ScanMessage() to keep things nice and
// tidy. Also, modified ScanText() to create a new evaluation matrix if it is
// needed, and to append to the existing one if there is one.
// 20030928 _M - Moving toward the peer-server architecture and V3. The message
// scanning component has been moved into its own object called "scanner". From
// now on, a message, or text will be passed to the scanner and the scanner will
// return an evaluation matrix. As always, if something goes wrong it will throw.
// This allows us to separate the creation of a scanner, and its use, from any
// other nifty logic. So, if I'm in a server mode, I can take my scanner and throw
// messages at it as often as I like. Each message I pump in one side comes out the
// other side as an evaluation matrix. This will work well for SMTP based engines
// as well as peer-server, or any other "service pipeline".
//
// Note that the scanner object has two ways it will accept data. One way is as a
// message via .ScanMessage(c_str). This method employs the filter chain system and
// expects to see an SMTP message. The second way is as plain text via .ScanText(c_str).
// This method is useful for "internal" purposes such as secondary scans used to // locate compound rules or parameter scans used to pick up tuning data from the // rulebase. #include "scanner.hpp" // Scanner::LoadRuleBase(RuleFileName, SecurityKey) void Scanner::LoadRuleBase(string& RuleFileName, string& SecurityKey) { RuleBase.Load(RuleFileName); // Load the rulebase file. RuleBase.Validate(SecurityKey); // Validate the rulebase file. } // Scanner::ScanMessage(MessageBuffer) EvaluationMatrix* Scanner::ScanMessage(unsigned char* MessageBuffer) { // Scan with the filter chain. FilterChainCString IV(MessageBuffer); // Set up the filter chain. FilterChainBase64 IW(&IV); // Include Base64 decoding. FilterChainQuotedPrintable IX(&IW); // Include Quoted Printable decoding. FilterChainUrlDecode IY(&IX); // Include URL decoder. FilterChainDefunker IZ(&IY); // Include Defunking. // Reset and create a new EvaluationMatrix object to use for this scan. // ScanMessage is always called with a new message. Reset(); // Reset for the new message. myEvaluationMatrix = // Allocate a new evaluation matrix new EvaluationMatrix(&RuleBase); // using the current rulebase. if(!myEvaluationMatrix) // If the allocation fails then throw BadMatrixAllocation(); // throw an appropriate exception. try { // Message header rules in earlier versions occasionally failed because there was not // a new-line character in front of the very first header. So, now we insert one :-) // This allows all header rules to start off with a ^ indicating the start of the line. myEvaluationMatrix->EvaluateThis('\n'); // Insert a newline ahead of each message. // Scan each byte in the file up to the horizon or the end of the message. // If something goes wrong, an exception will be thrown. while(myEvaluationMatrix->CountOfCharacters < ScanHorizon) myEvaluationMatrix->EvaluateThis(IZ.GetByte()); } catch(FilterChain::Empty) { // We're expecting this so it's ok, but } // anything else will still be thrown! 
return myEvaluationMatrix; // Return our results. } // Scanner::ScanText(TextBuffer) EvaluationMatrix* Scanner::ScanText(unsigned char* TextBuffer) { // Scan without the filter chain. // If needed, create a new EvaluationMatrix object to use for this scan. // If not needed, we'll add this scanning to the existing matrix. if(!myEvaluationMatrix) { myEvaluationMatrix = // Allocate a new evaluation matrix new EvaluationMatrix(&RuleBase); // using the current rulebase. if(!myEvaluationMatrix) // If the allocation fails then throw BadMatrixAllocation(); // throw an appropriate exception. } int index=0; // Set up an index at zero... while( // For as long as we're TextBuffer[index]!=0 && // not yet terminated and myEvaluationMatrix->CountOfCharacters < ScanHorizon) // not at the horizon then myEvaluationMatrix->EvaluateThis(TextBuffer[index++]); // scan this byte & move. return myEvaluationMatrix; // Return our results. }