#
# SpamAssassin SNF4SA Plugin for SNFServer.
#
# This plugin implements a SpamAssassin rule to use SNFServer to test
# whether an email is spam.
#
# Copyright (C) 2009 ARM Research Labs, LLC.
#
# snf4sa.pm
#
# The plugin implements a single evaluation rule, which passes the
# email message through SNFServer. The communication with SNFServer
# is through XCI and a temporary file on disk which contains the email
# message truncated to the frist 64K bytes.
#
package Snf4sa;
use strict;
use Mail::SpamAssassin;
use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::PerMsgStatus;
use Mail::SpamAssassin::Logger;
use IO::Socket;
use IO::File;
use File::Temp qw/ tempfile tempdir /;
our @ISA = qw(Mail::SpamAssassin::Plugin);
# Convenience variables and pseudo-constants
my $CRLF = "\x0d\x0a";
# translation table for SNF rule codes
my $rule_code_xlat = {
0 => 'Standard White Rules',
20 => 'GBUdb Truncate (superblack)',
40 => 'GBUdb Caution (suspicious)',
47 => 'Travel',
48 => 'Insurance',
49 => 'Antivirus Push',
50 => 'Media Theft',
51 => 'Spamware',
52 => 'Snake Oil',
53 => 'Scam Patterns',
54 => 'Porn/Adult',
55 => 'Malware & Scumware Greetings',
56 => 'Ink & Toner',
57 => 'Get Rich',
58 => 'Debt & Credit',
59 => 'Casinos & Gambling',
60 => 'Ungrouped Black Rules',
61 => 'Experimental Abstract',
62 => 'Obfuscation Techniques',
63 => 'Experimental Received [ip]',
};
sub new {
my ($class, $mailsa) = @_;
$class = ref($class) || $class;
my $self = $class->SUPER::new($mailsa);
bless ($self, $class);
# Name of evaluation rule.
$self->register_eval_rule ("snf4sa_sacheck");
# Use localhost.
$self->{SNF_Host} = "localhost";
# Use default port.
$self->{SNF_Port} = 9001;
# Timeout.
$self->{SNF_Timeout} = 1;
# Directory for files containing emails read by SNFServer.
$self->{Temp_Dir} = '/tmp/snf4sa';
# Maximum email message size (including headers).
$self->{SNF_MaxTempFileSize} = 64 * 1024;
# Key for confidence in mail header inserted by SNFServer.
$self->{GBUdb_ConfidenceKey} = "c=";
# Key for probability in mail header inserted by SNFServer.
$self->{GBUdb_ProbabilityKey} = "p=";
# Key for GBUdb maximum weight in the configuration file.
$self->{GBUdb_MaxWeightKey} = "gbudb_max_weight";
# Key for SNFServer code in configuration file.
$self->{SNF_CodeKey} = "snf_result";
# Key for SA score increment in configuration file.
$self->{SA_DeltaScoreKey} = "sa_score";
# Key for short circuit in configuration file.
$self->{SA_ShortCircuitYesKey} = "short_circuit_yes";
# Key for no short circuit in configuration file.
$self->{SA_ShortCircuitNoKey} = "short_circuit_no";
# Key for plugin score threshold in the configuration file.
$self->{Plugin_score_thresholdKey} = "pre_3.2_plugin_score_threshold";
return $self;
}
# DEBUG/TEST.
#sub extract_metadata {
#
# my ($self, $opts) = @_;
#
# print "***********************\n";
# print "extract_metadata called\n";
# print "***********************\n";
#
# $opts->{msg}->put_metadata("X-Extract-Metadata:", "Test header");
#
#}
# END OF DEBUG/TEST.
sub have_shortcircuited {
my ($self, $options) = @_;
if (defined($options->{permsgstatus}->{shortCircuit})) {
return $options->{permsgstatus}->{shortCircuit};
}
return 0;
}
sub parse_config {
my ($self, $options) = @_;
# DEBUG.
#print "parse_confg. key: $options->{key}\n";
#print "parse_config. line: $options->{line}\n";
#print "parse_config. value: $options->{value}\n";
#END OF DEBUG.
# Process GBUdb_max_weight.
if (lc($options->{key}) eq $self->{GBUdb_MaxWeightKey}) {
# GBUdb maximum weight.
my $tempValue = $options->{value};
# Test that the value was a number.
#$self->log_debug("Found $self->{GBUdb_MaxWeightKey} . " value: $options->{value}, tempValue: $tempValue\n"; # DEBUG.
if ($tempValue =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/) {
# Value was a number. Load and return success.
$options->{conf}->{gbuDbMaxWeight} = $tempValue;
$self->inhibit_further_callbacks();
return 1;
} else {
$self->log_debug("Invalid value for $self->{GBUdb_MaxWeightKey} " .
$tempValue);
}
} elsif (lc($options->{key}) eq $self->{Plugin_score_thresholdKey}) {
# Plugin score threshold.
my $tempValue = $options->{value};
# Test that the value was a number.
#print "Found $self->{Plugin_score_thresholdKey} value: $options->{value}\n"; # DEBUG.
if ($tempValue =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/) {
# Value was a number. Load and return success.
$options->{conf}->{pluginScoreThreshold} = $tempValue;
$self->inhibit_further_callbacks();
return 1;
} else {
$self->log_debug("Invalid value for $self->{Plugin_score_thresholdKey} " .
$tempValue);
}
} elsif (lc($options->{key}) eq $self->{SNF_CodeKey}) {
# Relationship between SNFServer code and SA score delta.
my $snf = $self->parse_snf_sa_mapping($options);
if (defined($snf)) {
my @snfCode = @{$snf->{snfCode}};
#print "snf->{snfCode}: @snfCode\n"; # DEBUG.
#print "snf->{deltaScore}: $snf->{deltaScore}\n"; # DEBUG.
#print "snf->{shortCircuit}: $snf->{shortCircuit}\n"; # DEBUG.
# Save configuration.
foreach my $i (@{$snf->{snfCode}}) {
# Create (or update) an element in the mapping array
# that snfSaMapping is a reference to.
$options->{conf}->{snfSaMapping}->[$i] = {
deltaScore => $snf->{deltaScore},
shortCircuit => $snf->{shortCircuit}
};
}
# DEBUG.
#for (my $i = 0; $i < @{$options->{conf}->{snfSaMapping}}; $i++) {
# if (! defined($options->{conf}->{snfSaMapping}->[$i])) {
# print "No configuration for SNFServer code $i\n";
# next;
# }
# print "SNFServer code: $i, " .
# "deltaScore: " .
# "$options->{conf}->{snfSaMapping}->[$i]->{deltaScore}, " .
# "shortCircuit: " .
# "$options->{conf}->{snfSaMapping}->[$i]->{shortCircuit}\n";
#}
# END OF DEBUG.
# Successfully parsed.
$self->inhibit_further_callbacks();
return 1;
}
}
# Wasn't handled.
return 0;
}
# Parse a snf_result configuration line.
#
# Input--
#
# $line--String containing the snf_result line without the first word.
#
# Returns a reference with the following fields (if no error)--
#
# snfCode--Array of SNFServer result codes that this configuration
# line specifies.
#
# deltaScore--SA score increment for the codes in @snfCode.
#
# shortCircuit--True if a SNFServer code in @snfCode is to
# short-circuit the message scan, false otherwise.
#
# If the line cannot be parsed, the return value is undef.
#
sub parse_snf_sa_mapping
{
my ($self, $options) = @_;
my $value = $options->{value};
my $ret_hash = {
snfCode => undef,
deltaScore => undef,
shortCircuit => undef
};
# SNFServer codes found.
my @snfCode = ();
# Remove leading and trailing whitespace.
$value =~ s/^\s+//;
$value =~ s/\s+$//;
# Convert to lower case.
$value = lc($value);
# Split up by white space.
my @specVal = split(/\s+/, $value);
if (0 == @specVal) {
# No separate words.
$self->log_debug("No separate words found in configuration line '" .
$options->{line} . "'");
return undef;
}
# Convert each SNFServer result specification into an integer.
my $lastSpec;
for ($lastSpec = 0; $lastSpec < @specVal; $lastSpec++) {
# Check for next keyword.
if ($specVal[$lastSpec] eq $self->{SA_DeltaScoreKey}) {
# We've completed the processing of the SNFServer result
# codes.
last;
}
# Get the code values.
my @codeVal = $self->get_code_values($specVal[$lastSpec]);
if (0 == @codeVal) {
# No code values were obtained.
$self->log_debug("Couldn't parse all the SNFServer code values " .
"in configuration line '" .
$options->{line} . "'");
return undef;
}
# Add to the list of codes.
@snfCode = (@snfCode, @codeVal);
}
# Sort the SNFServer result codes and remove duplicates.
@snfCode = sort { $a <=> $b } @snfCode;
my $prev = -1;
my @temp = grep($_ != $prev && ($prev = $_), @snfCode);
$ret_hash->{snfCode} = \@temp;
# The $specVal[$lastSpec] is $self->{SA_DeltaScoreKey}. Return if
# there aren't enough parameters.
$lastSpec++;
if ($lastSpec >= @specVal) {
# Not enough parameters.
$self->log_debug("Not enough parameters in configuration line '" .
$options->{line} . "'");
return undef;
}
# Extract the SA delta score.
$ret_hash->{deltaScore} = $specVal[$lastSpec];
if (!($ret_hash->{deltaScore} =~
/^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/)) {
# SA delta score isn't a number.
$self->log_debug("Value after '" . $self->{SA_DeltaScoreKey} .
"' ($specVal[$lastSpec]) must be a number " .
"in configuration line '" .
$options->{line} . "'");
return undef;
}
# Get short circuit spec.
$lastSpec++;
$ret_hash->{shortCircuit} = 0;
if ( ($lastSpec + 1) == @specVal) {
# A parameter was specified.
my $shortCircuitSpec = $specVal[$lastSpec];
if ($self->{SA_ShortCircuitYesKey} eq $shortCircuitSpec) {
# Specified short-circuit evaluation.
$ret_hash->{shortCircuit} = 1;
} elsif ($self->{SA_ShortCircuitNoKey} ne $shortCircuitSpec) {
# Invalid short-circuit specification.
$self->log_debug("Invalid short-circuit specification: '" .
$specVal[$lastSpec] .
"' in configuration line '" . $options->{line} .
"'. Must be '$self->{SA_ShortCircuitYesKey}' " .
" or '$self->{SA_ShortCircuitNoKey}'.");
return undef;
}
} elsif ($lastSpec != @specVal) {
# Too many parameters were specified.
$self->log_debug("Too many parameters were specified in " .
"configuration line '" . $options->{line} . "'");
return undef;
}
return $ret_hash;
}
sub get_code_values
{
my ($self, $specElement) = @_;
my @snfCode = ();
# Split the specification.
my @codeVal = split(/-/, $specElement);
#$self->log_debug("snf4sa: get_code_values. specElement: $specElement. codeVal: @codeVal"); # DEBUG
if (1 == @codeVal) {
if ($specElement =~ /^\d+$/) {
# Found a single code.
$snfCode[0] = 1 * $specElement;
}
} elsif (2 == @codeVal) {
# Check range.
if ( ($codeVal[0] =~ /^\d+$/) && ($codeVal[1] =~ /^\d+$/) ) {
# Found a range of codes.
$codeVal[0] = 1 * $codeVal[0];
$codeVal[1] = 1 * $codeVal[1];
if ($codeVal[0] <= $codeVal[1]) {
# Add these SNF codes.
for (my $i = $codeVal[0]; $i <= $codeVal[1]; $i++) {
push(@snfCode, $i);
}
}
}
}
return @snfCode;
}
# Output a debug message.
#
# Input--
#
# $message--String containing the message to output.
#
sub log_debug
{
my ($self, $message) = @_;
dbg("snf4sa: $message");
}
# Check the message with SNFServer.
sub snf4sa_sacheck {
my ($self, $permsgstatus, $fulltext) = @_;
my $response ='';
my $exitvalue;
# Make sure we have a temp dir
unless(-d $self->{Temp_Dir}) {
mkdir($self->{Temp_Dir});
chmod(0777, $self->{Temp_Dir});
};
# Truncate the message.
my $mailtext = substr( ${$fulltext}, 0, $self->{SNF_MaxTempFileSize});
# create our temp file, $filename will contain the full path
my ($fh, $filename) = tempfile( DIR => $self->{Temp_Dir} );
# spew our mail into the temp file
my $SNF_fh = IO::File->new( $filename, "w" ) ||
die(__PACKAGE__ . ": Unable to create temporary file '" . $filename . "'");
$SNF_fh->print($mailtext) ||
$self->cleanup_die($filename,
__PACKAGE__ . ": Unable to write to temporary file '" .
$filename . "'");
$SNF_fh->close ||
$self->cleanup_die($filename,
__PACKAGE__ . ": Unable to close temporary file '" .
$filename . "'");
# Change permissions.
my $cnt = chmod(0666, $filename) ||
$self->cleanup_die($filename, __PACKAGE__ .
": Unable to change permissions of temporary file '" .
$filename . "'");
# xci_scan connects to SNFServer with XCI to scan the message
my $SNF_XCI_Return = $self->xci_scan( $filename );
#print "header:\n\n$SNF_XCI_Return->{header}\n\n"; # DEBUG
# Remove the temp file, we are done with it.
unlink($filename);
# Check response from SNFServer.
if (! $SNF_XCI_Return ) {
die(__PACKAGE__ . ": Internal error");
}
# Check for success.
if (! $SNF_XCI_Return->{"success"}) {
die(__PACKAGE__ . ": Error from SNFServer: " .
$SNF_XCI_Return->{"message"});
}
# get the return code and translation
my ( $rc, $rcx ) = ( $SNF_XCI_Return->{"code"},
$rule_code_xlat->{ $SNF_XCI_Return->{"code"} } );
$rc = -1 unless defined $rc; # default values
$rcx = 'Unknown' unless $rcx;
my $rch = $SNF_XCI_Return->{"header"}; # the SNF header(s)
# Initialize the change in the SA score.
my $deltaScore = 0.0;
# Add the score from the SNFServer return.
if (defined($permsgstatus->{main}->{conf}->{snfSaMapping}->[$rc])) {
$deltaScore +=
$permsgstatus->{main}->{conf}->{snfSaMapping}->[$rc]->{deltaScore};
$permsgstatus->{shortCircuit} =
$permsgstatus->{main}->{conf}->{snfSaMapping}->[$rc]->{shortCircuit};
}
# Perform GBUdb processing.
if (defined($permsgstatus->{main}->{conf}->{gbuDbMaxWeight})) {
#print "gbudbMaxWeight: $permsgstatus->{main}->{conf}->{gbuDbMaxWeight}\n\n"; # DEBUG.
# Calculate the contribution to the scrore from the GBUdb results.
$deltaScore +=
$self->calc_GBUdb($SNF_XCI_Return->{header},
$permsgstatus->{main}->{conf}->{gbuDbMaxWeight});
}
# Add the headers.
$permsgstatus->set_tag("SNFRESULTTAG", "$rc ($rcx)");
$permsgstatus->set_tag("SNFMESSAGESNIFFERSCANRESULT",
$self->extract_header_body($SNF_XCI_Return->{header},
"X-MessageSniffer-Scan-Result"));
$permsgstatus->set_tag("SNFMESSAGESNIFFERRULES",
$self->extract_header_body($SNF_XCI_Return->{header},
"X-MessageSniffer-Rules"));
$permsgstatus->set_tag("SNFGBUDBANALYSIS",
$self->extract_header_body($SNF_XCI_Return->{header},
"X-GBUdb-Analysis"));
# If dynamic scoring isn't supported, this rule fires if the score is
# above the threshold.
if (Mail::SpamAssassin::Version() le "3.2.0") {
my $pluginScoreThreshold = 2.5; # Default value.
if (defined($permsgstatus->{main}->{conf}->{pluginScoreThreshold})) {
$pluginScoreThreshold = $permsgstatus->{main}->{conf}->{pluginScoreThreshold};
}
#print "Before comparison. pluginScoreThreshold: $pluginScoreThreshold deltaScore: $deltaScore\n"; # DEBUG
if ($deltaScore < $pluginScoreThreshold) {
$deltaScore = 0.0;
}
}
#print "After comparison. deltaScore: $deltaScore\n"; # DEBUG
# Submit the score.
if ($deltaScore) {
$permsgstatus->got_hit("SNF4SA", "", score => $deltaScore);
for my $set (0..3) {
$permsgstatus->{conf}->{scoreset}->[$set]->{"SNF4SA"} =
sprintf("%0.3f", $deltaScore);
}
}
# Always return zero, since the score was submitted via got_hit()
# above.
return 0;
}
# Calculate the contribution of the GBUdb scan to the SA score.
#
# Input--
#
# $headers--String containing the headers.
#
# $weight--Weight used to calculate the contribution.
#
# Returns the contribution to the SA score (float).
#
sub calc_GBUdb
{
my ( $self, $headers, $weight ) = @_;
# Split the header into lines.
my @headerLine = split(/\n/, $headers);
# Find the line containing the GBUdb results.
my $line;
foreach $line (@headerLine) {
# Search for the tag.
if ($line =~ /^X-GBUdb-Analysis:/) {
# GBUdb analysis was done. Extract the values.
my $ind0 = index($line, $self->{GBUdb_ConfidenceKey});
my $ind1 = index($line, " ", $ind0 + 2);
if (-1 == $ind0) {
return 0.0;
}
my $c = 1.0 * substr($line, $ind0 + 2, $ind1 - $ind0 - 2);
#print "calc_GBUdb. line: $line\n"; # DEBUG
#print "calc_GBUdb. c: $c, ind0: $ind0, ind1: $ind1\n"; # DEBUG
$ind0 = index($line, $self->{GBUdb_ProbabilityKey});
$ind1 = index($line, " ", $ind0 + 2);
if (-1 == $ind0) {
return 0.0;
}
my $p = 1.0 * substr($line, $ind0 + 2, $ind1 - $ind0 - 2);
#print "calc_GBUdb. p: $p, ind0: $ind0, ind1: $ind1\n"; # DEBUG
# Calculate and return the score.
my $score = abs($p * $c) ** 0.5;
$score *= $weight;
if ($p < 0.0) {
$score *= -1.0;
}
# DEBUG.
#print "calc_GBUdb. p: $p, c: $c, weight: $weight\n";
#print "calc_GBUdb. score: $score\n";
# END OF DEBUG.
return $score;
}
}
}
# Extract the specified header body from a string containing all the
# headers.
#
# Input--
#
# $headers--String containing the headers.
#
# $head--String containing the head of the header to extract.
#
# Returns the body of the header.
#
sub extract_header_body
{
my ( $self, $headers, $head ) = @_;
my $body = "";
if ($headers =~ /$head:(.*)/s) {
my $temp = $1;
$temp =~ /(.*)\nX-(.*)/s;
$body = $1;
}
return $body;
}
# xci_scan( $file )
# returns hashref:
# success : true/false
# code : response code from SNF
# message : scalar message (if any)
sub xci_scan
{
my ( $self, $file ) = @_;
return undef unless $self and $file;
my $ret_hash = {
success => undef,
code => undef,
message => undef,
header => undef,
xml => undef
};
my $xci = $self->connect_socket( $self->{SNF_Host}, $self->{SNF_Port} )
or return $self->err_hash("cannot connect to socket ($!)");
$xci->print("\n");
my $rc = $ret_hash->{xml} = $self->socket_response($xci, $file);
$xci->close;
if ( $rc =~ /^/ ) {
$ret_hash->{success} = 1;
$ret_hash->{code} = $1;
$rc =~ /(.*)<\/xhdr>/s and $ret_hash->{header} = $1;
} elsif ( $rc =~ /^{message} = $1;
} else {
$ret_hash->{message} = "unknown XCI response: $rc";
}
return $ret_hash;
}
# connect_socket( $host, $port )
# returns IO::Socket handle
sub connect_socket
{
my ( $self, $host, $port ) = @_;
return undef unless $self and $host and $port;
my $protoname = 'tcp'; # Proto should default to tcp but it's not expensive to specify
$self->{XCI_Socket} = IO::Socket::INET->new(
PeerAddr => $host,
PeerPort => $port,
Proto => $protoname,
Timeout => $self->{SNF_Timeout} ) or return undef;
$self->{XCI_Socket}->autoflush(1); # make sure autoflush is on -- legacy
return $self->{XCI_Socket}; # return the socket handle
}
# socket_response( $socket_handle )
# returns scalar string
sub socket_response
{
my ( $self, $rs, $file ) = @_;
my $buf = ''; # buffer for response
# blocking timeout for servers who accept but don't answer
eval {
local $SIG{ALRM} = sub { die "timeout\n" }; # set up the interrupt
alarm $self->{SNF_Timeout}; # set up the alarm
while (<$rs>) { # read the socket
$buf .= $_;
}
alarm 0; # reset the alarm
};
# report a blocking timeout
if ( $@ eq "timeout\n" ) {
$self->cleanup_die($file,
__PACKAGE__ . ": Timeout waiting for response from SNFServer");
} elsif ( $@ =~ /alarm.*unimplemented/ ) { # no signals on Win32
while (<$rs>) { # get whatever's left
# in the socket.
$buf .= $_;
}
}
return $buf;
}
# return an error message for xci_scan
sub err_hash
{
my ( $self, $message ) = @_;
return {
success => undef,
code => undef,
message => $message
};
}
sub cleanup_die
{
my ( $self, $file, $message ) = @_;
unlink($file);
die($message);
}
1;