123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726 |
- #
- # SpamAssassin SNF4SA Plugin for SNFServer.
- #
- # This plugin implements a SpamAssassin rule to use SNFServer to test
- # whether an email is spam.
- #
- # Copyright (C) 2009 ARM Research Labs, LLC.
- #
- # snf4sa.pm
- #
- # The plugin implements a single evaluation rule, which passes the
- # email message through SNFServer. The communication with SNFServer
- # is through XCI and a temporary file on disk which contains the email
- # message truncated to the frist 64K bytes.
- #
-
- package Snf4sa;
- use strict;
- use Mail::SpamAssassin;
- use Mail::SpamAssassin::Plugin;
- use Mail::SpamAssassin::PerMsgStatus;
- use Mail::SpamAssassin::Logger;
- use IO::Socket;
- use IO::File;
-
- use File::Temp qw/ tempfile tempdir /;
- our @ISA = qw(Mail::SpamAssassin::Plugin);
-
- # Convenience variables and pseudo-constants
- my $CRLF = "\x0d\x0a";
-
- # translation table for SNF rule codes
- my $rule_code_xlat = {
- 0 => 'Standard White Rules',
- 20 => 'GBUdb Truncate (superblack)',
- 40 => 'GBUdb Caution (suspicious)',
- 47 => 'Travel',
- 48 => 'Insurance',
- 49 => 'Antivirus Push',
- 50 => 'Media Theft',
- 51 => 'Spamware',
- 52 => 'Snake Oil',
- 53 => 'Scam Patterns',
- 54 => 'Porn/Adult',
- 55 => 'Malware & Scumware Greetings',
- 56 => 'Ink & Toner',
- 57 => 'Get Rich',
- 58 => 'Debt & Credit',
- 59 => 'Casinos & Gambling',
- 60 => 'Ungrouped Black Rules',
- 61 => 'Experimental Abstract',
- 62 => 'Obfuscation Techniques',
- 63 => 'Experimental Received [ip]',
- };
-
- sub new {
- my ($class, $mailsa) = @_;
- $class = ref($class) || $class;
- my $self = $class->SUPER::new($mailsa);
- bless ($self, $class);
-
- # Name of evaluation rule.
- $self->register_eval_rule ("snf4sa_sacheck");
-
- # Use localhost.
- $self->{SNF_Host} = "localhost";
-
- # Use default port.
- $self->{SNF_Port} = 9001;
-
- # Timeout.
- $self->{SNF_Timeout} = 1;
-
- # Directory for files containing emails read by SNFServer.
- $self->{Temp_Dir} = '/tmp/snf4sa';
-
- # Maximum email message size (including headers).
- $self->{SNF_MaxTempFileSize} = 64 * 1024;
-
- # Key for confidence in mail header inserted by SNFServer.
- $self->{GBUdb_ConfidenceKey} = "c=";
-
- # Key for probability in mail header inserted by SNFServer.
- $self->{GBUdb_ProbabilityKey} = "p=";
-
- # Key for GBUdb maximum weight in the configuration file.
- $self->{GBUdb_MaxWeightKey} = "gbudb_max_weight";
-
- # Key for SNFServer code in configuration file.
- $self->{SNF_CodeKey} = "snf_result";
-
- # Key for SA score increment in configuration file.
- $self->{SA_DeltaScoreKey} = "sa_score";
-
- # Key for short circuit in configuration file.
- $self->{SA_ShortCircuitYesKey} = "short_circuit_yes";
-
- # Key for no short circuit in configuration file.
- $self->{SA_ShortCircuitNoKey} = "short_circuit_no";
-
- return $self;
- }
-
- # DEBUG/TEST.
- #sub extract_metadata {
- #
- # my ($self, $opts) = @_;
- #
- # print "***********************\n";
- # print "extract_metadata called\n";
- # print "***********************\n";
- #
- # $opts->{msg}->put_metadata("X-Extract-Metadata:", "Test header");
- #
- #}
- # END OF DEBUG/TEST.
-
- sub have_shortcircuited {
-
- my ($self, $options) = @_;
-
- if (defined($options->{permsgstatus}->{shortCircuit})) {
-
- return $options->{permsgstatus}->{shortCircuit};
-
- }
- return 0;
- }
-
- sub parse_config {
-
- my ($self, $options) = @_;
-
- # DEBUG.
- #print "parse_confg. key: $options->{key}\n";
- #print "parse_config. line: $options->{line}\n";
- #print "parse_config. value: $options->{value}\n";
- #END OF DEBUG.
-
- # Process GBUdb_max_weight.
- if (lc($options->{key}) eq $self->{GBUdb_MaxWeightKey}) {
-
- # GBUdb maximum weight.
- my $tempValue = $options->{value};
- # Test that the value was a number.
- #$self->log_debug("Found $self->{GBUdb_MaxWeightKey} . " value: $options->{value}, tempValue: $tempValue\n"; # DEBUG.
-
- if ($tempValue =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/) {
-
- # Value was a number. Load and return success.
- $options->{conf}->{gbuDbMaxWeight} = $tempValue;
- $self->inhibit_further_callbacks();
- return 1;
-
- } else {
-
- $self->log_debug("Invalid value for $self->{GBUdb_MaxWeightKey} " .
- $tempValue);
-
- }
- } elsif (lc($options->{key}) eq $self->{SNF_CodeKey}) {
-
- # Relationship between SNFServer code and SA score delta.
- my $snf = $self->parse_snf_sa_mapping($options);
- if (defined($snf)) {
- my @snfCode = @{$snf->{snfCode}};
- #print "snf->{snfCode}: @snfCode\n"; # DEBUG.
- #print "snf->{deltaScore}: $snf->{deltaScore}\n"; # DEBUG.
- #print "snf->{shortCircuit}: $snf->{shortCircuit}\n"; # DEBUG.
-
- # Save configuration.
- foreach my $i (@{$snf->{snfCode}}) {
-
- # Create (or update) an element in the mapping array
- # that snfSaMapping is a reference to.
- $options->{conf}->{snfSaMapping}->[$i] = {
- deltaScore => $snf->{deltaScore},
- shortCircuit => $snf->{shortCircuit}
- };
-
- }
-
- # DEBUG.
- #for (my $i = 0; $i < @{$options->{conf}->{snfSaMapping}}; $i++) {
- # if (! defined($options->{conf}->{snfSaMapping}->[$i])) {
- # print "No configuration for SNFServer code $i\n";
- # next;
- # }
- # print "SNFServer code: $i, " .
- # "deltaScore: " .
- # "$options->{conf}->{snfSaMapping}->[$i]->{deltaScore}, " .
- # "shortCircuit: " .
- # "$options->{conf}->{snfSaMapping}->[$i]->{shortCircuit}\n";
- #}
- # END OF DEBUG.
-
- # Successfully parsed.
- $self->inhibit_further_callbacks();
- return 1;
- }
- }
-
- # Wasn't handled.
- return 0;
-
- }
-
- # Parse a snf_result configuration line.
- #
- # Input--
- #
- # $line--String containing the snf_result line without the first word.
- #
- # Returns a reference with the following fields (if no error)--
- #
- # snfCode--Array of SNFServer result codes that this configuration
- # line specifies.
- #
- # deltaScore--SA score increment for the codes in @snfCode.
- #
- # shortCircuit--True if a SNFServer code in @snfCode is to
- # short-circuit the message scan, false otherwise.
- #
- # If the line cannot be parsed, the return value is undef.
- #
- sub parse_snf_sa_mapping
- {
- my ($self, $options) = @_;
-
- my $value = $options->{value};
-
- my $ret_hash = {
- snfCode => undef,
- deltaScore => undef,
- shortCircuit => undef
- };
-
- # SNFServer codes found.
- my @snfCode = ();
-
- # Remove leading and trailing whitespace.
- $value =~ s/^\s+//;
- $value =~ s/\s+$//;
-
- # Convert to lower case.
- $value = lc($value);
-
- # Split up by white space.
- my @specVal = split(/\s+/, $value);
-
- if (0 == @specVal) {
-
- # No separate words.
- $self->log_debug("No separate words found in configuration line '" .
- $options->{line} . "'");
- return undef;
-
- }
-
- # Convert each SNFServer result specification into an integer.
- my $lastSpec;
- for ($lastSpec = 0; $lastSpec < @specVal; $lastSpec++) {
-
- # Check for next keyword.
- if ($specVal[$lastSpec] eq $self->{SA_DeltaScoreKey}) {
-
- # We've completed the processing of the SNFServer result
- # codes.
- last;
-
- }
-
- # Get the code values.
- my @codeVal = $self->get_code_values($specVal[$lastSpec]);
- if (0 == @codeVal) {
-
- # No code values were obtained.
- $self->log_debug("Couldn't parse all the SNFServer code values " .
- "in configuration line '" .
- $options->{line} . "'");
- return undef;
-
- }
-
- # Add to the list of codes.
- @snfCode = (@snfCode, @codeVal);
-
- }
-
- # Sort the SNFServer result codes and remove duplicates.
- @snfCode = sort { $a <=> $b } @snfCode;
- my $prev = -1;
- my @temp = grep($_ != $prev && ($prev = $_), @snfCode);
- $ret_hash->{snfCode} = \@temp;
-
- # The $specVal[$lastSpec] is $self->{SA_DeltaScoreKey}. Return if
- # there aren't enough parameters.
- $lastSpec++;
- if ($lastSpec >= @specVal) {
-
- # Not enough parameters.
- $self->log_debug("Not enough parameters in configuration line '" .
- $options->{line} . "'");
- return undef;
-
- }
-
- # Extract the SA delta score.
- $ret_hash->{deltaScore} = $specVal[$lastSpec];
- if (!($ret_hash->{deltaScore} =~
- /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/)) {
-
- # SA delta score isn't a number.
- $self->log_debug("Value after '" . $self->{SA_DeltaScoreKey} .
- "' ($specVal[$lastSpec]) must be a number " .
- "in configuration line '" .
- $options->{line} . "'");
- return undef;
-
- }
-
- # Get short circuit spec.
- $lastSpec++;
- $ret_hash->{shortCircuit} = 0;
- if ( ($lastSpec + 1) == @specVal) {
-
- # A parameter was specified.
- my $shortCircuitSpec = $specVal[$lastSpec];
- if ($self->{SA_ShortCircuitYesKey} eq $shortCircuitSpec) {
-
- # Specified short-circuit evaluation.
- $ret_hash->{shortCircuit} = 1;
-
- } elsif ($self->{SA_ShortCircuitNoKey} ne $shortCircuitSpec) {
-
- # Invalid short-circuit specification.
- $self->log_debug("Invalid short-circuit specification: '" .
- $specVal[$lastSpec] .
- "' in configuration line '" . $options->{line} .
- "'. Must be '$self->{SA_ShortCircuitYesKey}' " .
- " or '$self->{SA_ShortCircuitNoKey}'.");
- return undef;
-
- }
- } elsif ($lastSpec != @specVal) {
-
- # Too many parameters were specified.
- $self->log_debug("Too many parameters were specified in " .
- "configuration line '" . $options->{line} . "'");
- return undef;
-
- }
-
- return $ret_hash;
-
- }
-
- sub get_code_values
- {
- my ($self, $specElement) = @_;
-
- my @snfCode = ();
-
- # Split the specification.
- my @codeVal = split(/-/, $specElement);
-
- #$self->log_debug("snf4sa: get_code_values. specElement: $specElement. codeVal: @codeVal"); # DEBUG
-
- if (1 == @codeVal) {
-
- if ($specElement =~ /^\d+$/) {
-
- # Found a single code.
- $snfCode[0] = 1 * $specElement;
-
- }
-
- } elsif (2 == @codeVal) {
-
- # Check range.
- if ( ($codeVal[0] =~ /^\d+$/) && ($codeVal[1] =~ /^\d+$/) ) {
-
- # Found a range of codes.
- $codeVal[0] = 1 * $codeVal[0];
- $codeVal[1] = 1 * $codeVal[1];
- if ($codeVal[0] <= $codeVal[1]) {
-
- # Add these SNF codes.
- for (my $i = $codeVal[0]; $i <= $codeVal[1]; $i++) {
-
- push(@snfCode, $i);
-
- }
-
- }
- }
- }
- return @snfCode;
- }
-
- # Output a debug message.
- #
- # Input--
- #
- # $message--String containing the message to output.
- #
- sub log_debug
- {
- my ($self, $message) = @_;
- dbg("snf4sa: $message");
- }
-
- # Check the message with SNFServer.
- sub snf4sa_sacheck {
-
- my ($self, $permsgstatus, $fulltext) = @_;
-
- my $response ='';
- my $exitvalue;
-
- # Make sure we have a temp dir
- unless(-d $self->{Temp_Dir}) {
- mkdir($self->{Temp_Dir});
- chmod(0777, $self->{Temp_Dir});
- };
-
- # Truncate the message.
- my $mailtext = substr( ${$fulltext}, 0, $self->{SNF_MaxTempFileSize});
-
- # create our temp file, $filename will contain the full path
- my ($fh, $filename) = tempfile( DIR => $self->{Temp_Dir} );
-
- # spew our mail into the temp file
- my $SNF_fh = IO::File->new( $filename, "w" ) ||
- die(__PACKAGE__ . ": Unable to create temporary file '" . $filename . "'");
- $SNF_fh->print($mailtext) ||
- $self->cleanup_die($filename,
- __PACKAGE__ . ": Unable to write to temporary file '" .
- $filename . "'");
- $SNF_fh->close ||
- $self->cleanup_die($filename,
- __PACKAGE__ . ": Unable to close temporary file '" .
- $filename . "'");
-
- # Change permissions.
- my $cnt = chmod(0666, $filename) ||
- $self->cleanup_die($filename, __PACKAGE__ .
- ": Unable to change permissions of temporary file '" .
- $filename . "'");
-
- # xci_scan connects to SNFServer with XCI to scan the message
- my $SNF_XCI_Return = $self->xci_scan( $filename );
-
- #print "header:\n\n$SNF_XCI_Return->{header}\n\n"; # DEBUG
-
- # Remove the temp file, we are done with it.
- unlink($filename);
-
- # Check response from SNFServer.
- if (! $SNF_XCI_Return ) {
- die(__PACKAGE__ . ": Internal error");
- }
-
- # Check for success.
- if (! $SNF_XCI_Return->{"success"}) {
- die(__PACKAGE__ . ": Error from SNFServer: " .
- $SNF_XCI_Return->{"message"});
- }
-
- # get the return code and translation
- my ( $rc, $rcx ) = ( $SNF_XCI_Return->{"code"},
- $rule_code_xlat->{ $SNF_XCI_Return->{"code"} } );
-
- $rc = -1 unless defined $rc; # default values
- $rcx = 'Unknown' unless $rcx;
- my $rch = $SNF_XCI_Return->{"header"}; # the SNF header(s)
-
- # Initialize the change in the SA score.
- my $deltaScore = 0.0;
-
- # Add the score from the SNFServer return.
- if (defined($permsgstatus->{main}->{conf}->{snfSaMapping}->[$rc])) {
-
- $deltaScore +=
- $permsgstatus->{main}->{conf}->{snfSaMapping}->[$rc]->{deltaScore};
-
- $permsgstatus->{shortCircuit} =
- $permsgstatus->{main}->{conf}->{snfSaMapping}->[$rc]->{shortCircuit};
-
- }
-
- # Perform GBUdb processing.
- if (defined($permsgstatus->{main}->{conf}->{gbuDbMaxWeight})) {
-
- #print "gbudbMaxWeight: $permsgstatus->{main}->{conf}->{gbuDbMaxWeight}\n\n"; # DEBUG.
-
- # Calculate the contribution to the scrore from the GBUdb results.
- $deltaScore +=
- $self->calc_GBUdb($SNF_XCI_Return->{header},
- $permsgstatus->{main}->{conf}->{gbuDbMaxWeight});
-
- }
-
- # Add the headers.
- $permsgstatus->set_tag("SNFRESULTTAG", "$rc ($rcx)");
- $permsgstatus->set_tag("SNFMESSAGESNIFFERSCANRESULT",
- $self->extract_header_body($SNF_XCI_Return->{header},
- "X-MessageSniffer-Scan-Result"));
- $permsgstatus->set_tag("SNFMESSAGESNIFFERRULES",
- $self->extract_header_body($SNF_XCI_Return->{header},
- "X-MessageSniffer-Rules"));
- $permsgstatus->set_tag("SNFGBUDBANALYSIS",
- $self->extract_header_body($SNF_XCI_Return->{header},
- "X-GBUdb-Analysis"));
-
- # Submit the score.
- if ($deltaScore) {
-
- $permsgstatus->got_hit("SNF4SA", "", score => $deltaScore);
- for my $set (0..3) {
- $permsgstatus->{conf}->{scoreset}->[$set]->{"SNF4SA"} =
- sprintf("%0.3f", $deltaScore);
- }
-
- }
-
- # Always return zero, since the score was submitted via got_hit()
- # above.
- return 0;
-
- }
-
- # Calculate the contribution of the GBUdb scan to the SA score.
- #
- # Input--
- #
- # $headers--String containing the headers.
- #
- # $weight--Weight used to calculate the contribution.
- #
- # Returns the contribution to the SA score (float).
- #
- sub calc_GBUdb
- {
- my ( $self, $headers, $weight ) = @_;
-
- # Split the header into lines.
- my @headerLine = split(/\n/, $headers);
-
- # Find the line containing the GBUdb results.
- my $line;
- foreach $line (@headerLine) {
-
- # Search for the tag.
- if ($line =~ /^X-GBUdb-Analysis:/) {
-
- # GBUdb analysis was done. Extract the values.
- my $ind0 = index($line, $self->{GBUdb_ConfidenceKey});
- my $ind1 = index($line, " ", $ind0 + 2);
- if (-1 == $ind0) {
-
- return 0.0;
- }
- my $c = 1.0 * substr($line, $ind0 + 2, $ind1 - $ind0 - 2);
- #print "calc_GBUdb. line: $line\n"; # DEBUG
- #print "calc_GBUdb. c: $c, ind0: $ind0, ind1: $ind1\n"; # DEBUG
- $ind0 = index($line, $self->{GBUdb_ProbabilityKey});
- $ind1 = index($line, " ", $ind0 + 2);
- if (-1 == $ind0) {
-
- return 0.0;
- }
- my $p = 1.0 * substr($line, $ind0 + 2, $ind1 - $ind0 - 2);
-
- #print "calc_GBUdb. p: $p, ind0: $ind0, ind1: $ind1\n"; # DEBUG
-
- # Calculate and return the score.
- my $score = abs($p * $c) ** 0.5;
- $score *= $weight;
- if ($p < 0.0) {
- $score *= -1.0;
- }
- # DEBUG.
- #print "calc_GBUdb. p: $p, c: $c, weight: $weight\n";
- #print "calc_GBUdb. score: $score\n";
- # END OF DEBUG.
- return $score;
-
- }
- }
- }
-
- # Extract the specified header body from a string containing all the
- # headers.
- #
- # Input--
- #
- # $headers--String containing the headers.
- #
- # $head--String containing the head of the header to extract.
- #
- # Returns the body of the header.
- #
- sub extract_header_body
- {
- my ( $self, $headers, $head ) = @_;
- my $body = "";
- if ($headers =~ /$head:(.*)/s) {
-
- my $temp = $1;
- $temp =~ /(.*)\nX-(.*)/s;
- $body = $1;
-
- }
- return $body;
- }
-
- # xci_scan( $file )
- # returns hashref:
- # success : true/false
- # code : response code from SNF
- # message : scalar message (if any)
- sub xci_scan
- {
- my ( $self, $file ) = @_;
- return undef unless $self and $file;
-
- my $ret_hash = {
- success => undef,
- code => undef,
- message => undef,
- header => undef,
- xml => undef
- };
-
- my $xci = $self->connect_socket( $self->{SNF_Host}, $self->{SNF_Port} )
- or return $self->err_hash("cannot connect to socket ($!)");
-
- $xci->print("<snf><xci><scanner><scan file='$file' xhdr='yes' /></scanner></xci></snf>\n");
- my $rc = $ret_hash->{xml} = $self->socket_response($xci, $file);
- $xci->close;
-
-
- if ( $rc =~ /^<snf><xci><scanner><result code='(\d*)'>/ ) {
- $ret_hash->{success} = 1;
- $ret_hash->{code} = $1;
- $rc =~ /<xhdr>(.*)<\/xhdr>/s and $ret_hash->{header} = $1;
- } elsif ( $rc =~ /^<snf><xci><error message='(.*)'/ ) {
- $ret_hash->{message} = $1;
- } else {
- $ret_hash->{message} = "unknown XCI response: $rc";
- }
-
- return $ret_hash;
- }
-
- # connect_socket( $host, $port )
- # returns IO::Socket handle
- sub connect_socket
- {
- my ( $self, $host, $port ) = @_;
- return undef unless $self and $host and $port;
- my $protoname = 'tcp'; # Proto should default to tcp but it's not expensive to specify
-
- $self->{XCI_Socket} = IO::Socket::INET->new(
- PeerAddr => $host,
- PeerPort => $port,
- Proto => $protoname,
- Timeout => $self->{SNF_Timeout} ) or return undef;
-
- $self->{XCI_Socket}->autoflush(1); # make sure autoflush is on -- legacy
- return $self->{XCI_Socket}; # return the socket handle
- }
-
- # socket_response( $socket_handle )
- # returns scalar string
- sub socket_response
- {
- my ( $self, $rs, $file ) = @_;
- my $buf = ''; # buffer for response
-
- # blocking timeout for servers who accept but don't answer
- eval {
- local $SIG{ALRM} = sub { die "timeout\n" }; # set up the interrupt
- alarm $self->{SNF_Timeout}; # set up the alarm
-
- while (<$rs>) { # read the socket
- $buf .= $_;
- }
-
- alarm 0; # reset the alarm
- };
-
- # report a blocking timeout
- if ( $@ eq "timeout\n" ) {
- $self->cleanup_die($file,
- __PACKAGE__ . ": Timeout waiting for response from SNFServer");
- } elsif ( $@ =~ /alarm.*unimplemented/ ) { # no signals on Win32
- while (<$rs>) { # get whatever's left
- # in the socket.
- $buf .= $_;
- }
- }
- return $buf;
- }
-
- # return an error message for xci_scan
- sub err_hash
- {
- my ( $self, $message ) = @_;
-
- return {
- success => undef,
- code => undef,
- message => $message
- };
- }
-
- sub cleanup_die
- {
- my ( $self, $file, $message ) = @_;
-
- unlink($file);
- die($message);
- }
- 1;
|