You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

snf4sa.pm 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650
  1. #
  2. # SpamAssassin SNF4SA Plugin for SNFServer.
  3. #
  4. # This plugin implements a SpamAssassin rule to use SNFServer to test
  5. # whether an email is spam.
  6. #
  7. # Copyright (C) 2009 ARM Research Labs, LLC.
  8. #
  9. # snf4sa.pm
  10. #
  11. # The plugin implements a single evaluation rule, which passes the
  12. # email message through SNFServer. The communication with SNFServer
  13. # is through XCI and a temporary file on disk which contains the email
  14. # message truncated to the first 64K bytes.
  15. #
  16. package Snf4sa;
  17. use strict;
  18. use Mail::SpamAssassin;
  19. use Mail::SpamAssassin::Plugin;
  20. use Mail::SpamAssassin::PerMsgStatus;
  21. use Mail::SpamAssassin::Logger;
  22. use IO::Socket;
  23. use IO::File;
  24. use File::Temp qw/ tempfile tempdir /;
  25. our @ISA = qw(Mail::SpamAssassin::Plugin);
  # Convenience variables and pseudo-constants.

  # Carriage return / line feed pair.
  my $CRLF = "\x0d\x0a";

  # Translation table for SNF rule codes: maps the numeric result code
  # reported by SNFServer to the name of the rule group it belongs to.
  my $rule_code_xlat = {
      # White rules and GBUdb results.
       0 => 'Standard White Rules',
      20 => 'GBUdb Truncate (superblack)',
      40 => 'GBUdb Caution (suspicious)',

      # Black rule groups.
      47 => 'Travel',
      48 => 'Insurance',
      49 => 'Antivirus Push',
      50 => 'Media Theft',
      51 => 'Spamware',
      52 => 'Snake Oil',
      53 => 'Scam Patterns',
      54 => 'Porn/Adult',
      55 => 'Malware & Scumware Greetings',
      56 => 'Ink & Toner',
      57 => 'Get Rich',
      58 => 'Debt & Credit',
      59 => 'Casinos & Gambling',
      60 => 'Ungrouped Black Rules',

      # Experimental and abstract groups.
      61 => 'Experimental Abstract',
      62 => 'Obfuscation Techniques',
      63 => 'Experimental Received [ip]',
  };
  # Constructor.
  #
  # Input--
  #
  #   $class--Class name (or an existing object whose class is reused).
  #
  #   $mailsa--Passed unchanged to the parent class constructor.
  #
  # Returns the new plugin object, with the evaluation rule
  # "snf4sa_sacheck" registered and the default settings loaded.
  #
  sub new {
      my ($class, $mailsa) = @_;
      $class = ref($class) || $class;

      my $self = $class->SUPER::new($mailsa);
      bless($self, $class);

      # Name of evaluation rule.
      $self->register_eval_rule("snf4sa_sacheck");

      # Default settings, copied into the object in one step below.
      my %defaults = (
          # SNFServer location: localhost, default port.
          SNF_Host              => "localhost",
          SNF_Port              => 9001,

          # Timeout.
          SNF_Timeout           => 1,

          # Directory for files containing emails read by SNFServer.
          Temp_Dir              => '/tmp/snf4sa',

          # Maximum email message size (including headers).
          SNF_MaxTempFileSize   => 64 * 1024,

          # Configuration file keys: GBUdb maximum weight, SNFServer
          # code, SA score increment, short circuit, no short circuit.
          GBUdb_MaxWeightKey    => "gbudb_max_weight",
          SNF_CodeKey           => "snf_result",
          SA_DeltaScoreKey      => "sa_score",
          SA_ShortCircuitYesKey => "short_circuit_yes",
          SA_ShortCircuitNoKey  => "short_circuit_no",
      );
      @{$self}{ keys %defaults } = values %defaults;

      return $self;
  }
  80. # DEBUG/TEST.
  81. #sub extract_metadata {
  82. #
  83. # my ($self, $opts) = @_;
  84. #
  85. # print "***********************\n";
  86. # print "extract_metadata called\n";
  87. # print "***********************\n";
  88. #
  89. # $opts->{msg}->put_metadata("X-Extract-Metadata:", "Test header");
  90. #
  91. #}
  92. # END OF DEBUG/TEST.
  # Report whether this plugin has short-circuited the scan.
  #
  # Input--
  #
  #   $permsgstatus--Per-message status object (not examined).
  #
  # Always returns 0.
  #
  sub have_shortcircuited
  {
      my $self         = shift;
      my $permsgstatus = shift;

      return 0;
  }
  # Handle a configuration line offered to the plugin.
  #
  # Input--
  #
  #   $options--Hash reference.  The fields read here are {key} (the
  #   setting name), {value} (the rest of the line), {line} (used by the
  #   mapping parser for messages) and {conf} (the configuration object
  #   that receives the parsed setting).
  #
  # Returns 1 if the line was recognized and parsed successfully, 0
  # otherwise (unknown key, or a known key with an invalid value).
  #
  # Diagnostics go through log_debug() rather than print(), so nothing
  # is written to STDOUT while the configuration is being read.
  #
  sub parse_config {
      my ($self, $options) = @_;

      my $key = lc($options->{key});

      if ($key eq $self->{GBUdb_MaxWeightKey}) {

          # GBUdb maximum weight.  The value must be a number.
          my $tempValue = $options->{value};
          if (defined($tempValue) &&
              $tempValue =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/) {

              # Value was a number.  Load and return success.
              $options->{conf}->{gbuDbMaxWeight} = $tempValue;
              $self->inhibit_further_callbacks();
              return 1;
          }

          $self->log_debug("Invalid value for $self->{GBUdb_MaxWeightKey} " .
                           (defined($tempValue) ? $tempValue : ''));

      } elsif ($key eq $self->{SNF_CodeKey}) {

          # Relationship between SNFServer code and SA score delta.
          my $snf = $self->parse_snf_sa_mapping($options);
          if (defined($snf)) {

              my @codes = @{$snf->{snfCode}};
              $self->log_debug("snf->{snfCode}: @codes");
              $self->log_debug("snf->{deltaScore}: $snf->{deltaScore}");
              $self->log_debug("snf->{shortCircuit}: $snf->{shortCircuit}");

              # NOTE(review): the parsed mapping is only logged; nothing
              # in this sub stores it for later use -- confirm whether
              # it should be saved in $options->{conf}.

              # Successfully parsed.  The line belongs to this plugin,
              # so stop offering it to other plugins (same as the GBUdb
              # branch above).
              $self->inhibit_further_callbacks();
              return 1;
          }
      }

      # Wasn't handled.
      return 0;
  }
  # Parse a snf_result configuration line.
  #
  # The expected form of the value is
  #
  #   <code or range> [<code or range> ...] sa_score <number>
  #       [short_circuit_yes | short_circuit_no]
  #
  # where the keywords are the ones held in $self->{SA_DeltaScoreKey},
  # $self->{SA_ShortCircuitYesKey} and $self->{SA_ShortCircuitNoKey}.
  # Matching is case-insensitive because the value is lower-cased first.
  #
  # Input--
  #
  #   $options--Hash reference.  {value} is the snf_result line without
  #   the first word; {line} is only used in debug messages.
  #
  # Returns hash reference with the following fields (if no error)--
  #
  #   snfCode--Reference to an array of the SNFServer result codes that
  #   this configuration line specifies, sorted numerically with
  #   duplicates removed.
  #
  #   deltaScore--SA score increment for the codes in snfCode.
  #
  #   shortCircuit--1 if a SNFServer code in snfCode is to short-circuit
  #   the message scan, 0 otherwise (also when not specified).
  #
  # If the line cannot be parsed, the return value is undef.
  #
  sub parse_snf_sa_mapping
  {
      my ($self, $options) = @_;

      my $ret_hash = {
          snfCode      => undef,
          deltaScore   => undef,
          shortCircuit => undef
      };

      # Remove leading and trailing whitespace, and convert to lower case.
      my $value = $options->{value};
      $value = '' unless defined($value);
      $value =~ s/^\s+//;
      $value =~ s/\s+$//;
      $value = lc($value);

      # Split up by white space.
      my @specVal = split(/\s+/, $value);
      if (0 == @specVal) {
          # No separate words.
          $self->log_debug("No separate words found in configuration line '" .
                           $options->{line} . "'");
          return undef;
      }

      # Convert each SNFServer result specification into integers, up to
      # (but not including) the score keyword.
      my @snfCode = ();
      my $lastSpec;
      for ($lastSpec = 0; $lastSpec < @specVal; $lastSpec++) {

          # The score keyword ends the list of SNFServer result codes.
          last if $specVal[$lastSpec] eq $self->{SA_DeltaScoreKey};

          # Get the code values.
          my @codeVal = $self->get_code_values($specVal[$lastSpec]);
          if (0 == @codeVal) {
              # No code values were obtained.
              $self->log_debug("Couldn't parse all the SNFServer code values " .
                               "in configuration line '" .
                               $options->{line} . "'");
              return undef;
          }

          # Add to the list of codes.
          push(@snfCode, @codeVal);
      }

      # Sort the SNFServer result codes and remove duplicates.  A seen
      # hash is used so that code 0 is kept: a test that relies on the
      # truth of the code itself would discard it.
      my %seen;
      my @uniqueCode = grep { !$seen{$_}++ } sort { $a <=> $b } @snfCode;
      $ret_hash->{snfCode} = \@uniqueCode;

      # $specVal[$lastSpec] is the score keyword (or we ran off the end
      # of the line).  Return if there aren't enough parameters.
      $lastSpec++;
      if ($lastSpec >= @specVal) {
          # Not enough parameters.
          $self->log_debug("Not enough parameters in configuration line '" .
                           $options->{line} . "'");
          return undef;
      }

      # Extract the SA delta score.
      $ret_hash->{deltaScore} = $specVal[$lastSpec];
      if (!($ret_hash->{deltaScore} =~
            /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/)) {
          # SA delta score isn't a number.
          $self->log_debug("Value after '" . $self->{SA_DeltaScoreKey} .
                           "' ($specVal[$lastSpec]) must be a number " .
                           "in configuration line '" .
                           $options->{line} . "'");
          return undef;
      }

      # Get short circuit spec.  At most one more word is allowed.
      $lastSpec++;
      $ret_hash->{shortCircuit} = 0;
      if (($lastSpec + 1) == @specVal) {

          # A parameter was specified.
          my $shortCircuitSpec = $specVal[$lastSpec];
          if ($self->{SA_ShortCircuitYesKey} eq $shortCircuitSpec) {
              # Specified short-circuit evaluation.
              $ret_hash->{shortCircuit} = 1;
          } elsif ($self->{SA_ShortCircuitNoKey} ne $shortCircuitSpec) {
              # Invalid short-circuit specification.
              $self->log_debug("Invalid short-circuit specification: '" .
                               $specVal[$lastSpec] .
                               "' in configuration line '" . $options->{line} .
                               "'. Must be '$self->{SA_ShortCircuitYesKey}' " .
                               " or '$self->{SA_ShortCircuitNoKey}'.");
              return undef;
          }

      } elsif ($lastSpec != @specVal) {
          # Too many parameters were specified.
          $self->log_debug("Too many parameters were specified in " .
                           "configuration line '" . $options->{line} . "'");
          return undef;
      }

      return $ret_hash;
  }
  # Expand one SNFServer code specification into a list of integers.
  #
  # Input--
  #
  #   $specElement--String holding either a single code ("63") or an
  #   inclusive range of codes ("47-50").
  #
  # Returns the list of codes.  The list is empty if the specification
  # is not made of one or two unsigned integers, or if the range is
  # reversed.
  #
  sub get_code_values
  {
      my ($self, $specElement) = @_;

      my @codes = ();

      # Split the specification at the dash.
      my @parts = split(/-/, $specElement);

      # Single code.
      if (@parts == 1) {
          push(@codes, 1 * $specElement) if $specElement =~ /^\d+$/;
          return @codes;
      }

      # Anything other than "low-high" is not understood.
      return @codes if @parts != 2;
      return @codes if grep { !/^\d+$/ } @parts;

      # Range of codes, both ends included.
      my ($low, $high) = map { 1 * $_ } @parts;
      my $code = $low;
      while ($code <= $high) {
          push(@codes, $code);
          $code++;
      }

      return @codes;
  }
  # Send a message to the debug log, prefixed with "snf4sa: ".
  #
  # Input--
  #
  #   $message--String containing the message to output.
  #
  sub log_debug {
      my $self    = shift;
      my $message = shift;

      dbg("snf4sa: " . $message);
  }
  # Check the message with SNFServer.
  #
  # The message is truncated to $self->{SNF_MaxTempFileSize} bytes and
  # written to a temporary file in $self->{Temp_Dir}; SNFServer is asked
  # to scan that file, and the file is removed again.
  #
  # Input--
  #
  #   $permsgstatus--Per-message status object.  Receives the
  #   SNFRESULTTAG tag and, if the score change is non-zero, the SNF4SA
  #   hit.
  #
  #   $fulltext--Reference to a string containing the message.
  #
  # Always returns 0, since the score is submitted via got_hit().
  #
  # Dies if the temporary file cannot be prepared, or if SNFServer
  # cannot be reached or reports an error.
  #
  sub snf4sa_sacheck {
      my ($self, $permsgstatus, $fulltext) = @_;

      # Make sure we have a temp dir.
      unless (-d $self->{Temp_Dir}) {
          my $made    = mkdir($self->{Temp_Dir});
          my $mkdirOs = "$!";

          # Another process may have created the directory in the
          # meantime; only give up if it is really missing.
          if (!$made && !-d $self->{Temp_Dir}) {
              die(__PACKAGE__ . ": Unable to create temporary directory '" .
                  $self->{Temp_Dir} . "': " . $mkdirOs);
          }
          chmod(0777, $self->{Temp_Dir});
      }

      # Truncate the message.
      my $mailtext = substr(${$fulltext}, 0, $self->{SNF_MaxTempFileSize});

      # Create our temp file; $filename will contain the full path.  The
      # handle returned by tempfile() is written to directly, so the file
      # is opened only once and every failure path removes it.
      my ($fh, $filename) = tempfile(DIR => $self->{Temp_Dir});

      # Spew our mail into the temp file.
      print {$fh} $mailtext
          or $self->cleanup_die($filename,
                                __PACKAGE__ . ": Unable to write to temporary file '" .
                                $filename . "'");
      close($fh)
          or $self->cleanup_die($filename,
                                __PACKAGE__ . ": Unable to close temporary file '" .
                                $filename . "'");

      # Change permissions.
      chmod(0666, $filename)
          or $self->cleanup_die($filename, __PACKAGE__ .
                                ": Unable to change permissions of temporary file '" .
                                $filename . "'");

      # xci_scan connects to SNFServer with XCI to scan the message.  Any
      # exception is held back until the temp file has been removed.
      my $SNF_XCI_Return = eval { $self->xci_scan($filename) };
      my $scanError = $@;

      # Remove the temp file, we are done with it.
      unlink($filename);
      die($scanError) if $scanError;

      # Check response from SNFServer.
      if (!$SNF_XCI_Return) {
          die(__PACKAGE__ . ": Internal error");
      }

      # Check for success.
      if (!$SNF_XCI_Return->{"success"}) {
          die(__PACKAGE__ . ": Error from SNFServer: " .
              $SNF_XCI_Return->{"message"});
      }

      # Initialize the change in the SA score.
      my $deltaScore = 0.0;

      # Perform GBUdb processing, now that the scan is known to be good.
      my $maxWeight = $permsgstatus->{main}->{conf}->{gbuDbMaxWeight};
      if (defined($maxWeight)) {
          # Calculate the contribution to the score from the GBUdb results.
          my $gbudbScore =
              $self->calc_GBUdb($SNF_XCI_Return->{header}, $maxWeight);
          $deltaScore += $gbudbScore if $gbudbScore;
      }

      # Get the return code and translation, with default values.
      my $rc  = $SNF_XCI_Return->{"code"};
      my $rcx = $rule_code_xlat->{ $SNF_XCI_Return->{"code"} };
      $rc  = -1        unless defined $rc;
      $rcx = 'Unknown' unless $rcx;

      # Add the header.
      $permsgstatus->set_tag("SNFRESULTTAG", "$rc ($rcx)");

      # Submit the score.
      if ($deltaScore) {
          $permsgstatus->got_hit("SNF4SA", "", score => $deltaScore);

          # NOTE(review): the score is also written straight into
          # $permsgstatus->{scoreset} for sets 0..3 -- confirm this is
          # the structure SpamAssassin reads.
          for my $set (0..3) {
              $permsgstatus->{scoreset}->[$set]->{"SNF4SA"} =
                  sprintf("%0.3f", $deltaScore);
          }
      }

      # Always return zero, since the score was submitted via got_hit()
      # above.
      return 0;
  }
  # Calculate the score contribution from the GBUdb analysis header.
  #
  # The first header line starting with "X-GBUdb-Analysis:" is examined
  # for a "c=<number>" and a "p=<number>" field (presumably GBUdb
  # confidence and probability -- TODO confirm).  The result is
  #
  #   weight * (p * c)^2
  #
  # multiplied by -1 when p is negative.
  #
  # Input--
  #
  #   $headers--String containing the header lines returned by
  #   SNFServer, separated by newlines.  May be undef.
  #
  #   $weight--Number that scales the result.
  #
  # Returns the score.  Returns 0.0 if $headers is undef, if there is no
  # GBUdb analysis line, or if that line lacks the c or p field.
  #
  sub calc_GBUdb
  {
      my ($self, $headers, $weight) = @_;

      # No headers at all: nothing to contribute.
      return 0.0 unless defined($headers);

      # A decimal number with optional sign, fraction and exponent.
      my $number = qr/[+-]?(?=\d|\.\d)\d*(?:\.\d*)?(?:[Ee][+-]?\d+)?/;

      # Find the line containing the GBUdb results.
      foreach my $line (split(/\n/, $headers)) {

          # Search for the tag.
          next unless $line =~ /^X-GBUdb-Analysis:/;

          # GBUdb analysis was done.  Extract the values.  Matching the
          # number itself (rather than cutting at the next space) also
          # works when the field is the last thing on the line.
          my ($c) = $line =~ /\bc=($number)/;
          my ($p) = $line =~ /\bp=($number)/;
          return 0.0 unless defined($c) && defined($p);

          # Calculate and return the score.
          my $score = $p * $c;
          $score *= $score * $weight;
          if ($p < 0.0) {
              $score *= -1.0;
          }

          $self->log_debug("calc_GBUdb. line: $line");
          $self->log_debug("calc_GBUdb. c: $c, p: $p, score: $score");

          return $score;
      }

      # No GBUdb analysis line was present.
      return 0.0;
  }
  # Placeholder: accepts a message and takes no action.
  #
  # Input--
  #
  #   $message--String containing the message (currently unused).
  #
  sub abort {
      my ( $self, $message ) = @_;
  }
  # xci_scan( $file )
  #
  # Ask SNFServer, over XCI, to scan the file named $file and return its
  # headers.
  #
  # returns hashref:
  #   success : true if SNFServer returned a scan result, undef otherwise
  #   code    : response code from SNF
  #   message : scalar message (if any)
  #   header  : text between <xhdr> and </xhdr> in the response (if any)
  #   xml     : the complete response text
  #
  # Returns undef if called without a file name.  If the connection
  # cannot be made, or the request cannot be sent, the hash from
  # err_hash() is returned instead.  An exception raised while reading
  # the response is re-thrown after the socket has been closed.
  sub xci_scan
  {
      my ( $self, $file ) = @_;
      return undef unless $self and $file;

      my $ret_hash = {
          success => undef,
          code    => undef,
          message => undef,
          header  => undef,
          xml     => undef
      };

      my $xci = $self->connect_socket( $self->{SNF_Host}, $self->{SNF_Port} )
          or return $self->err_hash("cannot connect to socket ($!)");

      # Send the request; without it there is no response to wait for.
      unless ( $xci->print("<snf><xci><scanner><scan file='$file' xhdr='yes' /></scanner></xci></snf>\n") ) {
          my $sendError = "$!";
          $xci->close;
          return $self->err_hash("cannot send request to SNFServer ($sendError)");
      }

      # Read the response.  The socket is closed even when reading dies
      # (for example on a timeout), then the exception is passed on.
      my $rc = eval { $self->socket_response($xci, $file) };
      my $readError = $@;
      $xci->close;
      die($readError) if $readError;

      $ret_hash->{xml} = $rc;

      if ( $rc =~ /^<snf><xci><scanner><result code='(\d*)'>/ ) {
          $ret_hash->{success} = 1;
          $ret_hash->{code}    = $1;
          if ( $rc =~ /<xhdr>(.*)<\/xhdr>/s ) {
              $ret_hash->{header} = $1;
          }
      } elsif ( $rc =~ /^<snf><xci><error message='([^']*)'/ ) {
          # Stop at the closing quote of the message attribute, so later
          # quoted text on the same line is not swallowed.
          $ret_hash->{message} = $1;
      } else {
          $ret_hash->{message} = "unknown XCI response: $rc";
      }

      return $ret_hash;
  }
  # connect_socket( $host, $port )
  #
  # Open a TCP connection to $host:$port, using $self->{SNF_Timeout} as
  # the connection timeout.  The result is also kept in
  # $self->{XCI_Socket}.
  #
  # returns IO::Socket handle, or undef if an argument is missing or
  # the connection could not be made
  sub connect_socket {
      my ( $self, $host, $port ) = @_;

      unless ( $self and $host and $port ) {
          return undef;
      }

      # Proto should default to tcp but it's not expensive to specify.
      my $socket = IO::Socket::INET->new(
          PeerAddr => $host,
          PeerPort => $port,
          Proto    => 'tcp',
          Timeout  => $self->{SNF_Timeout},
      );

      # Remember the outcome, success or not.
      $self->{XCI_Socket} = $socket;
      return undef unless $socket;

      # Make sure autoflush is on -- legacy.
      $socket->autoflush(1);

      # Return the socket handle.
      return $socket;
  }
  # socket_response( $socket_handle, $file )
  #
  # Read everything the server sends on $socket_handle until end of
  # file, giving up after $self->{SNF_Timeout} seconds.
  #
  # $file is the temporary file to remove if the read times out.
  #
  # returns scalar string holding the response (possibly empty)
  #
  # Dies, via cleanup_die(), on a timeout.  Any other read error is not
  # reported; whatever was read so far is returned.
  sub socket_response
  {
      my ( $self, $rs, $file ) = @_;
      my $buf = '';                   # buffer for response

      # Blocking timeout for servers who accept but don't answer.
      eval {
          local $SIG{ALRM} = sub { die "timeout\n" };   # set up the interrupt
          alarm $self->{SNF_Timeout};                   # set up the alarm

          # Read inside an inner eval so that the alarm is always
          # cancelled while our handler is still installed, even when
          # the read fails for a reason other than the timeout.
          my $readOk = eval {
              # A named variable is used so the caller's $_ is untouched.
              while ( defined( my $line = <$rs> ) ) {   # read the socket
                  $buf .= $line;
              }
              1;
          };
          my $readError = $@;

          alarm 0;                                      # reset the alarm
          die $readError unless $readOk;
      };
      my $error = $@;

      # Report a blocking timeout.
      if ( $error eq "timeout\n" ) {
          $self->cleanup_die($file,
                             __PACKAGE__ . ": Timeout waiting for response from SNFServer");
      } elsif ( $error =~ /alarm.*unimplemented/ ) {    # no signals on Win32
          # Get whatever's left in the socket.
          while ( defined( my $line = <$rs> ) ) {
              $buf .= $line;
          }
      }

      return $buf;
  }
  # Build the failure result handed back by xci_scan.
  #
  # Input--
  #
  #   $message--String describing the failure.
  #
  # Returns a hash reference with success and code undefined, and
  # message set to $message.
  #
  sub err_hash {
      my ( $self, $message ) = @_;

      my %failure = (
          success => undef,
          code    => undef,
          message => $message,
      );

      return \%failure;
  }
  # Remove a file, then die with the given message.
  #
  # Input--
  #
  #   $file--Name of the file to remove.
  #
  #   $message--String to die with.
  #
  # Never returns.
  #
  sub cleanup_die {
      my $self = shift;
      my ( $file, $message ) = @_;

      unlink $file;
      die $message;
  }
  505. 1;