#! /usr/local/bin/perl
#################################################################
### Cleanfeed tester, v0.1
### Test your cleanfeed modifications against a known corpus of
### spam / ham.
###
### Put this module in the cleanfeed dir, create the corpus
### directory structure in the same dir :
###   corpus/
###   corpus/good/
###   corpus/bad/
###
### Populate with files, and run.
###
### This software is free to redistribute and modify, and comes
### without any guarantee whatsoever. Use at your own risk.
### You may eventually contact me at fred@lacave.net.
### -- Fred

# NOTE: every setting below is intentionally a package global (no "my"):
# the files pulled in with "require" further down are expected to see them.

# With this set, you can put exclusions in your cleanfeed.local
# based on this, for instance if you don't want to pollute your hash
# databases with the tests.
$MODE = 'test';

# If you use a server which sends all headers, turn this off (set to 0).
# When on, cft_parse_art() keeps only the headers listed in %khdr.
$restricthdr = 1;

# Turn off (set to 0) if you wish to show only the final report.
$verbose = 1;

# Known headers. This list is taken directly from innd.c.
# Every listed header name maps to 1 so that it can be tested with a
# plain hash lookup.
%khdr = map { $_ => 1 } qw(
    Approved
    Control
    Date
    Distribution
    Expires
    From
    Lines
    Message-ID
    Newsgroups
    Path
    Reply-To
    Sender
    Subject
    Supersedes
    Bytes
    Also-Control
    References
    Xref
    Keywords
    X-Trace
    Date-Received
    Posted
    Posting-Version
    Received
    Relay-Version
    NNTP-Posting-Host
    Followup-To
    Organization
    Content-Type
    Content-Base
    Content-Disposition
    X-Newsreader
    X-Mailer
    X-Newsposter
    X-Cancelled-By
    X-Canceled-By
    Cancel-Key
    User-Agent
    X-Original-Message-ID
);

# And rock'n'roll. No user serviceable parts below.
require "cleanfeed.local";
require "filter_innd.pl";

# Result counters (package global, same layout as before):
#   $r[0]  good articles accepted   (correct)
#   $r[1]  good articles rejected   (wrong)
#   $r[2]  good articles seen
#   $r[3]  bad articles rejected    (correct)
#   $r[4]  bad articles accepted    (wrong)
#   $r[5]  bad articles seen
@r = (0, 0, 0, 0, 0, 0);

foreach my $d ('corpus/good', 'corpus/bad') {
    my $is_good = ($d =~ /good$/) ? 1 : 0;

    foreach my $f (glob("$d/*")) {
        # Start each article with an empty header hash. This must be an
        # empty LIST: assigning {} would store a stringified hash
        # reference as a bogus header key.
        %hdr = ();
        cft_parse_art($f);

        # filter_art() is presumably provided by filter_innd.pl; a true
        # return value is treated as a rejection and printed as-is.
        my $r = filter_art();

        print "$f : " if ($verbose);
        $r[$is_good ? 2 : 5]++;

        if ($r) {
            print $r."\n" if ($verbose);
            $r[$is_good ? 1 : 3]++;
        }
        else {
            print "accepted\n" if ($verbose);
            $r[$is_good ? 0 : 4]++;
        }
    }
}

print "\n" if ($verbose);
print "Result :\n";

# Each report line is only printed when its denominator is non-zero, so
# an empty corpus directory cannot trigger a division by zero.
printf("\t".'Negatives : %4d ok, %4d bad, %3d%%'."\n",
       $r[0], $r[1], ($r[0] * 100 / $r[2]))
    if ($r[2]);
printf("\t".'Positives : %4d ok, %4d bad, %3d%%'."\n",
       $r[3], $r[4], ($r[3] * 100 / $r[5]))
    if ($r[5]);
printf("\t".'TOTAL : %4d ok, %4d bad, %3d%%'."\n",
       $r[0] + $r[3], $r[1] + $r[4],
       (($r[0] + $r[3]) * 100 / ($r[2] + $r[5])))
    if ($r[2] + $r[5]);

# cft_parse_art($fn)
#
# Reads the article stored in file $fn and fills the global %hdr:
#   - one entry per header line, keyed by header name; folded
#     (whitespace-led) continuation lines are appended to the previous
#     header, separated by a newline;
#   - $hdr{__LINES__}  number of lines after the blank separator line;
#   - $hdr{__BODY__}   those lines joined together.
# When $restricthdr is true, headers not listed in %khdr are discarded.
# If the file cannot be opened a warning is emitted and the article is
# treated as empty. The caller is expected to clear %hdr beforehand.
sub cft_parse_art {
    my $fn = shift;
    my @l;

    if (open(my $fh, '<', $fn)) {
        @l = <$fh>;
        close($fh);
    }
    else {
        warn "cft_parse_art: cannot open $fn: $!\n";
    }

    # '#' is a scratch key collecting everything we do not want to keep;
    # it is removed again before returning.
    my $lk = '#';

    while (scalar(@l) > 0) {
        my $line = shift(@l);
        chomp($line);

        # The first empty line ends the headers; what is left is the body.
        last if ($line =~ /^$/);

        if ($line =~ /^\s/) {
            # Folded header: continuation of the last header seen.
            $hdr{$lk} .= "\n".$line;
        }
        elsif ($line =~ /^([^\s:]+): (.*)$/) {
            # Anchored at line start so that only the real header name
            # is captured, never the tail of a longer name.
            my ($name, $value) = ($1, $2);
            my $k = $name;
            if ($restricthdr) {
                $k = ($khdr{$name} ? $name : "#");
            }
            $hdr{$k} = $value;
            $lk = $k;
        }
    }

    $hdr{__LINES__} = scalar(@l);
    $hdr{__BODY__} = join('', @l);
    delete($hdr{'#'});
}