blob: a8c2e56681ffa5f3a4f4e3a3d53fdb1cf706e073 [file] [log] [blame]
# <@LICENSE>
# Copyright 2004 Apache Software Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
=head1 NAME
Mail::SpamAssassin::Plugin::DCC - perform DCC check of messages
=head1 SYNOPSIS
loadplugin Mail::SpamAssassin::Plugin::DCC
=head1 DESCRIPTION
The DCC or Distributed Checksum Clearinghouse is a system of servers
collecting and counting checksums of millions of mail messages. The
counts can be used by SpamAssassin to detect and reject or filter spam.
Because simplistic checksums of spam can be easily defeated, the main
DCC checksums are fuzzy and ignore aspects of messages. The fuzzy
checksums are changed as spam evolves.
Note that DCC is disabled by default in C<init.pre> because it is not
open source. See the DCC license for more details.
See http://www.rhyolite.com/anti-spam/dcc/ for more information about
DCC.
=cut
package Mail::SpamAssassin::Plugin::DCC;
use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::Logger;
use IO::Socket;
use strict;
use warnings;
use bytes;
use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);
sub new {
my $class = shift;
my $mailsaobject = shift;
$class = ref($class) || $class;
my $self = $class->SUPER::new($mailsaobject);
bless ($self, $class);
# are network tests enabled?
if ($mailsaobject->{local_tests_only}) {
$self->{dcc_disabled} = 1;
dbg("dcc: local tests only, disabling DCC");
}
else {
dbg("dcc: network tests on, registering DCC");
}
$self->register_eval_rule("check_dcc");
$self->set_config($mailsaobject->{conf});
return $self;
}
sub set_config {
my($self, $conf) = @_;
my @cmds = ();
=head1 USER OPTIONS
=over 4
=item use_dcc (0|1) (default: 1)
Whether to use DCC, if it is available.
=cut
push(@cmds, {
setting => 'use_dcc',
default => 1,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_BOOL,
});
=item dcc_timeout n (default: 5)
How many seconds you wait for DCC to complete, before scanning continues
without the DCC results.
=cut
push (@cmds, {
setting => 'dcc_timeout',
default => 5,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC,
});
=item dcc_body_max NUMBER
=item dcc_fuz1_max NUMBER
=item dcc_fuz2_max NUMBER
This option sets how often a message's body/fuz1/fuz2 checksum must have been
reported to the DCC server before SpamAssassin will consider the DCC check as
matched.
As nearly all DCC clients are auto-reporting these checksums, you should set
this to a relatively high value, e.g. C<999999> (this is DCC's MANY count).
The default is C<999999> for all these options.
=cut
push (@cmds, {
setting => 'dcc_body_max',
default => 999999,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
},
{
setting => 'dcc_fuz1_max',
default => 999999,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
},
{
setting => 'dcc_fuz2_max',
default => 999999,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
});
=back
=head1 ADMINISTRATOR OPTIONS
=over 4
=item dcc_home STRING
This option tells SpamAssassin specifically where to find the dcc homedir.
If C<dcc_path> is not specified, it will default to looking in
C<dcc_home/bin> for dcc client instead of relying on SpamAssassin to find it
in the current PATH. If it isn't found there, it will look in the current
PATH. If a C<dccifd> socket is found in C<dcc_home>, it will use that
interface that instead of C<dccproc>.
=cut
push (@cmds, {
setting => 'dcc_home',
is_admin => 1,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING
});
=item dcc_dccifd_path STRING
This option tells SpamAssassin specifically where to find the dccifd socket.
If C<dcc_dccifd_path> is not specified, it will default to looking in
C<dcc_home> If a C<dccifd> socket is found, it will use it instead of
C<dccproc>.
=cut
push (@cmds, {
setting => 'dcc_dccifd_path',
is_admin => 1,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING
});
=item dcc_path STRING
This option tells SpamAssassin specifically where to find the C<dccproc>
client instead of relying on SpamAssassin to find it in the current PATH.
Note that if I<taint mode> is enabled in the Perl interpreter, you should
use this, as the current PATH will have been cleared.
=cut
push (@cmds, {
setting => 'dcc_path',
is_admin => 1,
default => undef,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_STRING
});
=item dcc_options options
Specify additional options to the dccproc(8) command. Please note that only
characters in the range [0-9A-Za-z ,._/-] are allowed for security reasons.
The default is C<-R>.
=cut
push (@cmds, {
setting => 'dcc_options',
is_admin => 1,
default => '-R',
code => sub {
my ($self, $key, $value, $line) = @_;
if ($value !~ m{^([0-9A-Za-z ,._/-]+)$}) {
return $Mail::SpamAssassin::Conf::INVALID_VALUE;
}
$self->{dcc_options} = $1;
}
});
$conf->{parser}->register_commands(\@cmds);
}
sub is_dccifd_available {
my ($self) = @_;
$self->{dccifd_available} = 0;
if ($self->{main}->{conf}->{use_dcc} == 0) {
dbg("dcc: dccifd is not available: use_dcc is set to 0");
return 0;
}
my $dcchome = $self->{main}->{conf}->{dcc_home} || '';
my $dccifd = $self->{main}->{conf}->{dcc_dccifd_path} || '';
if (!$dccifd && ($dcchome && -S "$dcchome/dccifd")) {
$dccifd = "$dcchome/dccifd";
}
unless ($dccifd && -S $dccifd && -w _ && -r _) {
dbg("dcc: dccifd is not available: no r/w dccifd socket found");
return 0;
}
# remember any found dccifd socket
$self->{main}->{conf}->{dcc_dccifd_path} = $dccifd;
dbg("dcc: dccifd is available: " . $self->{main}->{conf}->{dcc_dccifd_path});
$self->{dccifd_available} = 1;
return 1;
}
sub is_dccproc_available {
my ($self) = @_;
$self->{dccproc_available} = 0;
if ($self->{main}->{conf}->{use_dcc} == 0) {
dbg("dcc: dccproc is not available: use_dcc is set to 0");
return 0;
}
my $dcchome = $self->{main}->{conf}->{dcc_home} || '';
my $dccproc = $self->{main}->{conf}->{dcc_path} || '';
if (!$dccproc && ($dcchome && -x "$dcchome/bin/dccproc")) {
$dccproc = "$dcchome/bin/dccproc";
}
unless ($dccproc) {
$dccproc = Mail::SpamAssassin::Util::find_executable_in_env_path('dccproc');
}
unless ($dccproc && -x $dccproc) {
dbg("dcc: dccproc is not available: no dccproc executable found");
return 0;
}
# remember any found dccproc
$self->{main}->{conf}->{dcc_path} = $dccproc;
dbg("dcc: dccproc is available: " . $self->{main}->{conf}->{dcc_path});
$self->{dccproc_available} = 1;
return 1;
}
sub get_dcc_interface {
my ($self) = @_;
if ($self->is_dccifd_available()) {
$self->{dcc_interface} = "dccifd";
}
elsif ($self->is_dccproc_available()) {
$self->{dcc_interface} = "dccproc";
}
else {
dbg("dcc: dccifd and dccproc are not available, disabling DCC");
$self->{dcc_interface} = "none";
$self->{dcc_disabled} = 1;
}
}
sub check_dcc {
my ($self, $permsgstatus, $full) = @_;
# short-circuit if there's already a X-DCC header with value of
# "bulk" from an upstream DCC check
if ($permsgstatus->get('ALL') =~ /^X-DCC-(?:[^:]{1,80}-)?Metrics:.*bulk/m) {
return 1;
}
$self->get_dcc_interface() unless $self->{dcc_interface};
return 0 if $self->{dcc_disabled};
if ($$full eq '') {
dbg("dcc: empty message, skipping dcc check");
return 0;
}
if ($self->{dccifd_available}) {
return $self->dccifd_lookup($permsgstatus, $full);
}
else {
return $self->dccproc_lookup($permsgstatus, $full);
}
return 0;
}
sub dccifd_lookup {
my ($self, $permsgstatus, $fulltext) = @_;
my $response = "";
my %count;
my $left;
my $right;
my $timeout = $self->{main}->{conf}->{dcc_timeout};
my $sockpath = $self->{main}->{conf}->{dcc_dccifd_path};
$count{body} = 0;
$count{fuz1} = 0;
$count{fuz2} = 0;
$permsgstatus->enter_helper_run_mode();
my $oldalarm = 0;
eval {
# safe to use $SIG{ALRM} here instead of Util::trap_sigalrm_fully(),
# since there are no killer regexp hang dangers here
local $SIG{ALRM} = sub { die "__alarm__\n" };
$oldalarm = alarm $timeout;
my $sock = IO::Socket::UNIX->new(Type => SOCK_STREAM,
Peer => $sockpath) || dbg("dcc: failed to open socket") && die;
# send the options and other parameters to the daemon
$sock->print("header\n") || dbg("dcc: failed write") && die; # options
$sock->print("0.0.0.0\n") || dbg("dcc: failed write") && die; # client
$sock->print("\n") || dbg("dcc: failed write") && die; # HELO value
$sock->print("\n") || dbg("dcc: failed write") && die; # sender
$sock->print("unknown\r\n") || dbg("dcc: failed write") && die; # recipients
$sock->print("\n") || dbg("dcc: failed write") && die; # recipients
$sock->print($$fulltext);
$sock->shutdown(1) || dbg("dcc: failed socket shutdown: $!") && die;
$sock->getline() || dbg("dcc: failed read status") && die;
$sock->getline() || dbg("dcc: failed read multistatus") && die;
my @null = $sock->getlines();
if (!@null) {
# no facility prefix on this
die("failed to read header\n");
}
# the first line will be the header we want to look at
chomp($response = shift @null);
# but newer versions of DCC fold the header if it's too long...
while (my $v = shift @null) {
last unless ($v =~ s/^\s+/ /); # if this line wasn't folded, stop
chomp $v;
$response .= $v;
}
dbg("dcc: dccifd got response: $response");
alarm $oldalarm;
};
# do NOT reinstate $oldalarm here; we may already have done that in
# the success case. leave it to the error handler below
my $err = $@;
$permsgstatus->leave_helper_run_mode();
if ($err) {
alarm $oldalarm;
chomp $err;
$response = undef;
if ($err =~ /__alarm__/) {
dbg("dcc: dccifd check timed out after $timeout secs.");
return 0;
} else {
warn("dcc: dccifd -> check skipped: $! $err");
return 0;
}
}
if (!defined $response || $response !~ /^X-DCC/) {
dbg("dcc: dccifd check failed - no X-DCC returned: $response");
return 0;
}
if ($response =~ /^X-DCC-(.*)-Metrics: (.*)$/) {
$permsgstatus->{tag_data}->{DCCB} = $1;
$permsgstatus->{tag_data}->{DCCR} = $2;
}
$response =~ s/many/999999/ig;
$response =~ s/ok\d?/0/ig;
if ($response =~ /Body=(\d+)/) {
$count{body} = $1+0;
}
if ($response =~ /Fuz1=(\d+)/) {
$count{fuz1} = $1+0;
}
if ($response =~ /Fuz2=(\d+)/) {
$count{fuz2} = $1+0;
}
if ($count{body} >= $self->{main}->{conf}->{dcc_body_max} ||
$count{fuz1} >= $self->{main}->{conf}->{dcc_fuz1_max} ||
$count{fuz2} >= $self->{main}->{conf}->{dcc_fuz2_max})
{
dbg(sprintf("dcc: listed: BODY=%s/%s FUZ1=%s/%s FUZ2=%s/%s",
$count{body}, $self->{main}->{conf}->{dcc_body_max},
$count{fuz1}, $self->{main}->{conf}->{dcc_fuz1_max},
$count{fuz2}, $self->{main}->{conf}->{dcc_fuz2_max}));
return 1;
}
return 0;
}
sub dccproc_lookup {
my ($self, $permsgstatus, $fulltext) = @_;
my $response = undef;
my %count;
my $timeout = $self->{main}->{conf}->{dcc_timeout};
$count{body} = 0;
$count{fuz1} = 0;
$count{fuz2} = 0;
$permsgstatus->enter_helper_run_mode();
# use a temp file here -- open2() is unreliable, buffering-wise, under spamd
my $tmpf = $permsgstatus->create_fulltext_tmpfile($fulltext);
my $oldalarm = 0;
eval {
# safe to use $SIG{ALRM} here instead of Util::trap_sigalrm_fully(),
# since there are no killer regexp hang dangers here
local $SIG{ALRM} = sub { die "__alarm__\n" };
local $SIG{PIPE} = sub { die "__brokenpipe__\n" };
$oldalarm = alarm $timeout;
# note: not really tainted, this came from system configuration file
my $path = Mail::SpamAssassin::Util::untaint_file_path($self->{main}->{conf}->{dcc_path});
my $opts = $self->{main}->{conf}->{dcc_options} || '';
dbg("dcc: opening pipe: " . join(' ', $path, "-H", $opts, "< $tmpf"));
my $pid = Mail::SpamAssassin::Util::helper_app_pipe_open(*DCC,
$tmpf, 1, $path, "-H", split(' ', $opts));
$pid or die "$!\n";
my @null = <DCC>;
close DCC;
if (!@null) {
# no facility prefix on this
die("failed to read header\n");
}
# the first line will be the header we want to look at
chomp($response = shift @null);
# but newer versions of DCC fold the header if it's too long...
while (my $v = shift @null) {
last unless ($v =~ s/^\s+/ /); # if this line wasn't folded, stop
chomp $v;
$response .= $v;
}
unless (defined($response)) {
# no facility prefix on this
die("no response\n"); # yes, this is possible
}
dbg("dcc: got response: $response");
# note: this must be called BEFORE leave_helper_run_mode()
# $self->cleanup_kids($pid);
alarm $oldalarm;
};
# do NOT reinstate $oldalarm here; we may already have done that in
# the success case. leave it to the error handler below
my $err = $@;
$permsgstatus->leave_helper_run_mode();
if ($err) {
alarm $oldalarm;
chomp $err;
if ($err =~ /^__alarm__$/) {
dbg("dcc: check timed out after $timeout seconds");
} elsif ($err =~ /^__brokenpipe__$/) {
dbg("dcc: check failed: broken pipe");
} elsif ($err eq "no response") {
dbg("dcc: check failed: no response");
} else {
warn("dcc: check failed: $err\n");
}
return 0;
}
if (!defined($response) || $response !~ /^X-DCC/) {
$response ||= '';
dbg("dcc: check failed: no X-DCC returned (did you create a map file?): $response");
return 0;
}
if ($response =~ /^X-DCC-(.*)-Metrics: (.*)$/) {
$permsgstatus->{tag_data}->{DCCB} = $1;
$permsgstatus->{tag_data}->{DCCR} = $2;
}
$response =~ s/many/999999/ig;
$response =~ s/ok\d?/0/ig;
if ($response =~ /Body=(\d+)/) {
$count{body} = $1+0;
}
if ($response =~ /Fuz1=(\d+)/) {
$count{fuz1} = $1+0;
}
if ($response =~ /Fuz2=(\d+)/) {
$count{fuz2} = $1+0;
}
if ($count{body} >= $self->{main}->{conf}->{dcc_body_max} ||
$count{fuz1} >= $self->{main}->{conf}->{dcc_fuz1_max} ||
$count{fuz2} >= $self->{main}->{conf}->{dcc_fuz2_max})
{
dbg(sprintf("dcc: listed: BODY=%s/%s FUZ1=%s/%s FUZ2=%s/%s",
$count{body}, $self->{main}->{conf}->{dcc_body_max},
$count{fuz1}, $self->{main}->{conf}->{dcc_fuz1_max},
$count{fuz2}, $self->{main}->{conf}->{dcc_fuz2_max}));
return 1;
}
return 0;
}
# only supports dccproc right now
sub plugin_report {
my ($self, $options) = @_;
return if $self->{dcc_disabled};
if (!defined $self->{dccproc_available}) {
$self->is_dccproc_available();
}
if ($self->{dccproc_available} && !$self->{options}->{dont_report_to_dcc}) {
# use temporary file: open2() is unreliable due to buffering under spamd
my $tmpf = $options->{report}->create_fulltext_tmpfile($options->{text});
if ($self->dcc_report($options, $tmpf)) {
$options->{report}->{report_available} = 1;
info("reporter: spam reported to DCC");
$options->{report}->{report_return} = 1;
}
else {
info("reporter: could not report spam to DCC");
}
$options->{report}->delete_fulltext_tmpfile();
}
}
sub dcc_report {
my ($self, $options, $tmpf) = @_;
my $timeout = $options->{report}->{conf}->{dcc_timeout};
$options->{report}->enter_helper_run_mode();
my $oldalarm = 0;
eval {
local $SIG{ALRM} = sub { die "__alarm__\n" };
local $SIG{PIPE} = sub { die "__brokenpipe__\n" };
$oldalarm = alarm $timeout;
# note: not really tainted, this came from system configuration file
my $path = Mail::SpamAssassin::Util::untaint_file_path($options->{report}->{conf}->{dcc_path});
my $opts = $options->{report}->{conf}->{dcc_options} || '';
my $pid = Mail::SpamAssassin::Util::helper_app_pipe_open(*DCC,
$tmpf, 1, $path, "-t", "many", split(' ', $opts));
$pid or die "$!\n";
my @ignored = <DCC>;
$options->{report}->close_pipe_fh(\*DCC);
waitpid ($pid, 0);
alarm $oldalarm;
};
my $err = $@;
# do not call alarm $oldalarm here, that *may* have already taken place
$options->{report}->leave_helper_run_mode();
if ($err) {
alarm $oldalarm; # reinstate the one we missed
chomp $err;
if ($err =~ /^__alarm__$/) {
dbg("reporter: DCC report timed out after $timeout seconds");
} elsif ($err =~ /^__brokenpipe__$/) {
dbg("reporter: DCC report failed: broken pipe");
} else {
warn("reporter: DCC report failed: $err\n");
}
return 0;
}
return 1;
}
1;
=back
=cut