| package Mail::SpamAssassin::Spamd; |
| |
| our (%conf_backup, %msa_backup); |
| |
| use Mail::SpamAssassin::Logger; |
| eval { use Time::HiRes qw(time); }; |
| |
| our $SPAMD_VER = '1.4'; # TODO: support for `Compress: zlib' |
| our %resphash = ( |
| EX_OK => 0, # no problems |
| EX_USAGE => 64, # command line usage error |
| EX_DATAERR => 65, # data format error |
| EX_NOINPUT => 66, # cannot open input |
| EX_NOUSER => 67, # addressee unknown |
| EX_NOHOST => 68, # host name unknown |
| EX_UNAVAILABLE => 69, # service unavailable |
| EX_SOFTWARE => 70, # internal software error |
| EX_OSERR => 71, # system error (e.g., can't fork) |
| EX_OSFILE => 72, # critical OS file missing |
| EX_CANTCREAT => 73, # can't create (user) output file |
| EX_IOERR => 74, # input/output error |
| EX_TEMPFAIL => 75, # temp failure; user is invited to retry |
| EX_PROTOCOL => 76, # remote error in protocol |
| EX_NOPERM => 77, # permission denied |
| EX_CONFIG => 78, # configuration error |
| EX_TIMEOUT => 79, # read timeout |
| ); |
| |
| =head1 NAME |
| |
| Mail::SpamAssassin::Spamd |
| |
| =head1 SYNOPSIS |
| |
| use base qw(Mail::SpamAssassin::Spamd); |
| sub ... { ... } |
| ... |
| |
| =head1 DESCRIPTION |
| |
| This module contains a skeleton for handling client request in spamd |
| implementation. Must not be used directly, but subclassed. |
| |
| An instance should have lifetime of a single request. |
| |
| Interface is likely to change. |
| |
| See the source code of C<spamd(1)> and C<Mail::SpamAssassin::Spamd::Apache2(3)>. |
| |
| =head1 AUTHORS |
| |
| The SpamAssassin(tm) Project E<lt>https://spamassassin.apache.org/E<gt> |
| |
| =head1 COPYRIGHT |
| |
| SpamAssassin is distributed under the Apache License, Version 2.0, as |
| described in the file C<LICENSE> included with the distribution. |
| |
| =head2 METHODS |
| |
| =over |
| |
| =item C<log_connection()> |
| |
| Call as soon as the connection is accepted. |
| |
| =cut |
| |
| sub log_connection { |
| my ($self) = @_; |
| info(sprintf "connection from %s [%s] at port %s\n", |
| $self->_remote_host, $self->_remote_ip, $self->_remote_port); |
| } |
| |
| =item C<log_start_work()> |
| |
| Call after C<parse_msgids()>. |
| |
| =cut |
| |
| sub log_start_work { |
| my ($self) = @_; |
| info( |
| sprintf "%s message %s%s for %s:%d\n", |
| ($self->{method} eq 'PROCESS' ? 'processing' : 'checking'), |
| (defined $self->{msgid} ? $self->{msgid} : '(unknown)'), |
| (defined $self->{rmsgid} ? 'aka ' . $self->{rmsgid} : ''), |
| $self->user, |
| $>, |
| ); |
| } |
| |
| =item C<log_end_work()> |
| |
| Call after C<pass_through_sa()>. |
| |
| =cut |
| |
| sub log_end_work { |
| my ($self) = @_; |
| if ($self->{method} eq 'TELL') { |
| my $info_str; |
| $info_str .= 'Setting' . join ',', @{ $self->{did_set} } |
| if @{ $self->{did_set} }; |
| $info_str .= 'Removing' . join ',', @{ $self->{did_remove} } |
| if @{ $self->{did_remove} }; |
| info( |
| sprintf 'spamd: Tell: %s for $current_user:%d in' |
| . ' %.1f seconds, %d bytes', |
| (defined $info_str ? $info_str : 'Did nothing'), |
| $>, |
| $self->{scan_time}, |
| $self->{actual_length}, |
| ); |
| } |
| else { |
| info( |
| sprintf "%s (%.1f/%.1f) for %s:%d in %.1f seconds, %d bytes.\n", |
| ($self->status->is_spam ? 'identified spam' : 'clean message'), |
| $self->status->get_score, |
| $self->status->get_required_score, |
| $self->user, |
| $>, |
| $self->{scan_time}, |
| $self->{actual_length}, |
| ); |
| } |
| } |
| |
| =item C<log_result()> |
| |
| Call as late as possible, after sending response to the client. |
| |
| =cut |
| |
| sub log_result { |
| my ($self) = @_; |
| my @extra = ( |
| 'scantime=' . sprintf('%.1f', $_[0]->{scan_time}), |
| 'size=' . $self->{actual_length}, |
| 'user=' . $self->user, |
| 'uid=' . $>, |
| 'required_score=' . $self->status->get_required_score, |
| 'rhost=' . $self->_remote_host, |
| 'raddr=' . $self->_remote_ip, |
| 'rport=' . $self->_remote_port, |
| ); |
| { |
| (my $safe = defined $self->{msgid} ? $self->{msgid} : '(unknown)') =~ |
| s/[\x00-\x20\s,]/_/gs; |
| push @extra, "mid=$safe"; |
| } |
| if ($self->{rmsgid}) { |
| (my $safe = $self->{rmsgid}) =~ s/[\x00-\x20\s,]/_/gs; |
| push @extra, "rmid=$safe"; |
| } |
| push @extra, "bayes=" . $self->status->{bayes_score} |
| if defined $self->status->{bayes_score}; |
| push @extra, "autolearn=" . $self->status->get_autolearn_status; |
| my $yorn = $self->status->is_spam ? 'Y' : '.'; |
| my $tests = join ",", sort grep length, $self->status->get_names_of_tests_hit; |
| access_info(sprintf "result: %s %2d - %s %s\n", |
| $yorn, $self->status->get_score, $tests, join ',', @extra); |
| } |
| |
| |
| =item C<check_headers()> |
| |
| Sanity checks on headers sent by the client. |
| Sends status line indicating error to the client and returns false on |
| first problem found. |
| |
| =cut |
| |
| sub check_headers { |
| my $self = shift; |
| |
| if ($self->cfg->{auth_ident}) { |
| unless (exists $self->headers_in->{user}) { |
| $self->service_unavailable_error('User header required'); |
| return 0; |
| } |
| $self->auth_ident($self->headers_in->{user}) |
| or return 0; |
| } |
| |
| my $content_length = $self->headers_in->{content_length}; |
| if (defined $content_length) { # sanity check |
| if ( $content_length !~ /^\d{1,15}$/ |
| || $content_length == 0) |
| { |
| $self->protocol_error('Content-Length too ugly'); |
| return 0; |
| } |
| elsif ($self->cfg->{msg_size_limit} |
| && $content_length > $self->cfg->{msg_size_limit}) |
| { |
| $self->service_unavailable_error('Content-Length exceeds limit'); |
| return 0; |
| } |
| } |
| |
| |
| if ($self->cfg->{allow_tell} && $self->{method} eq 'TELL') { |
| my ($set_local, $set_remote, $remove_local, $remove_remote) = ( |
| $self->headers_in->{set} =~ /local/, |
| $self->headers_in->{set} =~ /remote/, |
| $self->headers_in->{remove} =~ /local/, |
| $self->headers_in->{remove} =~ /remote/, |
| ); |
| |
| if ($set_local && $remove_local) { |
| $self->protocol_error( |
| "Unable to set local and remove local in the same operation."); |
| return 0; |
| } |
| |
| if ($set_remote && $remove_remote) { |
| $self->protocol_error( |
| "Unable to set remote and remove remote in the same operation."); |
| return 0; |
| } |
| } |
| |
| if ($self->headers_in->{compress} && length $self->headers_in->{compress}) { |
| $self->protocol_error('Compress not supported yet'); # TODO |
| return 0; |
| } |
| |
| 1; |
| } |
| |
| |
| =item C<parse_msgids()> |
| |
| Extract the Message-Id(s) for logging purposes. |
| |
| =cut |
| |
| sub parse_msgids { |
| my $self = shift; |
| |
| # Extract the Message-Id(s) for logging purposes. |
| $self->{msgid} = $self->{parsed}->get_pristine_header("Message-Id"); |
| $self->{rmsgid} = $self->{parsed}->get_pristine_header("Resent-Message-Id"); |
| |
| foreach my $id (grep $self->{$_}, qw(msgid rmsgid)) { |
| 1 while $self->{$id} =~ s/\([^\(\)]*\)//; # remove comments and |
| $self->{$id} =~ s/^\s+|\s+$//g; # leading and trailing spaces |
| $self->{$id} =~ s/\s+/ /g; # collapse whitespaces |
| $self->{$id} =~ s/^.*?<(.*?)>.*$/$1/; # keep only the id itself |
| $self->{$id} =~ s/[^\x21-\x7e]/?/g; # replace all weird chars |
| $self->{$id} =~ s/[<>]/?/g; # plus all dangling angle brackets |
| $self->{$id} =~ s/^(.+)$/<$1>/; # re-bracket the id (if not empty) |
| } |
| } |
| |
| |
| =item C<service_unavailable_error('error message')> |
| |
| =item C<protocol_error('error message')> |
| |
| =item C<service_timeout('error message')> |
| |
| Send appropriate status line to the client and log the error. |
| |
| =cut |
| |
| sub service_unavailable_error { |
| my $self = shift; |
| my $msg = join '', @_; |
| $self->send_status_line('EX_UNAVAILABLE', $msg); |
| warn "spamd: service unavailable: $msg\n"; |
| } |
| |
| sub protocol_error { |
| my $self = shift; |
| my $msg = join '', @_; |
| $self->send_status_line('EX_PROTOCOL', $msg); |
| warn "spamd: bad protocol: header error: $msg\n"; |
| } |
| |
| sub service_timeout { |
| my $self = shift; |
| my $msg = join '', @_; |
| $self->send_status_line('EX_TIMEOUT', $msg); |
| warn "spamd: timeout: $msg\n"; |
| } |
| |
| =item C<send_status_line('EX_FOO', 'message')> |
| |
| EX_error constant defaults to C<EX_OK>. |
| Message defaults to the name of the constant. |
| |
| =cut |
| |
| sub send_status_line { |
| my $self = shift; |
| my ($resp, $msg) = @_; |
| $resp = defined $resp ? $resp : 'EX_OK'; |
| $msg = defined $msg ? $msg : $resp; |
| $self->send_buffer("SPAMD/$SPAMD_VER $resphash{$resp} $msg\r\n"); |
| } |
| |
| |
| =item C<send_response()> |
| |
| Generates response (headers and body, no status line) to the request and sends |
| it to the client. |
| |
| =cut |
| |
| sub send_response { |
| my $self = shift; |
| my $msg_resp = ''; |
| |
| if ( $self->{method} eq 'PROCESS' or $self->{method} eq 'HEADERS' ) { |
| $self->status->set_tag('REMOTEHOSTNAME', $self->_remote_host); |
| $self->status->set_tag('REMOTEHOSTADDR', $self->_remote_ip); |
| |
| # Build the message to send back and measure it |
| $msg_resp = $self->status->rewrite_mail; |
| |
| if ( $self->{method} eq 'HEADERS' ) { |
| # just the headers; delete everything after first \015\012\015\012 |
| $msg_resp =~ s/(\015?\012\015?\012).*$/$1/s; |
| } |
| |
| open my $dfh, '>', '/tmp/sadebug' or die $!; # XXX: devel debug |
| print $dfh $msg_resp; |
| close $dfh or die $!; |
| |
| # Spamc protocol 1.3 means multi hdrs are OK |
| $self->send_buffer($self->spamhdr) |
| if $self->{client_version} >= 1.3; |
| |
| # Spamc protocol 1.2 means it accepts content-length |
| # Earlier than 1.2 didn't accept content-length |
| $self->send_buffer('Content-length: ' . length($msg_resp) . "\r\n\r\n") |
| if $self->{client_version} >= 1.2; |
| } |
| elsif ($self->{method} eq 'TELL') { |
| my $response; |
| $response .= 'DidSet: ' . join(',', @{ $self->{did_set} }) . "\r\n" |
| if @{ $self->{did_set} }; |
| $response .= 'DidRemove: ' . join(',', @{ $self->{did_remove} }) . "\r\n" |
| if @{ $self->{did_remove} }; |
| $self->send_buffer($response, "Content-Length: 0\r\n", "\r\n"); |
| } |
| else { # $method eq 'CHECK' et al |
| if ($self->{method} eq 'CHECK') { |
| ## just headers |
| } |
| elsif ($self->{method} eq 'REPORT' |
| or $self->{method} eq 'REPORT_IFSPAM' && $self->status->is_spam) |
| { |
| $msg_resp = $self->status->get_report; |
| } |
| elsif ($self->{method} eq 'REPORT_IFSPAM') { |
| ## message is ham, $msg_resp remains empty |
| } |
| elsif ($self->{method} eq 'SYMBOLS') { |
| $msg_resp = $self->status->get_names_of_tests_hit; |
| $msg_resp .= "\r\n" if $self->{client_version} < 1.3; |
| } |
| else { # FIXME: this should *never* happen, yet it does... |
| die "spamd: unknown method '$self->{method}'"; |
| } |
| |
| # Spamc protocol 1.3 means multi hdrs are OK |
| $self->send_buffer('Content-length: ' . length($msg_resp) . "\r\n") |
| if $self->{client_version} >= 1.3; |
| $self->send_buffer($self->spamhdr, "\r\n"); |
| } |
| |
| $self->send_buffer($msg_resp); |
| |
| # any better place to do it? |
| $self->{scan_time} = time - $self->{start_time}; |
| } |
| |
| =item C<pass_through_sa()> |
| |
| Runs the actual tests. Wrap it with C<eval()> to implement timeout. |
| |
| =cut |
| |
| sub pass_through_sa { |
| my $self = shift; |
| |
| if ($self->{method} eq 'TELL') { |
| |
| # bleh, three copies of the message here... :-/ |
| # do it in read_body? |
| if ($self->{parsed}->get_header("X-Spam-Checker-Version")) { |
| my $new_mail = |
| $self->spamtest->parse( |
| $self->spamtest->remove_spamassassin_markup($self->{parsed}), 1); |
| $self->{parsed}->finish; |
| $self->{parsed} = $new_mail; |
| } |
| |
| my ($set_local, $set_remote, $remove_local, $remove_remote) = ( |
| $self->headers_in->{set} =~ /local/, |
| $self->headers_in->{set} =~ /remote/, |
| $self->headers_in->{remove} =~ /local/, |
| $self->headers_in->{remove} =~ /remote/, |
| ); |
| |
| if ($set_local) { |
| my $status = |
| $self->spamtest->learn($mail, undef, |
| ($self->headers_in->{message_class} eq 'spam' ? 1 : 0), 0); |
| push @{ $self->{did_set} }, 'local' if $status->did_learn; |
| $status->finish; |
| } |
| |
| if ($remove_local) { |
| my $status = $self->spamtest->learn($mail, undef, undef, 1); |
| push @{ $self->{did_remove} }, 'local' if $status->did_learn; |
| $status->finish; |
| } |
| |
| if ($set_remote) { |
| require Mail::SpamAssassin::Reporter; |
| my $msgrpt = |
| Mail::SpamAssassin::Reporter->new($self->spamtest, $self->{parsed}); |
| push @{ $self->{did_set} }, 'remote' if $msgrpt->report; |
| } |
| |
| if ($remove_remote) { |
| require Mail::SpamAssassin::Reporter; |
| my $msgrpt = |
| Mail::SpamAssassin::Reporter->new($self->spamtest, $self->{parsed}); |
| push @{ $self->{did_remove} }, 'remote' if $msgrpt->revoke; |
| } |
| } |
| else { # method other than TELL (or PING) |
| $self->{status} = $self->spamtest->check($self->{parsed}); |
| } |
| |
| undef; |
| } |
| |
| |
| =item C<spamhdr()> |
| |
| Generates the C<Spam: status ; score / threshold> response header. |
| |
| =cut |
| |
| sub spamhdr { |
| my $self = shift; |
| |
| my $msg_score = sprintf('%.1f', $self->status->get_score); |
| my $msg_threshold = sprintf('%.1f', $self->status->get_required_score); |
| |
| my $response_spam_status; |
| if ($self->status->is_spam) { |
| $response_spam_status = |
| $self->{method} eq 'REPORT_IFSPAM' ? 'Yes' : 'True'; |
| } |
| else { |
| $response_spam_status = |
| $self->{method} eq 'REPORT_IFSPAM' ? 'No' : 'False'; |
| } |
| |
| return "Spam: $response_spam_status ; $msg_score / $msg_threshold\r\n"; |
| } |
| |
| |
| |
| =item C<read_user_config()> |
| |
| Read config for the current user and register a cleanup handler to |
| restore state of the SA object later. This is a wrapper around the |
| handle_user_* methods. |
| |
| =cut |
| |
| # Yes, I could have made %mapping non-lexical, so one could add something |
| # there. But I don't think it would be the right way to provide this |
| # functionality; contact the dev list if you need it. |
| { |
| my %mapping = ( |
| 'local' => 'handle_user_local', |
| 'sql' => 'handle_user_sql', |
| 'ldap' => 'handle_user_ldap', |
| ); |
| |
| # This function should run only once per connection (reason: cleanup_register). |
| sub read_user_config { |
| my $self = shift; |
| return unless defined $self->headers_in->{user}; |
| for my $src ( |
| grep $self->can($_), |
| map { exists $mapping{$_} ? $mapping{$_} : $_ } |
| @{ $self->cfg->{sa_users} } |
| ) |
| { |
| my $ret = $self->$src($self->headers_in->{user}); |
| next unless $ret; |
| $self->cleanup_register(\&restore_config, $self->spamtest); |
| return $ret; |
| } |
| return 0; |
| } |
| } |
| |
| =item C<handle_user_sql('username')> |
| |
| load_scoreonly_sql for the given user. |
| Do not call this directly. |
| |
| =cut |
| |
| sub handle_user_sql { |
| my $self = shift; |
| my ($username) = @_; |
| $self->spamtest->load_scoreonly_sql($username) |
| or return 0; |
| $self->spamtest->signal_user_changed({ username => $username, user_dir => undef, }); |
| return 1; |
| } |
| |
| =item C<handle_user_ldap()> |
| |
| load_scoreonly_ldap for the given user. |
| Do not call this directly. |
| |
| =cut |
| |
| sub handle_user_ldap { |
| my $self = shift; |
| my ($username) = @_; |
| dbg("ldap: entering handle_user_ldap($username)"); |
| $self->spamtest->load_scoreonly_ldap($username) |
| or return 0; |
| $self->spamtest->signal_user_changed({ username => $username, user_dir => undef, }); |
| return 1; |
| } |
| |
| |
| =item C<status()> |
| |
| Returns the Mail::SpamAssassin::PerMsgStatus object. Only valid after |
| C<pass_through_sa()>. |
| |
| =item C<spamtest()> |
| |
| Returns the Mail::SpamAssassin object. |
| |
| =cut |
| |
| sub status { $_[0]->{status} } |
| sub spamtest { $_[0]->{spamtest} } |
| |
| =item C<access_info()> |
| |
| =cut |
| |
| sub access_info { info(@_) } |
| |
| =item C<user()> |
| |
| Returns username as supplied by client in the User header or string |
| '(unknown)'. Use for logging purposes. |
| |
| =cut |
| |
| # FIXME: tidy this one, might contain trash |
| sub user { |
| defined $_[0]->headers_in->{user} ? $_[0]->headers_in->{user} : '(unknown)'; |
| } |
| |
| =item C<cfg()> |
| |
| Returns Mail::SpamAssassin::Spamd::Config object (or hash reference with |
| resembling values). |
| |
| =cut |
| |
| sub cfg { $_[0]->{cfg} } |
| |
| =item C<headers_in()> |
| |
| Hash ref containing headers sent by the client. |
| |
| =cut |
| |
| sub headers_in { $_[0]->{headers_in} } |
| |
| =item C<cleanup_register(sub { ... }, $argument)> |
| |
| APR::Pool functionality -- call a piece of code when the object is |
| destroyed. |
| |
| =cut |
| |
| sub cleanup_register { |
| my $self = shift; |
| $self->{pool} ||= Mail::SpamAssassin::Pool->new; |
| $self->{pool}->cleanup_register(@_); |
| } |
| |
| |
| |
| |
| |
| =back |
| |
| The following methods must be overloaded: |
| |
| =over |
| |
| =item C<_remote_host()> |
| |
| =item C<_remote_ip()> |
| |
| =item C<_remote_port()> |
| |
| Information about the client. |
| |
| =item C<new( spamtest => $sa_object, foo => 'bar', ... )> |
| |
| Creates new object; C<shift && bless { @_ }>, basically. |
| |
| =item C<handle_user_local('username')> |
| |
| read_scoreonly_config for the given user. You might want to change uid, |
| chdir, set $ENV, etc. Do not call this directly. |
| |
| =item C<read_body()> |
| |
| Read body from the client, run $self->spamtest->parse and store result |
| as the C<parsed> key. |
| |
| =item C<read_headers()> |
| |
| Read method and headers from the client. Set various properties |
| accordingly. |
| |
| =item C<send_buffer('list of', 'buffers to send')> |
| |
| Send buffers to the client. |
| |
| =item C<auth_ident()> |
| |
| XXX |
| |
| =back |
| |
| =cut |
| |
| |
| |
| |
| # |
| # we need these two functions until SA has some sort of config namespace |
| # |
| |
| # called in Config/Apache2.pm |
| # (yuck, at least 500K wasted memory... for each interpreter) |
| sub backup_config { # -: a |
| my $spamtest = shift; |
| for my $key (qw(username user_dir userstate_dir learn_to_journal)) { |
| $msa_backup{$key} = $spamtest->{$key} if exists $spamtest->{$key}; |
| } |
| $spamtest->copy_config(undef, \%conf_backup) |
| || die "spamd: error returned from copy_config\n"; |
| } |
| |
| # this should be registered as $c->pool->cleanup_register if we add some user |
| # config; warning: if we'll ever support persistent connections, this should |
| # be done in the request pool (or behaviour defined in some other way) |
| sub restore_config { # -: a |
| my $spamtest = shift; |
| for my $key (keys %msa_backup) { |
| $spamtest->{$key} = $msa_backup{$key}; |
| } |
| $spamtest->copy_config(\%conf_backup, undef) |
| || die "spamd: error returned from copy_config\n"; |
| } |
| |
| |
| |
| |
| # simulate APR::Pool |
| package Mail::SpamAssassin::Pool; |
| { |
| local $@; |
| eval { require APR::Pool; }; |
| } |
| |
| sub new { |
| $INC{'APR/Pool.pm'} ? APR::Pool->new : bless [], shift; |
| } |
| |
| sub cleanup_register { |
| my $self = shift; |
| push @$self, [@_]; |
| } |
| |
| sub DESTROY { |
| my $self = shift; |
| for my $cleaner (@$self) { |
| (shift @$cleaner)->(@$cleaner); |
| } |
| } |
| |
| 1; |
| |
| # vim: ts=2 sw=2 et |