spamd-apache2/lib/Mail/SpamAssassin/Spamd.pm - spamassassin - Git at Google

 package Mail::SpamAssassin::Spamd;

 our (%conf_backup, %msa_backup);

 use Mail::SpamAssassin::Logger;
 eval { use Time::HiRes qw(time); };

 our $SPAMD_VER = '1.4';    # TODO: support for `Compress: zlib'
 our %resphash = (
   EX_OK          => 0,     # no problems
   EX_USAGE       => 64,    # command line usage error
   EX_DATAERR     => 65,    # data format error
   EX_NOINPUT     => 66,    # cannot open input
   EX_NOUSER      => 67,    # addressee unknown
   EX_NOHOST      => 68,    # host name unknown
   EX_UNAVAILABLE => 69,    # service unavailable
   EX_SOFTWARE    => 70,    # internal software error
   EX_OSERR       => 71,    # system error (e.g., can't fork)
   EX_OSFILE      => 72,    # critical OS file missing
   EX_CANTCREAT   => 73,    # can't create (user) output file
   EX_IOERR       => 74,    # input/output error
   EX_TEMPFAIL    => 75,    # temp failure; user is invited to retry
   EX_PROTOCOL    => 76,    # remote error in protocol
   EX_NOPERM      => 77,    # permission denied
   EX_CONFIG      => 78,    # configuration error
   EX_TIMEOUT     => 79,    # read timeout
 );

 =head1 NAME

 Mail::SpamAssassin::Spamd

 =head1 SYNOPSIS

  use base qw(Mail::SpamAssassin::Spamd);
  sub ... { ... }
  ...

 =head1 DESCRIPTION

 This module contains a skeleton for handling client request in spamd
 implementation.  Must not be used directly, but subclassed.

 An instance should have lifetime of a single request.

 Interface is likely to change.

 See the source code of C<spamd(1)> and C<Mail::SpamAssassin::Spamd::Apache2(3)>.

 =head1 AUTHORS

 The SpamAssassin(tm) Project E<lt>https://spamassassin.apache.org/E<gt>

 =head1 COPYRIGHT

 SpamAssassin is distributed under the Apache License, Version 2.0, as
 described in the file C<LICENSE> included with the distribution.

 =head2 METHODS

 =over

 =item C<log_connection()>

 Call as soon as the connection is accepted.

 =cut

 sub log_connection {
   my ($self) = @_;
   info(sprintf "connection from %s [%s] at port %s\n",
     $self->_remote_host, $self->_remote_ip, $self->_remote_port);
 }

 =item C<log_start_work()>

 Call after C<parse_msgids()>.

 =cut

 sub log_start_work {
   my ($self) = @_;
   info(
     sprintf "%s message %s%s for %s:%d\n",
     ($self->{method} eq 'PROCESS' ? 'processing' : 'checking'),
     (defined $self->{msgid}  ? $self->{msgid}           : '(unknown)'),
     (defined $self->{rmsgid} ? 'aka ' . $self->{rmsgid} : ''),
     $self->user,
     $>,
   );
 }

 =item C<log_end_work()>

 Call after C<pass_through_sa()>.

 =cut

 sub log_end_work {
   my ($self) = @_;
   if ($self->{method} eq 'TELL') {
     my $info_str;
     $info_str .= 'Setting' . join ',', @{ $self->{did_set} }
       if @{ $self->{did_set} };
     $info_str .= 'Removing' . join ',', @{ $self->{did_remove} }
       if @{ $self->{did_remove} };
     info(
       sprintf 'spamd: Tell: %s for $current_user:%d in'
         . ' %.1f seconds, %d bytes',
       (defined $info_str ? $info_str : 'Did nothing'),
       $>,
       $self->{scan_time},
       $self->{actual_length},
     );
   }
   else {
     info(
       sprintf "%s (%.1f/%.1f) for %s:%d in %.1f seconds, %d bytes.\n",
       ($self->status->is_spam ? 'identified spam' : 'clean message'),
       $self->status->get_score,
       $self->status->get_required_score,
       $self->user,
       $>,
       $self->{scan_time},
       $self->{actual_length},
     );
   }
 }

 =item C<log_result()>

 Call as late as possible, after sending response to the client.

 =cut

 sub log_result {
   my ($self) = @_;
   my @extra = (
     'scantime=' . sprintf('%.1f', $_[0]->{scan_time}),
     'size=' . $self->{actual_length},
     'user=' . $self->user,
     'uid=' . $>,
     'required_score=' . $self->status->get_required_score,
     'rhost=' . $self->_remote_host,
     'raddr=' . $self->_remote_ip,
     'rport=' . $self->_remote_port,
   );
   {
     (my $safe = defined $self->{msgid} ? $self->{msgid} : '(unknown)') =~
       s/[\x00-\x20\s,]/_/gs;
     push @extra, "mid=$safe";
   }
   if ($self->{rmsgid}) {
     (my $safe = $self->{rmsgid}) =~ s/[\x00-\x20\s,]/_/gs;
     push @extra, "rmid=$safe";
   }
   push @extra, "bayes=" . $self->status->{bayes_score}
     if defined $self->status->{bayes_score};
   push @extra, "autolearn=" . $self->status->get_autolearn_status;
   my $yorn = $self->status->is_spam ? 'Y' : '.';
   my $tests = join ",", sort grep length, $self->status->get_names_of_tests_hit;
   access_info(sprintf "result: %s %2d - %s %s\n",
     $yorn, $self->status->get_score, $tests, join ',', @extra);
 }


 =item C<check_headers()>

 Sanity checks on headers sent by the client.
 Sends status line indicating error to the client and returns false on
 first problem found.

 =cut

 sub check_headers {
   my $self = shift;

   if ($self->cfg->{auth_ident}) {
     unless (exists $self->headers_in->{user}) {
       $self->service_unavailable_error('User header required');
       return 0;
     }
     $self->auth_ident($self->headers_in->{user})
       or return 0;
   }

   my $content_length = $self->headers_in->{content_length};
   if (defined $content_length) {    # sanity check
     if ( $content_length !~ /^\d{1,15}$/
       || $content_length == 0)
     {
       $self->protocol_error('Content-Length too ugly');
       return 0;
     }
     elsif ($self->cfg->{msg_size_limit}
       && $content_length > $self->cfg->{msg_size_limit})
     {
       $self->service_unavailable_error('Content-Length exceeds limit');
       return 0;
     }
   }


   if ($self->cfg->{allow_tell} && $self->{method} eq 'TELL') {
     my ($set_local, $set_remote, $remove_local, $remove_remote) = (
       $self->headers_in->{set}    =~ /local/,
       $self->headers_in->{set}    =~ /remote/,
       $self->headers_in->{remove} =~ /local/,
       $self->headers_in->{remove} =~ /remote/,
     );

     if ($set_local && $remove_local) {
       $self->protocol_error(
         "Unable to set local and remove local in the same operation.");
       return 0;
     }

     if ($set_remote && $remove_remote) {
       $self->protocol_error(
         "Unable to set remote and remove remote in the same operation.");
       return 0;
     }
   }

   if ($self->headers_in->{compress} && length $self->headers_in->{compress}) {
     $self->protocol_error('Compress not supported yet');  # TODO
     return 0;
   }

   1;
 }


 =item C<parse_msgids()>

 Extract the Message-Id(s) for logging purposes.

 =cut

 sub parse_msgids {
   my $self = shift;

   # Extract the Message-Id(s) for logging purposes.
   $self->{msgid}  = $self->{parsed}->get_pristine_header("Message-Id");
   $self->{rmsgid} = $self->{parsed}->get_pristine_header("Resent-Message-Id");

   foreach my $id (grep $self->{$_}, qw(msgid rmsgid)) {
     1 while $self->{$id} =~ s/\([^\(\)]*\)//;    # remove comments and
     $self->{$id} =~ s/^\s+|\s+$//g;          # leading and trailing spaces
     $self->{$id} =~ s/\s+/ /g;               # collapse whitespaces
     $self->{$id} =~ s/^.*?<(.*?)>.*$/$1/;    # keep only the id itself
     $self->{$id} =~ s/[^\x21-\x7e]/?/g;      # replace all weird chars
     $self->{$id} =~ s/[<>]/?/g;              # plus all dangling angle brackets
     $self->{$id} =~ s/^(.+)$/<$1>/;          # re-bracket the id (if not empty)
   }
 }


 =item C<service_unavailable_error('error message')>

 =item C<protocol_error('error message')>

 =item C<service_timeout('error message')>

 Send appropriate status line to the client and log the error.

 =cut

 sub service_unavailable_error {
   my $self = shift;
   my $msg = join '', @_;
   $self->send_status_line('EX_UNAVAILABLE', $msg);
   warn "spamd: service unavailable: $msg\n";
 }

 sub protocol_error {
   my $self = shift;
   my $msg = join '', @_;
   $self->send_status_line('EX_PROTOCOL', $msg);
   warn "spamd: bad protocol: header error: $msg\n";
 }

 sub service_timeout {
   my $self = shift;
   my $msg = join '', @_;
   $self->send_status_line('EX_TIMEOUT', $msg);
   warn "spamd: timeout: $msg\n";
 }

 =item C<send_status_line('EX_FOO', 'message')>

 EX_error constant defaults to C<EX_OK>.
 Message defaults to the name of the constant.

 =cut

 sub send_status_line {
   my $self = shift;
   my ($resp, $msg) = @_;
   $resp = defined $resp ? $resp : 'EX_OK';
   $msg  = defined $msg  ? $msg  : $resp;
   $self->send_buffer("SPAMD/$SPAMD_VER $resphash{$resp} $msg\r\n");
 }


 =item C<send_response()>

 Generates response (headers and body, no status line) to the request and sends
 it to the client.

 =cut

 sub send_response {
   my $self = shift;
   my $msg_resp = '';

   if ( $self->{method} eq 'PROCESS' or $self->{method} eq 'HEADERS' ) {
     $self->status->set_tag('REMOTEHOSTNAME', $self->_remote_host);
     $self->status->set_tag('REMOTEHOSTADDR', $self->_remote_ip);

     # Build the message to send back and measure it
     $msg_resp = $self->status->rewrite_mail;

     if ( $self->{method} eq 'HEADERS' ) {
       # just the headers; delete everything after first \015\012\015\012
       $msg_resp =~ s/(\015?\012\015?\012).*$/$1/s;
     }

     open my $dfh, '>', '/tmp/sadebug' or die $!;  # XXX: devel debug
     print $dfh $msg_resp;
     close $dfh or die $!;

     # Spamc protocol 1.3 means multi hdrs are OK
     $self->send_buffer($self->spamhdr)
       if $self->{client_version} >= 1.3;

     # Spamc protocol 1.2 means it accepts content-length
     # Earlier than 1.2 didn't accept content-length
     $self->send_buffer('Content-length: ' . length($msg_resp) . "\r\n\r\n")
       if $self->{client_version} >= 1.2;
   }
   elsif ($self->{method} eq 'TELL') {
     my $response;
     $response .= 'DidSet: ' . join(',', @{ $self->{did_set} }) . "\r\n"
       if @{ $self->{did_set} };
     $response .= 'DidRemove: ' . join(',', @{ $self->{did_remove} }) . "\r\n"
       if @{ $self->{did_remove} };
     $self->send_buffer($response, "Content-Length: 0\r\n", "\r\n");
   }
   else {                   # $method eq 'CHECK' et al
     if ($self->{method} eq 'CHECK') {
       ## just headers
     }
     elsif ($self->{method} eq 'REPORT'
       or $self->{method} eq 'REPORT_IFSPAM' && $self->status->is_spam)
     {
       $msg_resp = $self->status->get_report;
     }
     elsif ($self->{method} eq 'REPORT_IFSPAM') {
       ## message is ham, $msg_resp remains empty
     }
     elsif ($self->{method} eq 'SYMBOLS') {
       $msg_resp = $self->status->get_names_of_tests_hit;
       $msg_resp .= "\r\n" if $self->{client_version} < 1.3;
     }
     else {    # FIXME: this should *never* happen, yet it does...
       die "spamd: unknown method '$self->{method}'";
     }

     # Spamc protocol 1.3 means multi hdrs are OK
     $self->send_buffer('Content-length: ' . length($msg_resp) . "\r\n")
       if $self->{client_version} >= 1.3;
     $self->send_buffer($self->spamhdr, "\r\n");
   }

   $self->send_buffer($msg_resp);

   # any better place to do it?
   $self->{scan_time} = time - $self->{start_time};
 }

 =item C<pass_through_sa()>

 Runs the actual tests.  Wrap it with C<eval()> to implement timeout.

 =cut

 sub pass_through_sa {
   my $self = shift;

   if ($self->{method} eq 'TELL') {

     # bleh, three copies of the message here... :-/
     # do it in read_body?
     if ($self->{parsed}->get_header("X-Spam-Checker-Version")) {
       my $new_mail =
         $self->spamtest->parse(
         $self->spamtest->remove_spamassassin_markup($self->{parsed}), 1);
       $self->{parsed}->finish;
       $self->{parsed} = $new_mail;
     }

     my ($set_local, $set_remote, $remove_local, $remove_remote) = (
       $self->headers_in->{set}    =~ /local/,
       $self->headers_in->{set}    =~ /remote/,
       $self->headers_in->{remove} =~ /local/,
       $self->headers_in->{remove} =~ /remote/,
     );

     if ($set_local) {
       my $status =
         $self->spamtest->learn($mail, undef,
         ($self->headers_in->{message_class} eq 'spam' ? 1 : 0), 0);
       push @{ $self->{did_set} }, 'local' if $status->did_learn;
       $status->finish;
     }

     if ($remove_local) {
       my $status = $self->spamtest->learn($mail, undef, undef, 1);
       push @{ $self->{did_remove} }, 'local' if $status->did_learn;
       $status->finish;
     }

     if ($set_remote) {
       require Mail::SpamAssassin::Reporter;
       my $msgrpt =
         Mail::SpamAssassin::Reporter->new($self->spamtest, $self->{parsed});
       push @{ $self->{did_set} }, 'remote' if $msgrpt->report;
     }

     if ($remove_remote) {
       require Mail::SpamAssassin::Reporter;
       my $msgrpt =
         Mail::SpamAssassin::Reporter->new($self->spamtest, $self->{parsed});
       push @{ $self->{did_remove} }, 'remote' if $msgrpt->revoke;
     }
   }
   else {  # method other than TELL (or PING)
     $self->{status} = $self->spamtest->check($self->{parsed});
   }

   undef;
 }


 =item C<spamhdr()>

 Generates the C<Spam: status ; score / threshold> response header.

 =cut

 sub spamhdr {
   my $self = shift;

   my $msg_score     = sprintf('%.1f', $self->status->get_score);
   my $msg_threshold = sprintf('%.1f', $self->status->get_required_score);

   my $response_spam_status;
   if ($self->status->is_spam) {
     $response_spam_status =
       $self->{method} eq 'REPORT_IFSPAM' ? 'Yes' : 'True';
   }
   else {
     $response_spam_status =
       $self->{method} eq 'REPORT_IFSPAM' ? 'No' : 'False';
   }

   return "Spam: $response_spam_status ; $msg_score / $msg_threshold\r\n";
 }


 =item C<read_user_config()>

 Read config for the current user and register a cleanup handler to
 restore state of the SA object later.  This is a wrapper around the
 handle_user_* methods.

 =cut

 # Yes, I could have made %mapping non-lexical, so one could add something
 # there.  But I don't think it would be the right way to provide this
 # functionality; contact the dev list if you need it.
 {
   my %mapping = (
     'local' => 'handle_user_local',
     'sql'   => 'handle_user_sql',
     'ldap'  => 'handle_user_ldap',
   );

   # This function should run only once per connection (reason: cleanup_register).
   sub read_user_config {
     my $self = shift;
     return unless defined $self->headers_in->{user};
     for my $src (
       grep $self->can($_),
       map { exists $mapping{$_} ? $mapping{$_} : $_ }
       @{ $self->cfg->{sa_users} }
       )
     {
       my $ret = $self->$src($self->headers_in->{user});
       next unless $ret;
       $self->cleanup_register(\&restore_config, $self->spamtest);
       return $ret;
     }
     return 0;
   }
 }

 =item C<handle_user_sql('username')>

 load_scoreonly_sql for the given user.
 Do not call this directly.

 =cut

 sub handle_user_sql {
   my $self = shift;
   my ($username) = @_;
   $self->spamtest->load_scoreonly_sql($username)
     or return 0;
   $self->spamtest->signal_user_changed({ username => $username, user_dir => undef, });
   return 1;
 }

 =item C<handle_user_ldap()>

 load_scoreonly_ldap for the given user.
 Do not call this directly.

 =cut

 sub handle_user_ldap {
   my $self = shift;
   my ($username) = @_;
   dbg("ldap: entering handle_user_ldap($username)");
   $self->spamtest->load_scoreonly_ldap($username)
     or return 0;
   $self->spamtest->signal_user_changed({ username => $username, user_dir => undef, });
   return 1;
 }


 =item C<status()>

 Returns the Mail::SpamAssassin::PerMsgStatus object.  Only valid after
 C<pass_through_sa()>.

 =item C<spamtest()>

 Returns the Mail::SpamAssassin object.

 =cut

 sub status   { $_[0]->{status} }
 sub spamtest { $_[0]->{spamtest} }

 =item C<access_info()>

 =cut

 sub access_info { info(@_) }

 =item C<user()>

 Returns username as supplied by client in the User header or string
 '(unknown)'.  Use for logging purposes.

 =cut

 # FIXME: tidy this one, might contain trash
 sub user {
   defined $_[0]->headers_in->{user} ? $_[0]->headers_in->{user} : '(unknown)';
 }

 =item C<cfg()>

 Returns Mail::SpamAssassin::Spamd::Config object (or hash reference with
 resembling values).

 =cut

 sub cfg { $_[0]->{cfg} }

 =item C<headers_in()>

 Hash ref containing headers sent by the client.

 =cut

 sub headers_in { $_[0]->{headers_in} }

 =item C<cleanup_register(sub { ... }, $argument)>

 APR::Pool functionality -- call a piece of code when the object is
 destroyed.

 =cut

 sub cleanup_register {
   my $self = shift;
   $self->{pool} ||= Mail::SpamAssassin::Pool->new;
   $self->{pool}->cleanup_register(@_);
 }


 =back

 The following methods must be overloaded:

 =over

 =item C<_remote_host()>

 =item C<_remote_ip()>

 =item C<_remote_port()>

 Information about the client.

 =item C<new( spamtest => $sa_object, foo => 'bar', ... )>

 Creates new object; C<shift && bless { @_ }>, basically.

 =item C<handle_user_local('username')>

 read_scoreonly_config for the given user.  You might want to change uid,
 chdir, set $ENV, etc.  Do not call this directly.

 =item C<read_body()>

 Read body from the client, run $self->spamtest->parse and store result
 as the C<parsed> key.

 =item C<read_headers()>

 Read method and headers from the client.  Set various properties
 accordingly.

 =item C<send_buffer('list of', 'buffers to send')>

 Send buffers to the client.

 =item C<auth_ident()>

 XXX

 =back

 =cut


 #
 # we need these two functions until SA has some sort of config namespace
 #

 # called in Config/Apache2.pm
 # (yuck, at least 500K wasted memory... for each interpreter)
 sub backup_config { # -: a
   my $spamtest = shift;
   for my $key (qw(username user_dir userstate_dir learn_to_journal)) {
     $msa_backup{$key} = $spamtest->{$key} if exists $spamtest->{$key};
   }
   $spamtest->copy_config(undef, \%conf_backup)
     || die "spamd: error returned from copy_config\n";
 }

 # this should be registered as $c->pool->cleanup_register if we add some user
 # config;  warning: if we'll ever support persistent connections, this should
 # be done in the request pool (or behaviour defined in some other way)
 sub restore_config { # -: a
   my $spamtest = shift;
   for my $key (keys %msa_backup) {
     $spamtest->{$key} = $msa_backup{$key};
   }
   $spamtest->copy_config(\%conf_backup, undef)
     || die "spamd: error returned from copy_config\n";
 }


 # simulate APR::Pool
 package Mail::SpamAssassin::Pool;
 {
   local $@;
   eval { require APR::Pool; };
 }

 sub new {
   $INC{'APR/Pool.pm'} ? APR::Pool->new : bless [], shift;
 }

 sub cleanup_register {
   my $self = shift;
   push @$self, [@_];
 }

 sub DESTROY {
   my $self = shift;
   for my $cleaner (@$self) {
     (shift @$cleaner)->(@$cleaner);
   }
 }

 1;

 # vim: ts=2 sw=2 et
	package Mail::SpamAssassin::Spamd;

	our (%conf_backup, %msa_backup);

	use Mail::SpamAssassin::Logger;
	eval { use Time::HiRes qw(time); };

	our $SPAMD_VER = '1.4'; # TODO: support for `Compress: zlib'
	our %resphash = (
	EX_OK => 0, # no problems
	EX_USAGE => 64, # command line usage error
	EX_DATAERR => 65, # data format error
	EX_NOINPUT => 66, # cannot open input
	EX_NOUSER => 67, # addressee unknown
	EX_NOHOST => 68, # host name unknown
	EX_UNAVAILABLE => 69, # service unavailable
	EX_SOFTWARE => 70, # internal software error
	EX_OSERR => 71, # system error (e.g., can't fork)
	EX_OSFILE => 72, # critical OS file missing
	EX_CANTCREAT => 73, # can't create (user) output file
	EX_IOERR => 74, # input/output error
	EX_TEMPFAIL => 75, # temp failure; user is invited to retry
	EX_PROTOCOL => 76, # remote error in protocol
	EX_NOPERM => 77, # permission denied
	EX_CONFIG => 78, # configuration error
	EX_TIMEOUT => 79, # read timeout
	);

	=head1 NAME

	Mail::SpamAssassin::Spamd

	=head1 SYNOPSIS

	use base qw(Mail::SpamAssassin::Spamd);
	sub ... { ... }
	...

	=head1 DESCRIPTION

	This module contains a skeleton for handling client request in spamd
	implementation. Must not be used directly, but subclassed.

	An instance should have lifetime of a single request.

	Interface is likely to change.

	See the source code of C<spamd(1)> and C<Mail::SpamAssassin::Spamd::Apache2(3)>.

	=head1 AUTHORS

	The SpamAssassin(tm) Project E<lt>https://spamassassin.apache.org/E<gt>

	=head1 COPYRIGHT

	SpamAssassin is distributed under the Apache License, Version 2.0, as
	described in the file C<LICENSE> included with the distribution.

	=head2 METHODS

	=over

	=item C<log_connection()>

	Call as soon as the connection is accepted.

	=cut

	sub log_connection {
	my ($self) = @_;
	info(sprintf "connection from %s [%s] at port %s\n",
	$self->_remote_host, $self->_remote_ip, $self->_remote_port);
	}

	=item C<log_start_work()>

	Call after C<parse_msgids()>.

	=cut

	sub log_start_work {
	my ($self) = @_;
	info(
	sprintf "%s message %s%s for %s:%d\n",
	($self->{method} eq 'PROCESS' ? 'processing' : 'checking'),
	(defined $self->{msgid} ? $self->{msgid} : '(unknown)'),
	(defined $self->{rmsgid} ? 'aka ' . $self->{rmsgid} : ''),
	$self->user,
	$>,
	);
	}

	=item C<log_end_work()>

	Call after C<pass_through_sa()>.

	=cut

	sub log_end_work {
	my ($self) = @_;
	if ($self->{method} eq 'TELL') {
	my $info_str;
	$info_str .= 'Setting' . join ',', @{ $self->{did_set} }
	if @{ $self->{did_set} };
	$info_str .= 'Removing' . join ',', @{ $self->{did_remove} }
	if @{ $self->{did_remove} };
	info(
	sprintf 'spamd: Tell: %s for $current_user:%d in'
	. ' %.1f seconds, %d bytes',
	(defined $info_str ? $info_str : 'Did nothing'),
	$>,
	$self->{scan_time},
	$self->{actual_length},
	);
	}
	else {
	info(
	sprintf "%s (%.1f/%.1f) for %s:%d in %.1f seconds, %d bytes.\n",
	($self->status->is_spam ? 'identified spam' : 'clean message'),
	$self->status->get_score,
	$self->status->get_required_score,
	$self->user,
	$>,
	$self->{scan_time},
	$self->{actual_length},
	);
	}
	}

	=item C<log_result()>

	Call as late as possible, after sending response to the client.

	=cut

	sub log_result {
	my ($self) = @_;
	my @extra = (
	'scantime=' . sprintf('%.1f', $_[0]->{scan_time}),
	'size=' . $self->{actual_length},
	'user=' . $self->user,
	'uid=' . $>,
	'required_score=' . $self->status->get_required_score,
	'rhost=' . $self->_remote_host,
	'raddr=' . $self->_remote_ip,
	'rport=' . $self->_remote_port,
	);
	{
	(my $safe = defined $self->{msgid} ? $self->{msgid} : '(unknown)') =~
	s/[\x00-\x20\s,]/_/gs;
	push @extra, "mid=$safe";
	}
	if ($self->{rmsgid}) {
	(my $safe = $self->{rmsgid}) =~ s/[\x00-\x20\s,]/_/gs;
	push @extra, "rmid=$safe";
	}
	push @extra, "bayes=" . $self->status->{bayes_score}
	if defined $self->status->{bayes_score};
	push @extra, "autolearn=" . $self->status->get_autolearn_status;
	my $yorn = $self->status->is_spam ? 'Y' : '.';
	my $tests = join ",", sort grep length, $self->status->get_names_of_tests_hit;
	access_info(sprintf "result: %s %2d - %s %s\n",
	$yorn, $self->status->get_score, $tests, join ',', @extra);
	}


	=item C<check_headers()>

	Sanity checks on headers sent by the client.
	Sends status line indicating error to the client and returns false on
	first problem found.

	=cut

	sub check_headers {
	my $self = shift;

	if ($self->cfg->{auth_ident}) {
	unless (exists $self->headers_in->{user}) {
	$self->service_unavailable_error('User header required');
	return 0;
	}
	$self->auth_ident($self->headers_in->{user})
	or return 0;
	}

	my $content_length = $self->headers_in->{content_length};
	if (defined $content_length) { # sanity check
	if ( $content_length !~ /^\d{1,15}$/
	\|\| $content_length == 0)
	{
	$self->protocol_error('Content-Length too ugly');
	return 0;
	}
	elsif ($self->cfg->{msg_size_limit}
	&& $content_length > $self->cfg->{msg_size_limit})
	{
	$self->service_unavailable_error('Content-Length exceeds limit');
	return 0;
	}
	}


	if ($self->cfg->{allow_tell} && $self->{method} eq 'TELL') {
	my ($set_local, $set_remote, $remove_local, $remove_remote) = (
	$self->headers_in->{set} =~ /local/,
	$self->headers_in->{set} =~ /remote/,
	$self->headers_in->{remove} =~ /local/,
	$self->headers_in->{remove} =~ /remote/,
	);

	if ($set_local && $remove_local) {
	$self->protocol_error(
	"Unable to set local and remove local in the same operation.");
	return 0;
	}

	if ($set_remote && $remove_remote) {
	$self->protocol_error(
	"Unable to set remote and remove remote in the same operation.");
	return 0;
	}
	}

	if ($self->headers_in->{compress} && length $self->headers_in->{compress}) {
	$self->protocol_error('Compress not supported yet'); # TODO
	return 0;
	}

	1;
	}


	=item C<parse_msgids()>

	Extract the Message-Id(s) for logging purposes.

	=cut

	sub parse_msgids {
	my $self = shift;

	# Extract the Message-Id(s) for logging purposes.
	$self->{msgid} = $self->{parsed}->get_pristine_header("Message-Id");
	$self->{rmsgid} = $self->{parsed}->get_pristine_header("Resent-Message-Id");

	foreach my $id (grep $self->{$_}, qw(msgid rmsgid)) {
	1 while $self->{$id} =~ s/\([^\(\)]*\)//; # remove comments and
	$self->{$id} =~ s/^\s+\|\s+$//g; # leading and trailing spaces
	$self->{$id} =~ s/\s+/ /g; # collapse whitespaces
	$self->{$id} =~ s/^.?<(.?)>.*$/$1/; # keep only the id itself
	$self->{$id} =~ s/[^\x21-\x7e]/?/g; # replace all weird chars
	$self->{$id} =~ s/[<>]/?/g; # plus all dangling angle brackets
	$self->{$id} =~ s/^(.+)$/<$1>/; # re-bracket the id (if not empty)
	}
	}


	=item C<service_unavailable_error('error message')>

	=item C<protocol_error('error message')>

	=item C<service_timeout('error message')>

	Send appropriate status line to the client and log the error.

	=cut

	sub service_unavailable_error {
	my $self = shift;
	my $msg = join '', @_;
	$self->send_status_line('EX_UNAVAILABLE', $msg);
	warn "spamd: service unavailable: $msg\n";
	}

	sub protocol_error {
	my $self = shift;
	my $msg = join '', @_;
	$self->send_status_line('EX_PROTOCOL', $msg);
	warn "spamd: bad protocol: header error: $msg\n";
	}

	sub service_timeout {
	my $self = shift;
	my $msg = join '', @_;
	$self->send_status_line('EX_TIMEOUT', $msg);
	warn "spamd: timeout: $msg\n";
	}

	=item C<send_status_line('EX_FOO', 'message')>

	EX_error constant defaults to C<EX_OK>.
	Message defaults to the name of the constant.

	=cut

	sub send_status_line {
	my $self = shift;
	my ($resp, $msg) = @_;
	$resp = defined $resp ? $resp : 'EX_OK';
	$msg = defined $msg ? $msg : $resp;
	$self->send_buffer("SPAMD/$SPAMD_VER $resphash{$resp} $msg\r\n");
	}


	=item C<send_response()>

	Generates response (headers and body, no status line) to the request and sends
	it to the client.

	=cut

	sub send_response {
	my $self = shift;
	my $msg_resp = '';

	if ( $self->{method} eq 'PROCESS' or $self->{method} eq 'HEADERS' ) {
	$self->status->set_tag('REMOTEHOSTNAME', $self->_remote_host);
	$self->status->set_tag('REMOTEHOSTADDR', $self->_remote_ip);

	# Build the message to send back and measure it
	$msg_resp = $self->status->rewrite_mail;

	if ( $self->{method} eq 'HEADERS' ) {
	# just the headers; delete everything after first \015\012\015\012
	$msg_resp =~ s/(\015?\012\015?\012).*$/$1/s;
	}

	open my $dfh, '>', '/tmp/sadebug' or die $!; # XXX: devel debug
	print $dfh $msg_resp;
	close $dfh or die $!;

	# Spamc protocol 1.3 means multi hdrs are OK
	$self->send_buffer($self->spamhdr)
	if $self->{client_version} >= 1.3;

	# Spamc protocol 1.2 means it accepts content-length
	# Earlier than 1.2 didn't accept content-length
	$self->send_buffer('Content-length: ' . length($msg_resp) . "\r\n\r\n")
	if $self->{client_version} >= 1.2;
	}
	elsif ($self->{method} eq 'TELL') {
	my $response;
	$response .= 'DidSet: ' . join(',', @{ $self->{did_set} }) . "\r\n"
	if @{ $self->{did_set} };
	$response .= 'DidRemove: ' . join(',', @{ $self->{did_remove} }) . "\r\n"
	if @{ $self->{did_remove} };
	$self->send_buffer($response, "Content-Length: 0\r\n", "\r\n");
	}
	else { # $method eq 'CHECK' et al
	if ($self->{method} eq 'CHECK') {
	## just headers
	}
	elsif ($self->{method} eq 'REPORT'
	or $self->{method} eq 'REPORT_IFSPAM' && $self->status->is_spam)
	{
	$msg_resp = $self->status->get_report;
	}
	elsif ($self->{method} eq 'REPORT_IFSPAM') {
	## message is ham, $msg_resp remains empty
	}
	elsif ($self->{method} eq 'SYMBOLS') {
	$msg_resp = $self->status->get_names_of_tests_hit;
	$msg_resp .= "\r\n" if $self->{client_version} < 1.3;
	}
	else { # FIXME: this should never happen, yet it does...
	die "spamd: unknown method '$self->{method}'";
	}

	# Spamc protocol 1.3 means multi hdrs are OK
	$self->send_buffer('Content-length: ' . length($msg_resp) . "\r\n")
	if $self->{client_version} >= 1.3;
	$self->send_buffer($self->spamhdr, "\r\n");
	}

	$self->send_buffer($msg_resp);

	# any better place to do it?
	$self->{scan_time} = time - $self->{start_time};
	}

	=item C<pass_through_sa()>

	Runs the actual tests. Wrap it with C<eval()> to implement timeout.

	=cut

	sub pass_through_sa {
	my $self = shift;

	if ($self->{method} eq 'TELL') {

	# bleh, three copies of the message here... :-/
	# do it in read_body?
	if ($self->{parsed}->get_header("X-Spam-Checker-Version")) {
	my $new_mail =
	$self->spamtest->parse(
	$self->spamtest->remove_spamassassin_markup($self->{parsed}), 1);
	$self->{parsed}->finish;
	$self->{parsed} = $new_mail;
	}

	my ($set_local, $set_remote, $remove_local, $remove_remote) = (
	$self->headers_in->{set} =~ /local/,
	$self->headers_in->{set} =~ /remote/,
	$self->headers_in->{remove} =~ /local/,
	$self->headers_in->{remove} =~ /remote/,
	);

	if ($set_local) {
	my $status =
	$self->spamtest->learn($mail, undef,
	($self->headers_in->{message_class} eq 'spam' ? 1 : 0), 0);
	push @{ $self->{did_set} }, 'local' if $status->did_learn;
	$status->finish;
	}

	if ($remove_local) {
	my $status = $self->spamtest->learn($mail, undef, undef, 1);
	push @{ $self->{did_remove} }, 'local' if $status->did_learn;
	$status->finish;
	}

	if ($set_remote) {
	require Mail::SpamAssassin::Reporter;
	my $msgrpt =
	Mail::SpamAssassin::Reporter->new($self->spamtest, $self->{parsed});
	push @{ $self->{did_set} }, 'remote' if $msgrpt->report;
	}

	if ($remove_remote) {
	require Mail::SpamAssassin::Reporter;
	my $msgrpt =
	Mail::SpamAssassin::Reporter->new($self->spamtest, $self->{parsed});
	push @{ $self->{did_remove} }, 'remote' if $msgrpt->revoke;
	}
	}
	else { # method other than TELL (or PING)
	$self->{status} = $self->spamtest->check($self->{parsed});
	}

	undef;
	}


	=item C<spamhdr()>

	Generates the C<Spam: status ; score / threshold> response header.

	=cut

	sub spamhdr {
	my $self = shift;

	my $msg_score = sprintf('%.1f', $self->status->get_score);
	my $msg_threshold = sprintf('%.1f', $self->status->get_required_score);

	my $response_spam_status;
	if ($self->status->is_spam) {
	$response_spam_status =
	$self->{method} eq 'REPORT_IFSPAM' ? 'Yes' : 'True';
	}
	else {
	$response_spam_status =
	$self->{method} eq 'REPORT_IFSPAM' ? 'No' : 'False';
	}

	return "Spam: $response_spam_status ; $msg_score / $msg_threshold\r\n";
	}



	=item C<read_user_config()>

	Read config for the current user and register a cleanup handler to
	restore state of the SA object later. This is a wrapper around the
	handle_user_* methods.

	=cut

	# Yes, I could have made %mapping non-lexical, so one could add something
	# there. But I don't think it would be the right way to provide this
	# functionality; contact the dev list if you need it.
	{
	my %mapping = (
	'local' => 'handle_user_local',
	'sql' => 'handle_user_sql',
	'ldap' => 'handle_user_ldap',
	);

	# This function should run only once per connection (reason: cleanup_register).
	sub read_user_config {
	my $self = shift;
	return unless defined $self->headers_in->{user};
	for my $src (
	grep $self->can($_),
	map { exists $mapping{$_} ? $mapping{$_} : $_ }
	@{ $self->cfg->{sa_users} }
	)
	{
	my $ret = $self->$src($self->headers_in->{user});
	next unless $ret;
	$self->cleanup_register(\&restore_config, $self->spamtest);
	return $ret;
	}
	return 0;
	}
	}

	=item C<handle_user_sql('username')>

	load_scoreonly_sql for the given user.
	Do not call this directly.

	=cut

	sub handle_user_sql {
	my $self = shift;
	my ($username) = @_;
	$self->spamtest->load_scoreonly_sql($username)
	or return 0;
	$self->spamtest->signal_user_changed({ username => $username, user_dir => undef, });
	return 1;
	}

	=item C<handle_user_ldap()>

	load_scoreonly_ldap for the given user.
	Do not call this directly.

	=cut

	sub handle_user_ldap {
	my $self = shift;
	my ($username) = @_;
	dbg("ldap: entering handle_user_ldap($username)");
	$self->spamtest->load_scoreonly_ldap($username)
	or return 0;
	$self->spamtest->signal_user_changed({ username => $username, user_dir => undef, });
	return 1;
	}


	=item C<status()>

	Returns the Mail::SpamAssassin::PerMsgStatus object. Only valid after
	C<pass_through_sa()>.

	=item C<spamtest()>

	Returns the Mail::SpamAssassin object.

	=cut

	sub status { $_[0]->{status} }
	sub spamtest { $_[0]->{spamtest} }

	=item C<access_info()>

	=cut

	sub access_info { info(@_) }

	=item C<user()>

	Returns username as supplied by client in the User header or string
	'(unknown)'. Use for logging purposes.

	=cut

	# FIXME: tidy this one, might contain trash
	sub user {
	defined $_[0]->headers_in->{user} ? $_[0]->headers_in->{user} : '(unknown)';
	}

	=item C<cfg()>

	Returns Mail::SpamAssassin::Spamd::Config object (or hash reference with
	resembling values).

	=cut

	sub cfg { $_[0]->{cfg} }

	=item C<headers_in()>

	Hash ref containing headers sent by the client.

	=cut

	sub headers_in { $_[0]->{headers_in} }

	=item C<cleanup_register(sub { ... }, $argument)>

	APR::Pool functionality -- call a piece of code when the object is
	destroyed.

	=cut

	sub cleanup_register {
	my $self = shift;
	$self->{pool} \|\|= Mail::SpamAssassin::Pool->new;
	$self->{pool}->cleanup_register(@_);
	}





	=back

	The following methods must be overloaded:

	=over

	=item C<_remote_host()>

	=item C<_remote_ip()>

	=item C<_remote_port()>

	Information about the client.

	=item C<new( spamtest => $sa_object, foo => 'bar', ... )>

	Creates new object; C<shift && bless { @_ }>, basically.

	=item C<handle_user_local('username')>

	read_scoreonly_config for the given user. You might want to change uid,
	chdir, set $ENV, etc. Do not call this directly.

	=item C<read_body()>

	Read body from the client, run $self->spamtest->parse and store result
	as the C<parsed> key.

	=item C<read_headers()>

	Read method and headers from the client. Set various properties
	accordingly.

	=item C<send_buffer('list of', 'buffers to send')>

	Send buffers to the client.

	=item C<auth_ident()>

	XXX

	=back

	=cut




	#
	# we need these two functions until SA has some sort of config namespace
	#

	# called in Config/Apache2.pm
	# (yuck, at least 500K wasted memory... for each interpreter)
	sub backup_config { # -: a
	my $spamtest = shift;
	for my $key (qw(username user_dir userstate_dir learn_to_journal)) {
	$msa_backup{$key} = $spamtest->{$key} if exists $spamtest->{$key};
	}
	$spamtest->copy_config(undef, \%conf_backup)
	\|\| die "spamd: error returned from copy_config\n";
	}

	# this should be registered as $c->pool->cleanup_register if we add some user
	# config; warning: if we'll ever support persistent connections, this should
	# be done in the request pool (or behaviour defined in some other way)
	sub restore_config { # -: a
	my $spamtest = shift;
	for my $key (keys %msa_backup) {
	$spamtest->{$key} = $msa_backup{$key};
	}
	$spamtest->copy_config(\%conf_backup, undef)
	\|\| die "spamd: error returned from copy_config\n";
	}




	# simulate APR::Pool
	package Mail::SpamAssassin::Pool;
	{
	local $@;
	eval { require APR::Pool; };
	}

	sub new {
	$INC{'APR/Pool.pm'} ? APR::Pool->new : bless [], shift;
	}

	sub cleanup_register {
	my $self = shift;
	push @$self, [@_];
	}

	sub DESTROY {
	my $self = shift;
	for my $cleaner (@$self) {
	(shift @$cleaner)->(@$cleaner);
	}
	}

	1;

	# vim: ts=2 sw=2 et