blob: 897a336ca98376b254b663ac9cca1dbecd9cb795 [file] [log] [blame]
#!/usr/bin/perl -w
# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
use strict;
use warnings;
sub mywarn;
use Getopt::Long qw(:config bundling auto_help);
use Pod::Usage;
our ($opt_noannotate, $opt_h, $opt_ids);
GetOptions("noannotate" => \$opt_noannotate,
"ids" => \$opt_ids,
"h|headers" => \$opt_h
);
=head1 NAME
mboxget - Extract message(s) from an mbox (or other mail file)
=head1 SYNOPSIS
mboxget [options] <target> ...
Options:
-h, --headers Display only message headers
--ids List only the IDs, do not generate an mbox
--noannotate Do not add X-Mass-Check-Id header with message path
=head1 DESCRIPTION
B<mboxget> extracts and displays the messages specified on the command
line and on STDIN. Messages are expected to be listed as
<path_to_file>.<offeset> or simply <path_to_file>. Multiple messages
can be specified, in which case, each message will be output. This
script is very useful in combination with mass-check logs and
mboxes. Targets given on STDIN may be in B<mass-check> format, and may
even be preceeded by a filename and a colon (like the output of
B<grep> would provide)
Use the B<-h> option to just output headers.
By default an X-Mass-Check-Id header is added to make it easier to
find a message. Use B<--noannotate> to disable this.
=head1 EXAMPLES
To show spam messages that hit the rule BAYES_99
grep BAYES_99 masses.log | mboxget
To show the message indicated by "/path/to/my/mbox.1234"
mboxget /path/to/my/mbox.1234
To find the Subject lines of all spam messages that hit SUBJ_AS_SEEN
grep SUBJ_AS_SEEN spam.log | mboxget -h | grep ^Subject:
To get the ids or paths of messages that hit SUBJ_AS_SEEN
grep SUBJ_AS_SEEN spam.log | mboxget --ids
=cut
my $prog = $0;
$prog =~ s@.*/@@;
my $annotate = ($opt_noannotate ? 0 : 1);
foreach (@ARGV) {
handle_input($_);
}
while (<>) {
s/^[^\s:]+://; # filenames, from "grep foo *"
if (/^[Y\.]\s+-?\d+\s+(\S+)\s+\S+/) {
# mass-check format: . 0 /home/jm/Mail/Spam2/3475 FOO_BAR,BAZ
handle_input($1);
}
elsif (/^[Y\.]\s+-?\d+\s+(\S+)\s*$/) {
# mass-check format: . 0 /home/jm/Mail/Spam2/3475
handle_input($1);
}
else {
next if /^#/;
chomp;
handle_input($_);
}
}
exit;
sub handle_input {
my $where = shift;
if ($opt_ids) {
print "$where\n";
return;
}
my ($file, $offset);
if (-f $where) {
($file, $offset) = ($where);
} else {
($file, $offset) = ($where =~ m/(.*?)(?:\.(\d+))?$/);
}
if ($file =~ /\.gz$/) {
open (INPUT, "gunzip -cd $file |") or mywarn "gunzip $file failed: $!";
} elsif ($file =~ /\.bz2$/) {
open (INPUT, "bzip2 -cd $file |") or mywarn "bunzip2 $file failed: $!";
} else {
open (INPUT, "<$file") or mywarn "open $file failed: $!";
}
if ($offset) {
# TODO: steal open-file caching code from old revisions of
# mass-check-results-to-mbox
if (!seek(INPUT, $offset, 0)) {
mywarn "$prog: seek $offset failed: $!\n";
close INPUT;
return;
}
}
# read the message into @msg
my $past = 0;
my @msg = ();
while (<INPUT>) {
if ($past && defined($offset)) {
# only do this for mboxes
last if substr($_,0,5) eq "From ";
}
else {
$past = 1;
}
if ($opt_h) {
last if /^$/;
}
push (@msg, $_);
}
close INPUT;
# now chop off the leading headers that may have come from a previous
# run, or will interfere with insertion of the X-Mass-Check-Id hdr
my $fromline = "From nobody\@nowhere Wed Jan 1 00:00:00 2000\n";
while (scalar @msg > 0 &&
$msg[0] =~ /^(?:From|X-Mass-Check-Id:) /)
{
if ($msg[0] =~ /^From /) { $fromline = $msg[0]; }
shift @msg;
}
# and output
$annotate and unshift (@msg, "X-Mass-Check-Id: $where\n");
print $fromline, @msg, "\n";
}
sub mywarn {
warn @_;
if ($annotate) { print "X-Mass-Check-Warning: ".join ('',@_)."\n"; }
}