| #!/usr/bin/perl -w |
| |
| # <@LICENSE> |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to you under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at: |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # </@LICENSE> |
| |
| use strict; |
| use warnings; |
| |
| sub mywarn; |
| |
use Getopt::Long qw(:config bundling auto_help);
use Pod::Usage;

# Command-line switches.  They are package globals (not lexicals) so that
# the subroutines further down the file can read them.
use vars qw($opt_noannotate $opt_h $opt_ids);

# Parse the switches out of @ARGV; whatever remains afterwards are targets.
# GetOptions returns false when it meets an unknown or malformed option, so
# stop with a usage message and exit status 2 instead of carrying on with a
# partially parsed command line.
GetOptions(
  "noannotate" => \$opt_noannotate,
  "ids"        => \$opt_ids,
  "h|headers"  => \$opt_h,
) or pod2usage(2);
| |
| =head1 NAME |
| |
| mboxget - Extract message(s) from an mbox (or other mail file) |
| |
| =head1 SYNOPSIS |
| |
| mboxget [options] <target> ... |
| |
| Options: |
| -h, --headers Display only message headers |
| --ids List only the IDs, do not generate an mbox |
| --noannotate Do not add X-Mass-Check-Id header with message path |
| |
| =head1 DESCRIPTION |
| |
| B<mboxget> extracts and displays the messages specified on the command |
| line and on STDIN. Messages are expected to be listed as |
| <path_to_file>.<offset> or simply <path_to_file>. Multiple messages |
| can be specified, in which case, each message will be output. This |
| script is very useful in combination with mass-check logs and |
| mboxes. Targets given on STDIN may be in B<mass-check> format, and may |
| even be preceded by a filename and a colon (like the output of |
| B<grep> would provide). |
| |
| Use the B<-h> option to just output headers. |
| |
| By default an X-Mass-Check-Id header is added to make it easier to |
| find a message. Use B<--noannotate> to disable this. |
| |
| =head1 EXAMPLES |
| |
| To show spam messages that hit the rule BAYES_99 |
| |
| grep BAYES_99 masses.log | mboxget |
| |
| To show the message indicated by "/path/to/my/mbox.1234" |
| |
| mboxget /path/to/my/mbox.1234 |
| |
| To find the Subject lines of all spam messages that hit SUBJ_AS_SEEN |
| |
| grep SUBJ_AS_SEEN spam.log | mboxget -h | grep ^Subject: |
| |
| To get the ids or paths of messages that hit SUBJ_AS_SEEN |
| |
| grep SUBJ_AS_SEEN spam.log | mboxget --ids |
| |
| =cut |
| |
| |
| |
# Short program name (basename of $0), used as a prefix in diagnostics.
my $prog = $0;
$prog =~ s@.*/@@;

# The X-Mass-Check-Id annotation is on unless --noannotate was given.
my $annotate = ($opt_noannotate ? 0 : 1);

# Targets named on the command line (the switches have already been parsed
# out of @ARGV).  A named loop variable is used instead of $_ so that code
# called from inside the loop cannot overwrite the @ARGV element currently
# being iterated.
my $have_args = scalar @ARGV;
foreach my $target (@ARGV) {
  handle_input($target);
}

# Targets listed on STDIN.  STDIN is read explicitly: the bare <> operator
# would treat the command-line targets left in @ARGV as files to read lines
# from.  When targets were given on the command line and STDIN is a
# terminal, nothing is being piped in, so do not sit waiting for input.
unless ($have_args && -t STDIN) {
  while (my $line = <STDIN>) {
    $line =~ s/^[^\s:]+://;     # filenames, from "grep foo *"

    if ($line =~ /^[Y\.]\s+-?\d+\s+(\S+)\s+\S+/) {
      # mass-check format: . 0 /home/jm/Mail/Spam2/3475 FOO_BAR,BAZ
      handle_input($1);
    }
    elsif ($line =~ /^[Y\.]\s+-?\d+\s+(\S+)\s*$/) {
      # mass-check format: . 0 /home/jm/Mail/Spam2/3475
      handle_input($1);
    }
    else {
      # anything else is taken to be a bare target; skip comment lines
      next if $line =~ /^#/;
      chomp $line;
      next unless length $line;   # a blank line names no target
      handle_input($line);
    }
  }
}

exit;
| |
# handle_input($where)
#
# Print the message identified by $where to STDOUT.
#
# $where is either the path of an existing file, or "<path>.<offset>",
# where <offset> is a byte position to seek to inside <path> before
# reading.  With --ids, $where itself is printed and nothing is opened.
# Paths ending in ".gz" or ".bz2" are read through gunzip / bzip2.
#
# When an offset was given, reading stops at the next line starting with
# "From " (the next message of an mbox).  With -h, reading stops at the
# first empty line.  Leading "From " and X-Mass-Check-Id: lines are
# stripped; the last "From " line seen is re-emitted first (a fixed
# placeholder is used if there was none), followed by a fresh
# X-Mass-Check-Id header unless --noannotate is in effect.
#
# If the source cannot be opened or the seek fails, a warning is issued via
# mywarn and nothing is printed for this target.
sub handle_input {
  my $where = shift;

  if ($opt_ids) {
    print "$where\n";
    return;
  }

  my ($file, $offset);

  if (-f $where) {
    # an existing file wins, even if its name happens to end in ".<digits>"
    $file = $where;
  } else {
    ($file, $offset) = ($where =~ m/(.*?)(?:\.(\d+))?$/);
  }

  # the helper has already warned on failure; without a readable source
  # there is no message to emit
  my $input = _open_message_source($file) or return;

  if ($offset) {
    # TODO: steal open-file caching code from old revisions of
    # mass-check-results-to-mbox
    # NOTE(review): a seek on the gunzip/bzip2 pipe presumably cannot
    # succeed, so offsets into compressed files would always end up here
    # -- confirm.
    if (!seek($input, $offset, 0)) {
      mywarn("$prog: seek $offset failed: $!\n");
      close $input;
      return;
    }
  }

  # read the message into @msg; a lexical line variable is used so the
  # caller's $_ is left alone
  my $past = 0;
  my @msg = ();
  while (my $line = <$input>) {
    if ($past && defined($offset)) {
      # only do this for mboxes
      last if substr($line, 0, 5) eq "From ";
    }
    else {
      # the first line is never treated as a message boundary
      $past = 1;
    }
    if ($opt_h) {
      last if $line =~ /^$/;
    }
    push(@msg, $line);
  }
  close $input;

  # now chop off the leading headers that may have come from a previous
  # run, or will interfere with insertion of the X-Mass-Check-Id hdr
  my $fromline = "From nobody\@nowhere Wed Jan 1 00:00:00 2000\n";
  while (scalar @msg > 0 &&
         $msg[0] =~ /^(?:From|X-Mass-Check-Id:) /)
  {
    if ($msg[0] =~ /^From /) { $fromline = $msg[0]; }
    shift @msg;
  }

  # and output
  $annotate and unshift(@msg, "X-Mass-Check-Id: $where\n");
  print $fromline, @msg, "\n";
}

# Open $file for reading and return the filehandle, decompressing through
# gunzip or bzip2 when the name ends in ".gz" / ".bz2".  The external
# commands are started with list-form open, so the file name is passed as a
# single argument and never interpreted by a shell.  On failure a warning
# is issued via mywarn and nothing (false) is returned.
sub _open_message_source {
  my ($file) = @_;
  my $fh;

  if ($file =~ /\.gz$/) {
    open($fh, '-|', 'gunzip', '-cd', '--', $file)
      or do { mywarn("gunzip $file failed: $!"); return; };
  } elsif ($file =~ /\.bz2$/) {
    open($fh, '-|', 'bzip2', '-cd', '--', $file)
      or do { mywarn("bunzip2 $file failed: $!"); return; };
  } else {
    open($fh, '<', $file)
      or do { mywarn("open $file failed: $!"); return; };
  }

  return $fh;
}
| |
# mywarn(@message)
#
# Report a problem.  The message always goes through warn; when annotation
# is enabled it is additionally printed to STDOUT as a single
# "X-Mass-Check-Warning:" line built from the concatenated arguments.
sub mywarn {
  my @message = @_;

  warn @message;

  $annotate and print "X-Mass-Check-Warning: " . join('', @message) . "\n";
}
| |