blob: 0170c8c216116477a0007349e216c31349c4bbca [file] [log] [blame]
#!/usr/bin/perl -w -i.bak
#
# uniq-mailbox mbox [...]
#
# removes duplicate mails found in any of the mailboxes, rewriting
# them as it goes. backups saved as mbox.bak if any modifications
# are made.
#
# support for .tar and .gz mboxes: none. unpack them first ;)
###########################################################################
use English;
my $lastargv = '';
my $found_dups = 0;
my $num_mails = 0;
my $skipmail = 0;
my %seen = ();
while (<>) {
if ($lastargv ne $ARGV) {
report();
$found_dups = 0;
$num_mails = 0;
$lastargv = $ARGV;
}
if (/^From /) {
$in_header = 1;
$skipmail = 0;
@lines = ();
}
if ($skipmail) { next; }
if ($in_header) {
push (@lines, $_);
if (/^Message-[iI][dD]: (.*)$/) {
if (exists $seen{$1}) {
$found_dups++;
@lines = ();
$skipmail = 1;
}
$seen{$1} = 1;
$num_mails++;
}
if (/^$/) {
if (!$skipmail) { print @lines; @lines = (); }
$in_header = 0;
}
} else {
print;
}
}
report();
exit;
sub report {
return unless ($lastargv =~ /\S/);
if ($found_dups) {
warn "found $found_dups duplicate mails: $lastargv ($num_mails messages)\n";
} else {
warn "no duplicate mails: $lastargv ($num_mails messages)\n";
unlink ("$lastargv.bak"); # not needed
}
}