blob: 55807275b338fae641770c1ee3be74eca8348e6f [file] [log] [blame]
#!/usr/bin/perl
#
# uniq-maildirs [-m grepfile] mdir1 mdir2 ...
#
# Walk every maildir named on the command line, recursing into
# subdirectories, and read each file's headers for a Message-I[dD]
# header. A file whose Message-Id was already seen in an earlier file is
# listed on stdout in sh command format: "rm -f FILENAME", so the output
# can be run as a script.
#
# If the -m arg is used, the file named will be parsed for Message-IDs
# in UNIX "grep" format: e.g. "filename:Message-Id: blah"
use File::Find;
# Autoflush stdout so each line is written as soon as it is printed.
$| = 1;

# Optional "-m FILE": preload the Message-Id table from grep-style output,
# one "filename:Message-Id: <id>" entry per line.  Every id found is stored
# in the global %file (id => filename), which the File::Find callback
# consults, so a mail scanned later with the same id is reported as a
# duplicate of the file named here.
if (defined $ARGV[0] && $ARGV[0] eq '-m') {
    shift @ARGV;
    my $msgidsfile = shift @ARGV;
    defined $msgidsfile
        or die "usage: uniq-maildirs [-m grepfile] mdir1 mdir2 ...\n";

    %file = ();
    $count = 0;

    # Three-arg open with a lexical handle: the file name is never parsed
    # for mode characters, and $! says why the open failed.
    open(my $in, '<', $msgidsfile)
        or die "cannot read msgids from $msgidsfile: $!";

    while (my $line = <$in>) {
        # The filename part may itself contain ':' (maildir names end in
        # ":2,FLAGS"), so take the shortest prefix that is followed by the
        # header name instead of stopping at the first colon.  Header
        # names are case-insensitive.
        $line =~ /^(.+?):Message-ID:\s*(\S+)/i or next;
        my ($f, $m) = ($1, $2);

        # Store the id without its surrounding angle brackets.
        $m =~ s/^<//;
        $m =~ s/>$//;

        $file{$m} = $f;
        $count++;
        progress ($count, "m");
    }
    close($in);

    warn "\nfound $count message-ids.\n";
}
# Reset the global counters that the wanted() callback updates, walk each
# directory given on the command line, then report the totals on stderr.
$dups  = 0;
$count = 0;

# no_chdir: find() stays in the current directory, so the callback opens
# files through $File::Find::name.
my %find_opts = (
    wanted   => \&wanted,
    no_chdir => 1,
);

for my $maildir (@ARGV) {
    File::Find::find(\%find_opts, $maildir);
}

warn "\nscanned $count mails, $dups dups.\n";
# File::Find callback, called once for every entry found under a maildir.
# The path of the current entry is taken from $File::Find::name.
#
# Reads the header block of each regular file looking for a Message-Id.
# The first file seen with a given id is remembered in the global %file
# (id => path); a later, different file with the same id is reported on
# stdout as a "# DUP:" comment followed by an "rm -f" command.
#
# Updates the globals $count (regular files scanned) and $dups (duplicates
# reported), and emits progress marks via progress().
sub wanted {
    my $path = $File::Find::name;

    # find() calls us for directories as well as files; only regular
    # files are mails, so only those are counted and opened.
    return unless -f $path;
    $count++;

    # Three-arg open with a lexical handle: the path is never parsed for
    # mode characters or stripped of whitespace.  On failure, skip the
    # file instead of reading from a handle that was never opened.
    my $fh;
    unless (open($fh, '<', $path)) {
        warn "cannot read $path: $!";
        return;
    }

    while (my $line = <$fh>) {
        # A blank line ends the headers; allow for CRLF line endings so
        # the message body is never searched.
        last if $line =~ /^\r?$/;

        # Header names are case-insensitive; whitespace after the colon
        # is optional.
        next unless $line =~ /^Message-ID:\s*(\S+)/i;
        my $m = $1;
        $m =~ s/^<//;
        $m =~ s/>$//;

        if (exists $file{$m} && $file{$m} ne $path) {
            # Keep the comment on one line even if a path contains a
            # line break, so it cannot turn into a shell command.
            (my $note = "$path dup of $file{$m}") =~ s/[\r\n]/?/g;
            print "\n# DUP: $note\n";
            print "rm -f ", _sh_quote($path), "\n";
            $dups++;
            progress ($count, "*");
        } else {
            # First sighting of this id, or the very same path again
            # (preloaded with -m, or reached twice): never suggest
            # deleting a file as a duplicate of itself.
            $file{$m} = $path;
            progress ($count, ".");
        }
        last;
    }
    close($fh);
    return;
}

# Return $path in a form safe to paste into an sh command line.  Paths made
# only of ordinary filename characters are returned unchanged; anything
# else is wrapped in single quotes, with embedded single quotes escaped.
sub _sh_quote {
    my ($path) = @_;
    return $path if $path =~ m{\A[A-Za-z0-9_\@%+=:,./-]+\z};
    $path =~ s/'/'\\''/g;
    return "'$path'";
}
# Progress meter on stderr.  Given a running counter and a one-character
# marker, prints the marker each time the counter is a multiple of 500,
# and ends the row with a newline each time it is a multiple of 500*70.
sub progress {
    my ($seen, $marker) = @_;

    my $per_mark = 500;
    my $per_row  = $per_mark * 70;

    print STDERR $marker unless $seen % $per_mark;
    print STDERR "\n"    unless $seen % $per_row;
    return;
}