#!/usr/bin/perl
#
# uniq-maildirs [-m grepfile] mdir1 mdir2 ...
#
# Walk every maildir named on the command line, recursing into
# subdirectories, and read each file looking for a Message-I[dD] header.
# Every file whose Message-Id has already been seen is listed on stdout
# as an sh command, "rm -f FILENAME", so the output can be run as a
# script.
#
# If the -m arg is used, the named file is first parsed for Message-IDs
# in UNIX "grep" output format, e.g. "filename:Message-Id: blah".
| |
use strict;
use warnings;

use File::Find;

# Autoflush the currently selected output handle (STDOUT).
$| = 1;

# State shared with the wanted() callback:
#   %file  - Message-ID (surrounding angle brackets stripped) => name of
#            the file recorded for that ID
#   $count - number of items processed in the current phase
#   $dups  - number of duplicates reported
my %file;
my $count = 0;
my $dups  = 0;

# Optional "-m grepfile": preload %file from grep-style output, one
# "filename:Message-Id: <id>" entry per line.
if (defined $ARGV[0] && $ARGV[0] eq '-m') {
    shift @ARGV;
    my $msgidsfile = shift @ARGV;
    defined $msgidsfile
        or die "usage: uniq-maildirs [-m grepfile] mdir1 mdir2 ...\n";

    open my $in, '<', $msgidsfile
        or die "cannot read msgids from $msgidsfile: $!";
    while (my $line = <$in>) {
        # The file name is matched non-greedily up to the first
        # ":Message-Id:" because maildir file names themselves contain
        # colons (e.g. "...:2,S"); a [^:]+ match would skip those lines.
        # Header field names are matched case-insensitively.
        $line =~ /^(.+?):Message-ID:\s*(\S+)/i or next;
        my ($f, $m) = ($1, $2);
        $m =~ s/^<//;
        $m =~ s/>$//;

        # A later line with the same ID overwrites an earlier one.
        $file{$m} = $f;
        $count++;

        progress($count, "m");
    }
    close $in;
    warn "\nfound $count message-ids.\n";
}

# Scan phase: restart the counter and walk each remaining argument.
# no_chdir keeps the working directory fixed, so $File::Find::name can be
# opened directly by the callback.
$count = 0;
foreach my $dir (@ARGV) {
    File::Find::find({ wanted => \&wanted, no_chdir => 1 }, $dir);
}
warn "\nscanned $count mails, $dups dups.\n";
| |
# File::Find callback, called once per directory entry.  Reads the header
# section of the file named by $File::Find::name, looking for the first
# Message-ID header.  The first file seen with a given ID is recorded in
# %file; any other file carrying the same ID is reported on stdout as an
# "rm -f" shell command and counted in $dups.  $count is incremented for
# every regular file examined.
sub wanted {
    my $path = $File::Find::name;

    # Directories and other non-regular entries are not mails: do not
    # count them and do not try to read them.
    return unless -f $path;
    $count++;

    my $fh;
    unless (open $fh, '<', $path) {
        warn "cannot read $path: $!";
        return;
    }

    while (my $line = <$fh>) {
        # A blank line (LF or CRLF) ends the header section.
        last if $line =~ /^\r?$/;

        # Header field names are matched case-insensitively and the
        # space after the colon is optional.
        next unless $line =~ /^Message-ID:\s*(\S+)/i;

        my $id = $1;
        $id =~ s/^<//;
        $id =~ s/>$//;

        if (!exists $file{$id}) {
            $file{$id} = $path;
            progress($count, ".");
        }
        elsif ($file{$id} eq $path) {
            # The very same path was already recorded (overlapping
            # directory arguments, or a -m list that includes it).  It is
            # not a duplicate of itself, so never emit an rm for it.
            progress($count, ".");
        }
        else {
            # Control characters are masked in the comment line so that a
            # newline inside a file name cannot end the comment early.
            (my $shown = "$path dup of $file{$id}") =~ s/[[:cntrl:]]/?/g;
            print "\n# DUP: $shown\n";
            print "rm -f -- " . _sh_quote($path) . "\n";
            $dups++;
            progress($count, "*");
        }

        # Only the first Message-ID header of a file is considered.
        last;
    }
    close $fh;
    return;
}

# Return $str quoted as a single POSIX sh word: wrapped in single quotes,
# with each embedded single quote written as '\''.
sub _sh_quote {
    my ($str) = @_;
    $str =~ s/'/'\\''/g;
    return "'$str'";
}
| |
# Progress indicator on STDERR.  Prints $sym whenever the counter $c is a
# multiple of 500, and a newline whenever it is a multiple of 500*70, so
# that a row of marks is broken after 70 marks' worth of items.
sub progress {
    my ($c, $sym) = @_;

    my $items_per_mark = 500;
    my $items_per_row  = $items_per_mark * 70;

    print STDERR $sym  unless $c % $items_per_mark;
    print STDERR "\n"  unless $c % $items_per_row;
}