| #!/usr/bin/perl -w -i.bak |
| # |
| # uniq-mailbox mbox [...] |
| # |
| # removes duplicate mails found in any of the mailboxes, rewriting |
| # them as it goes. backups saved as mbox.bak if any modifications |
| # are made. |
| # |
| # support for .tar and .gz mboxes: none. unpack them first ;) |
| |
| ########################################################################### |
| |
| use English; |
| |
| my $lastargv = ''; |
| my $found_dups = 0; |
| my $num_mails = 0; |
| my $skipmail = 0; |
| |
| my %seen = (); |
| |
| while (<>) { |
| if ($lastargv ne $ARGV) { |
| report(); |
| $found_dups = 0; |
| $num_mails = 0; |
| $lastargv = $ARGV; |
| } |
| |
| if (/^From /) { |
| $in_header = 1; |
| $skipmail = 0; |
| @lines = (); |
| } |
| |
| if ($skipmail) { next; } |
| |
| if ($in_header) { |
| push (@lines, $_); |
| if (/^Message-[iI][dD]: (.*)$/) { |
| if (exists $seen{$1}) { |
| $found_dups++; |
| @lines = (); |
| $skipmail = 1; |
| } |
| |
| $seen{$1} = 1; |
| $num_mails++; |
| } |
| |
| if (/^$/) { |
| if (!$skipmail) { print @lines; @lines = (); } |
| $in_header = 0; |
| } |
| |
| } else { |
| print; |
| } |
| } |
| |
| report(); |
| exit; |
| |
| sub report { |
| return unless ($lastargv =~ /\S/); |
| if ($found_dups) { |
| warn "found $found_dups duplicate mails: $lastargv ($num_mails messages)\n"; |
| } else { |
| warn "no duplicate mails: $lastargv ($num_mails messages)\n"; |
| unlink ("$lastargv.bak"); # not needed |
| } |
| } |