| #!/usr/bin/perl |
| |
| use warnings; |
| use strict; |
| |
# Compare rule-hit differences between two logfiles from similar mass-check
# runs. Useful after tweaking some code, to check whether anything changed.
# It is assumed that the exact same corpus was used for both runs.
| |
| # $ ./logrulediff ham-hege.log.old ham-hege.log |
| # ham/0d055650bfee6a5a0a1b43944f73eb7bb7fa7d39 +NEW_RULE -DISAPPEARED_RULE |
| |
# Both arguments must name existing plain files. The argument count is
# checked first so that -f is never applied to an undefined value (which
# would emit "uninitialized value" warnings ahead of the usage text).
# Extra arguments beyond the first two are tolerated and ignored.
# The trailing newline keeps Perl from appending "at ... line N" to what
# is a usage message, not an internal error.
die "Usage: $0 <logfile1> <logfile2>\n"
    unless @ARGV >= 2 && -f $ARGV[0] && -f $ARGV[1];
| |
# Rules seen in the first logfile, keyed as $rules1{<message>}{<rule>};
# the value counts how often the rule was listed for that message.
my %rules1;
{
    # Three-argument open with a lexical handle: the filename can never be
    # interpreted as a mode or pipe, and the handle is scoped to this block.
    open(my $in, '<', $ARGV[0])
        or die "Cannot open $ARGV[0]: $!\n";
    while (my $line = <$in>) {
        # Skip comment lines.
        next if $line =~ /^#/;
        # Accepted lines start with "." or "Y", then an integer field
        # (presumably the score - confirm against the mass-check format),
        # then the message identifier and a comma-separated rule list.
        next unless $line =~ /^[.Y]\s+-?\d+\s+(\S+)\s+(\S+)/;
        my ($msg, $hits) = ($1, $2);
        $rules1{$msg}{$_}++ foreach split(/,/, $hits);
    }
    close $in;
}
| |
# Rules seen in the second logfile, keyed as $rules2{<message>}{<rule>};
# the value counts how often the rule was listed for that message.
my %rules2;
{
    # Three-argument open with a lexical handle: the filename can never be
    # interpreted as a mode or pipe, and the handle is scoped to this block.
    open(my $in, '<', $ARGV[1])
        or die "Cannot open $ARGV[1]: $!\n";
    while (my $line = <$in>) {
        # Skip comment lines.
        next if $line =~ /^#/;
        # Accepted lines start with "." or "Y", then an integer field
        # (presumably the score - confirm against the mass-check format),
        # then the message identifier and a comma-separated rule list.
        next unless $line =~ /^[.Y]\s+-?\d+\s+(\S+)\s+(\S+)/;
        my ($msg, $hits) = ($1, $2);
        $rules2{$msg}{$_}++ foreach split(/,/, $hits);
    }
    close $in;
}
| |
# For every message in the first logfile, print the rules that appeared
# (+RULE) or disappeared (-RULE) in the second logfile. Messages whose
# rule sets are identical produce no output. Messages absent from the
# second logfile are reported on STDERR and skipped.
foreach my $f (sort keys %rules1) {
    if (!exists $rules2{$f}) {
        print STDERR "!!! $f not found in second logfile\n";
        next;
    }

    # Rule names are sorted so the output is identical from run to run;
    # plain hash-key order is randomized per process and would reorder
    # the +/- entries for the same input.
    my @subs = grep { !exists $rules2{$f}{$_} } sort keys %{ $rules1{$f} };
    my @adds = grep { !exists $rules1{$f}{$_} } sort keys %{ $rules2{$f} };
    next unless @adds || @subs;

    # Format: "<message> +ADDED ... -REMOVED ... \n" (every entry is
    # followed by a single space, including the last one).
    print "$f ";
    print "+$_ " foreach @adds;
    print "-$_ " foreach @subs;
    print "\n";
}
| |
# Report, in sorted order, every message that is present in the second
# logfile but has no entry in the first one.
my @only_in_second = grep { !defined $rules1{$_} } sort keys %rules2;
print STDERR "!!! $_ not found in first logfile\n" for @only_in_second;