| #!/usr/bin/perl -w |
| |
| # settings are located in $HOME/.corpus |
| |
| use strict; |
| use Getopt::Long; |
| |
| our ( $opt_override, $opt_tag ); |
| GetOptions( |
| "tag=s" => \$opt_tag, |
| "override=s" => \$opt_override, |
| ); |
| |
| $opt_override ||= ''; |
| $opt_tag ||= 'n'; # nightly is the default |
| |
| use File::Path; |
| use File::Copy; |
| use Time::ParseDate; |
| use Cwd qw(abs_path); |
| use POSIX qw(nice strftime); |
| |
| use constant WEEK => 7*60*60*24; |
| nice(15); |
| |
| # daterevs -- e.g. "20060429/r239832-r" -- are aligned to 0800 UTC, just before |
| # the time of day when the mass-check tagging occurs; see |
| # http://wiki.apache.org/spamassassin/DateRev for more details. |
| use constant DATEREV_ADJ => - (8 * 60 * 60); |
| |
| # what's the max age of mail we will accept data from? (in weeks) |
| # TODO: maybe this should be in ~/.corpus |
| my $OLDEST_HAM_WEEKS = 72 * 4; # 72 months = 6 years |
| my $OLDEST_SPAM_WEEKS = 2 * 4; # 2 months |
| |
| # --------------------------------------------------------------------------- |
| |
| sub runcmd; |
| my $configuration = "$ENV{HOME}/.corpus"; |
| my %cf; |
| my %revision = (); |
| my %filesize = (); |
| my %dateline = (); |
| my %mtime = (); |
| my %logs_by_daterev = (); |
| my %is_net_daterev = (); |
| my %time = (); |
| my @tmps = (); |
| my $time_start = time; |
| my $output_revpath; |
| my $perl_path = $^X; |
| |
| configure(); |
| init(); |
| |
| my $logsdir = "$cf{html}/logs"; |
| print "reading logs from '$logsdir'\n"; |
| |
| locate_input(); |
| generate_logs(); |
| clean_up(); |
| exit; |
| |
| # --------------------------------------------------------------------------- |
| |
| sub configure { |
| # does rough equivalent of source |
| open(C, $configuration) || die "open failed: $configuration: $!\n"; |
| my $pwd = Cwd::getcwd; |
| |
| # add 'override' options |
| my @lines = (<C>, split(/\|/, $opt_override)); |
| |
| foreach (@lines) { |
| chomp; |
| s/#.*//; |
| if (/^\s*(.*?)\s*=\s*(.*?)\s*$/) { |
| my ($key, $val) = ($1, $2); |
| $val =~ s/\$PWD/$pwd/gs; |
| $cf{$key} = $val; |
| } |
| } |
| close(C); |
| |
| $cf{output_classes} ||= |
| "DETAILS.new DETAILS.all DETAILS.age NET.new NET.all NET.age"; |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub clean_up { |
| chdir "/"; |
| runcmd "rm -rf $cf{tmp}/*.$$ ".join(' ', @tmps); |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub init { |
| $SIG{INT} = \&clean_up; |
| $SIG{TERM} = \&clean_up; |
| |
| $ENV{RSYNC_PASSWORD} = $cf{password}; |
| $ENV{TIME} = '%e,%U,%S'; |
| $ENV{TZ} = 'UTC'; |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub locate_input { |
| opendir(CORPUS, $logsdir); |
| my @files = sort readdir(CORPUS); |
| closedir(CORPUS); |
| |
| @files = grep { |
| /^(?:spam|ham)-(?:net-)?\S+\.log$/ && -f "$logsdir/$_" && -M _ < 10 |
| } @files; |
| |
| foreach my $file (@files) { |
| my $tag = 0; |
| my $headers = ''; |
| |
| open(FILE, "$logsdir/$file") or warn "cannot read $logsdir/$file"; |
| while (my $line = <FILE>) { |
| last if $line !~ /^#/; |
| $headers .= $line; |
| if ($line =~ /, on (... ... .. )(..)(:..:.. ... ....)/) { |
| my ($datepre, $hh, $datepost) = ($1,$2,$3); |
| |
| my $timet = Time::ParseDate::parsedate($datepre.$hh.$datepost, |
| GMT => 1, PREFER_PAST => 1); |
| |
| $time{$file} = $timet; |
| } |
| elsif ($line =~ m/^# Date:\s*(\S+)/) { |
| # a better way to do the above. TODO: parse it instead |
| $dateline{$file} = $1; |
| if (!defined $time{$file}) { |
| # if time line unparseable (localized?) use this instead |
| my ($yyyy, $mm, $dd, $h, $m, $s) = $dateline{$file} =~ /(\d\d\d\d)(\d\d)(\d\d)T(\d\d)(\d\d)(\d\d)Z/; |
| |
| my $timet = Time::ParseDate::parsedate("${yyyy}/${mm}/${dd} ${h}:${m}:${s} GMT+0", |
| GMT => 1, PREFER_PAST => 1); |
| |
| $time{$file} = $timet; |
| } |
| } |
| elsif ($line =~ m/^# SVN revision:\s*(\S+)/) { |
| $revision{$file} = $1; |
| } |
| } |
| close(FILE); |
| |
| my @s = stat("$logsdir/$file"); |
| $filesize{$file} = $s[7]; |
| $mtime{$file} = $s[9]; |
| |
| if (!defined $time{$file}) { |
| warn "$logsdir/$file: no time found, ignored\n"; next; |
| } |
| if (!defined $revision{$file}) { |
| warn "$logsdir/$file: no revision found, ignored\n"; next; |
| } |
| if ($revision{$file} eq 'unknown') { |
| warn "$logsdir/$file: not tagged with a revision, ignored\n"; next; |
| } |
| |
| my $daterev = mk_daterev($time{$file},$revision{$file},$opt_tag); |
| |
| $logs_by_daterev{$daterev} ||= [ ]; |
| push (@{$logs_by_daterev{$daterev}}, $file); |
| |
| if ($file =~ /-net-/) { |
| $is_net_daterev{$daterev} = 1; |
| print "$logsdir/$file: rev=$daterev time=$time{$file} (set 1)\n"; |
| } |
| else { |
| print "$logsdir/$file: rev=$daterev time=$time{$file} (set 0)\n"; |
| } |
| |
| get_rulemetadata_for_revision($daterev, $revision{$file}); |
| } |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub sort_all { |
| my ($a1, $a2) = ($a =~ m/(\(.*?\)|\S+)(?::(\S+))?$/); |
| my ($b1, $b2) = ($b =~ m/(\(.*?\)|\S+)(?::(\S+))?$/); |
| $a1 =~ s/^[\+\-]//; |
| $b1 =~ s/^[\+\-]//; |
| |
| my $n = ($a1 cmp $b1) || (($a2 || '') cmp ($b2 || '')); |
| if ($a1 =~ /^OVERALL/) { $n -= 1000; } |
| elsif ($a1 =~ /^\(all messages\)/) { $n -= 100; } |
| elsif ($a1 =~ /^\(all messages as \%\)/) { $n -= 10; } |
| if ($b1 =~ /^OVERALL/) { $n += 1000; } |
| elsif ($b1 =~ /^\(all messages\)/) { $n += 100; } |
| elsif ($b1 =~ /^\(all messages as \%\)/) { $n += 10; } |
| return $n; |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub time_filter_fileset { |
| my ($fileary, $outname, $after, $before) = @_; |
| |
| my $timet_before = (defined $before ? |
| ($time_start - ($before * WEEK)) : $time_start+1); |
| my $timet_after = (defined $after ? |
| ($time_start - ($after * WEEK)) : 0); |
| |
| open(TMP, ">$outname") or warn "cannot write $outname"; |
| for my $file (@{$fileary}) { |
| open(IN, $file) or warn "cannot read $file"; |
| while (<IN>) { |
| next unless /\btime=(\d+)/; |
| next if ($1 < $timet_after || $1 > $timet_before); |
| print TMP; |
| } |
| close IN; |
| } |
| close TMP or warn "failed to close $outname"; |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub generate_logs { |
| foreach my $entry (split(' ', $cf{output_classes})) { |
| $entry =~ /^(\S+)\.(\S+)$/; |
| my $class = $1; |
| my $rtype = $2; |
| if (!$rtype) { warn "no rtype in $entry"; next; } |
| if ($class eq 'HTML') { warn "class HTML in $entry obsolete, ignored"; next; } |
| |
| foreach my $daterev (reverse sort keys %logs_by_daterev) { |
| my $rev; |
| if ($daterev !~ /\/r(\d+)/) { |
| warn "bad daterev: $daterev"; next; |
| } |
| $rev = $1; |
| |
| if ($class eq "NET") { |
| next unless $is_net_daterev{$daterev}; |
| } |
| |
| gen_class ($daterev, $rev, $class, $rtype); |
| } |
| } |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| my ($tmp_h, $tmp_s, $no_messages_in_freqs, $hf_flags); |
| |
| sub gen_class { |
| my ($daterev, $rev, $class, $rtype) = @_; |
| return if ($class eq "NET" && $rtype !~ /^(?:new|all|age|7day)$/); |
| |
| chdir $logsdir; |
| print STDERR "\ngenerating: $cf{html}/$daterev/$class.$rtype\n"; |
| |
| my @ham = grep { /^ham/ } @{$logs_by_daterev{$daterev}}; |
| print STDERR "input h: " . join(' ', @ham) . "\n"; |
| |
| my @spam = grep { /^spam/ } @{$logs_by_daterev{$daterev}}; |
| print STDERR "input s: " . join(' ', @spam) . "\n"; |
| |
| # net vs. local |
| if ($class eq "NET") { |
| @ham = grep { /-net-/ } @ham; |
| @spam = grep { /-net-/ } @spam; |
| } |
| # age |
| if ($rtype =~ /(\d+)day/) { |
| my $mtime = $1; |
| @ham = grep { -M $_ < $mtime } @ham; |
| @spam = grep { -M $_ < $mtime } @spam; |
| } |
| |
| print STDERR "selected h: " . join(' ', @ham) . "\n"; |
| print STDERR "selected s: " . join(' ', @spam) . "\n"; |
| |
| # we cannot continue if we have no files that match the criteria... |
| # demand at least 1 ham and 1 spam file |
| if (scalar @spam <= 0 || scalar @ham <= 0) { |
| warn "not enough files found matching criteria ($daterev $class $rtype)\n"; |
| return; |
| } |
| |
| my $dir = create_outputdir($daterev); |
| my $fname = "$dir/$class.$rtype"; |
| |
| # now, if the target file already exists, check to see if it's newer |
| # than all the sources, make-style; if not, don't re-create it |
| if (-f $fname) { |
| my $targetfreshness = (-M $fname); |
| my $needsrebuild = 0; |
| |
| foreach my $srcfile (@spam, @ham) { |
| my $srcfreshness = (-M $srcfile); |
| if ($targetfreshness > $srcfreshness) { # src is fresher |
| print "need rebuild, $fname is older than $srcfile: $targetfreshness > $srcfreshness\n"; |
| $needsrebuild = 1; |
| last; |
| } |
| } |
| |
| if (!$needsrebuild) { |
| print "existing: $fname, fresher than sources\n"; |
| return; |
| } |
| } |
| |
| my $when = scalar localtime time; |
| print qq{creating: $fname ($class) |
| started $when... |
| }; |
| my $bytes = 0; |
| |
| my $tmpfname = "$fname.$$"; |
| if ($class eq 'LOGS') { |
| $bytes = gen_report_logs($fname, \@ham, \@spam); |
| } |
| elsif ($class eq 'CORPUS') { |
| push (@tmps, abs_path($tmpfname)); |
| |
| my $cmd = "$perl_path $cf{tree}/masses/logs-to-corpus-report ". |
| join(" ", @ham)." ".join(" ", @spam)." > $tmpfname"; |
| runcmd $cmd; |
| ($? >> 8 == 0) or warn "failed to run logs-to-corpus-report"; |
| |
| rename($tmpfname, $fname) or warn "cannot rename $tmpfname to $fname"; |
| $bytes = (-s $fname); |
| } |
| else { |
| push (@tmps, abs_path($tmpfname)); |
| |
| open(OUT, "> $tmpfname") or warn "cannot write to $tmpfname"; |
| print OUT "# ham results used for $daterev $class $rtype: " . join(" ", @ham) . "\n"; |
| print OUT "# spam results used for $daterev $class $rtype: " . join(" ", @spam) . "\n"; |
| print OUT "# ".log_metadata_xml($daterev, @ham, @spam)."\n"; |
| |
| $hf_flags = ""; |
| $hf_flags = "-t net -s 1" if $class eq "NET"; |
| $hf_flags = "-o" if $class eq "OVERLAP"; |
| $hf_flags = "-S" if $class eq "SCOREMAP"; |
| |
| if ($cf{rules_dir}) { |
| $hf_flags .= " -c '$cf{rules_dir}'"; |
| } |
| |
| # are we analyzing --net mass-check logs? if so, use scoreset 1 |
| if (join(" ", @ham) =~ /-net-/) { |
| $hf_flags .= " -s 1" if $class eq "NET"; |
| } |
| |
| # catch an odd error condition, where hit-frequencies creates output |
| # with no log lines included at all |
| $no_messages_in_freqs = 0; |
| |
| $tmp_h = "$cf{tmp}/ham.log.$$"; |
| $tmp_s = "$cf{tmp}/spam.log.$$"; |
| |
| if ($rtype eq "all") { |
| gen_report_freqs_all($tmpfname, \@ham, \@spam, $rev); |
| } |
| elsif ($rtype eq "age") { |
| gen_report_freqs_age($tmpfname, \@ham, \@spam, $rev); |
| } |
| elsif (@ham && @spam) { |
| gen_report_freqs_basic($tmpfname, \@ham, \@spam, $rev); |
| } |
| |
| $bytes = (-s OUT); |
| close(OUT); |
| |
| unlink $tmp_h, $tmp_s; |
| |
| if ($no_messages_in_freqs) { |
| warn "ERROR: no data in freqs! aborting, leaving tmp file as $tmpfname"; |
| return; |
| } |
| |
| rename($tmpfname, $fname) or warn "cannot rename $tmpfname to $fname"; |
| |
| # compress for certain classes |
| if ($class eq "OVERLAP") { |
| $fname =~ s/'//gs; |
| runcmd ("rm '$fname.gz'; pigz '$fname'"); |
| # takes care of keeping the original around so we don't have to |
| if ($? >> 8 != 0) { warn "pigz '$fname' failed"; } |
| } |
| } |
| |
| $when = scalar localtime time; |
| print qq{created: $bytes bytes, finished at $when |
| URL: |
| |
| $cf{ruleqa_url}$output_revpath |
| |
| }; |
| |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub mk_daterev { |
| my ($timet, $rev, $tag) = @_; |
| return strftime("%Y%m%d", gmtime($timet + DATEREV_ADJ)) . "/r$rev-$tag"; |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub create_outputdir { |
| my ($revpath) = @_; |
| my $dir = $cf{html} .'/'. $revpath; |
| |
| # print "output dir: $dir\n"; |
| if (!-d $dir) { |
| my $prevu = umask 0; |
| mkpath([$dir], 0, oct($cf{html_mode})) or warn "failed to mkdir $dir"; |
| umask $prevu; |
| } |
| |
| $output_revpath = $revpath; # set the global |
| $output_revpath =~ s/\//-/; # looks nicer |
| |
| return $dir; |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub log_metadata_xml { |
| my ($daterev, @files) = @_; |
| my $str = ''; |
| |
| # this is extracted into the info.xml file later by the gen_info_xml script |
| foreach my $f (@files) { |
| $str .= qq{ |
| <mclogmd file='$f'> |
| <daterev>$daterev</daterev> |
| <rev>$revision{$f}</rev> |
| <fsize>$filesize{$f}</fsize> |
| <mcstartdate>$dateline{$f}</mcstartdate> |
| <mtime>$mtime{$f}</mtime> |
| </mclogmd> |
| }; |
| } |
| |
| $str =~ s/\s+/ /gs; # on a single line please |
| return '<mclogmds>'.$str.'</mclogmds>'; |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub create_rulemetadata_dir { |
| my $rev = shift; |
| my $dir = "$cf{html}/rulemetadata/$rev"; |
| if (!-d $dir) { |
| my $prevu = umask 0; |
| mkpath([$dir], 0, oct($cf{html_mode})) or warn "failed to mkdir $dir"; |
| umask $prevu; |
| } |
| return $dir; |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub get_rulemetadata_for_revision { |
| my ($daterev, $rev) = @_; |
| |
| my $dir = create_rulemetadata_dir($rev); |
| |
| # argh. this is silly; ~bbmass/.corpus specifies "$PWD" in its |
| # "tree" path, so we have to ensure we're in the 'masses' dir |
| # for this to work! |
| chdir "$cf{tree}/masses" or die "cannot chdir $cf{tree}/masses"; |
| |
| my $cmd = "$cf{tree}/masses/rule-qa/get-rulemetadata-for-revision ". |
| "--rev=$rev --outputdir='$dir'"; |
| |
| runcmd($cmd); |
| if ($? >> 8 != 0) { |
| warn "'$cmd' failed"; |
| } |
| |
| chdir $logsdir; |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub start_hit_frequencies_at_rev { |
| my ($rev, $args) = @_; |
| |
| $rev ||= 'HEAD'; |
| (-d "$cf{tmp}/hfdir") or runcmd("mkdir -p $cf{tmp}/hfdir"); |
| my $hfdir = "$cf{tmp}/hfdir/r$rev"; |
| my $expected_svn_file = "$hfdir/Makefile.PL"; |
| |
| print "setting up hit-frequencies for r$rev in $hfdir\n"; |
| |
| my $needs_checkout = 0; |
| if (-d $hfdir && chdir $hfdir) { |
| eval { |
| # "svn up" has been observed to wedge on the ruleqa zone VM, put a timeout so we can recover |
| local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required |
| alarm 60*60; # an hour should be generous enough |
| runcmd("svn up -r$rev"); |
| alarm 0; |
| }; |
| |
| if ($@ || $?>>8 != 0 || !-f $expected_svn_file) { |
| print "simple 'svn update' failed. performing full checkout instead...\n"; |
| $needs_checkout = 1; |
| } |
| } else { |
| $needs_checkout = 1; |
| } |
| |
| if ($needs_checkout) { |
| my $svnurl = get_svn_url(); |
| runcmd("rm -rf $hfdir"); |
| eval { |
| local $SIG{ALRM} = sub { die "alarm\n" }; # NB: \n required |
| alarm 60*60; # an hour should be generous enough |
| runcmd("svn co $svnurl\@$rev $hfdir"); |
| alarm 0; |
| }; |
| if ($@ || $?>>8 != 0 || !-f $expected_svn_file) { die "svn co failed"; } |
| } |
| chdir "$hfdir" or die "cannot chdir $hfdir"; |
| |
| # ensure these are rebuilt |
| runcmd "rm -f rules/70_sandbox.cf rules/72_active.cf"; |
| |
| # do this twice in case Makefile.PL is rebuilt |
| runcmd "( make build_rules || $perl_path Makefile.PL;make build_rules )</dev/null"; |
| |
| chdir "$hfdir/masses" or die "cannot chdir $hfdir/masses"; |
| open (FREQS, "$perl_path hit-frequencies -TxpagP $args |") |
| or die "cannot run ./hit-frequencies $args |"; |
| |
| chdir $logsdir; |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub get_svn_url { |
| open (SVNINFO, "svn info $cf{tree}|") or die "cannot run svn info"; |
| my $svnurl; |
| while (<SVNINFO>) { |
| /URL: (.*)$/ and $svnurl = $1; |
| } |
| close SVNINFO or die "cannot close svn info"; |
| return $svnurl; |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub gen_report_logs { |
| my ($fname, $hamref, $spamref) = @_; |
| |
| my $bytes = 0; |
| foreach my $f (@$hamref, @$spamref) { |
| $f =~ s/[^-\._A-Za-z0-9]+/_/gs; # sanitize! |
| my $zf = "$fname-$f.gz"; |
| |
| runcmd("pigz -c < $f > $zf.$$"); |
| if ($? >> 8 != 0) { |
| warn "pigz -c < $f > $zf.$$ failed"; |
| } |
| |
| rename("$zf.$$", $zf) or |
| warn "cannot rename $zf.$$ to $zf"; |
| $bytes += (-s $zf); |
| } |
| |
| # this is just so we won't recompress these logs if re-run |
| open TOUCH, ">$fname" or warn "cannot write to $fname"; |
| close TOUCH; |
| |
| return $bytes; |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub gen_report_freqs_all { |
| my ($tmpfname, $hamref, $spamref, $rev) = @_; |
| my %spam; |
| my %ham; |
| my @output; |
| |
| for my $file (@$spamref) { |
| my $u = extract_username_from_log_filename($file); |
| $spam{$u} = $file; print "username in spam log: $u\n"; |
| } |
| for my $file (@$hamref) { |
| my $u = extract_username_from_log_filename($file); |
| $ham{$u} = $file; print "username in ham log: $u\n"; |
| } |
| |
| if (scalar keys %spam <= 0 && scalar keys %ham <= 0) { |
| warn "no files found"; |
| return; |
| } |
| |
| my $tmp_h_all = "$cf{tmp}/hamall.log.$$"; |
| my $tmp_s_all = "$cf{tmp}/spamall.log.$$"; |
| unlink $tmp_h_all, $tmp_s_all; |
| |
| my %alluserkeys; |
| for my $k (keys %spam, keys %ham) { |
| next if exists $alluserkeys{$k}; undef $alluserkeys{$k}; |
| } |
| for my $user (sort keys %alluserkeys) { |
| my $files_h = []; |
| if ($ham{$user}) { $files_h = [ "$logsdir/$ham{$user}" ]; } |
| my $files_s = []; |
| if ($spam{$user}) { $files_s = [ "$logsdir/$spam{$user}" ]; } |
| |
| time_filter_fileset($files_h, $tmp_h, $OLDEST_HAM_WEEKS, undef); |
| time_filter_fileset($files_s, $tmp_s, $OLDEST_SPAM_WEEKS, undef); |
| |
| start_hit_frequencies_at_rev($rev, "$hf_flags $tmp_s $tmp_h"); |
| while(<FREQS>) { |
| chomp; push @output, "$_:$user\n"; |
| } |
| close(FREQS); |
| |
| runcmd("cat $tmp_h >> $tmp_h_all"); |
| runcmd("cat $tmp_s >> $tmp_s_all"); |
| } |
| |
| if (-z $tmp_h_all && -z $tmp_s_all) { |
| warn "time_filter_fileset() returned empty logs. not creating freqs!"; |
| unlink $tmp_h_all, $tmp_s_all; |
| return; # we'll try again later |
| } |
| |
| start_hit_frequencies_at_rev($rev, "$hf_flags $tmp_s_all $tmp_h_all"); |
| while(<FREQS>) { |
| /\s0\s+0\s+0.500\s+0.00\s+0.00\s+\(all messages\)/ and $no_messages_in_freqs = 1; |
| push @output, $_; |
| } |
| close(FREQS); |
| for (sort sort_all @output) { print OUT; } |
| |
| unlink $tmp_h_all, $tmp_s_all; |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub extract_username_from_log_filename { |
| my $u = shift; |
| # spam-someuser.log, spam-net-bb-jm.20090518-r775863-n.log |
| $u =~ s/\.log$//; $u =~ s/.*\///; $u =~ s/^(h|sp)am-(?:net-)?//; |
| $u =~ s/\.\d{8}-r\d+-[a-z]//; # daterev |
| return $u; |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub gen_report_freqs_age { |
| my ($tmpfname, $hamref, $spamref, $rev) = @_; |
| my @output; |
| |
| for my $which (("0-1", "1-2", "2-3", "3-6")) { |
| my ($before, $after) = split(/-/, $which); |
| time_filter_fileset($hamref, $tmp_h, $after, $before); |
| time_filter_fileset($spamref, $tmp_s, $after, $before); |
| |
| # print out by age |
| start_hit_frequencies_at_rev($rev, "$hf_flags $tmp_s $tmp_h"); |
| while(<FREQS>) { |
| chomp; push @output, "$_:$which\n"; |
| } |
| close(FREQS); |
| } |
| for (sort sort_all @output) { print OUT; } |
| } |
| |
| # --------------------------------------------------------------------------- |
| |
| sub gen_report_freqs_basic { |
| my ($tmpfname, $hamref, $spamref, $rev) = @_; |
| |
| time_filter_fileset($hamref, $tmp_h, $OLDEST_HAM_WEEKS, undef); |
| time_filter_fileset($spamref, $tmp_s, $OLDEST_SPAM_WEEKS, undef); |
| |
| if (-z $tmp_h && -z $tmp_s) { |
| warn "time_filter_fileset() returned empty logs. not creating freqs!"; |
| return; # we'll try again later |
| } |
| |
| start_hit_frequencies_at_rev($rev, "$hf_flags $tmp_s $tmp_h"); |
| while(<FREQS>) { |
| /\s0\s+0\s+0.500\s+0.00\s+0.00\s+\(all messages\)/ and $no_messages_in_freqs = 1; |
| print(OUT); |
| } |
| close(FREQS); |
| } |
| |
| sub runcmd { |
| my ($cmd) = @_; |
| print "[$cmd]\n"; |
| system $cmd; |
| } |