#!/usr/bin/perl
#
# Convert seek-phrases-in-log output to something suitable for use as a production ruleset
#
# usage: <this script> nameprefix [file ...]
#
# Every input line is echoed to STDOUT (with control and high-bit characters
# escaped).  Each "# passed hit-rate threshold: ..." header line closes the
# current group of body rules and opens a new one; when a group is closed a
# meta rule named "<nameprefix><N>" is printed for it.

use strict;
use warnings;

# Prefix used to build the generated meta rule names.  It is required:
# without it the generated names would consist of the block number alone.
my $nameprefix = shift @ARGV;
die "usage: $0 nameprefix [file ...]\n"
    unless defined $nameprefix && length $nameprefix;

my $num  = 0;     # number of sub-rule blocks closed so far
my @rule = ();    # names of the body rules seen in the current block

while (my $line = <>) {
    # fix control chars, high-bit chars: each becomes a \x{..} escape.
    # Newline (\x0a) is excluded from the class so line endings survive.
    $line =~ s/([\x00-\x09\x0b-\x1f\x7f-\xff])/sprintf "\\x{%02x}", ord $1/ge;

    print $line;

    # Remember the name of every body rule in the current block.
    if ($line =~ /^body\s+(\S+)\s/) {
        push @rule, $1;
    }

    # A threshold header ends the previous block and begins the next one.
    if ($line =~ /^# passed hit-rate threshold: (\S+)/) {
        end_subrule_block();
        start_subrule_block($1);
    }
}

# Flush whatever block was still open at end of input.
end_subrule_block();
exit;

# Open a new sub-rule block by discarding the body rule names collected for
# the previous one.  Any argument supplied by the caller is ignored.
sub start_subrule_block {
    return @rule = ();
}

# Close the current sub-rule block: bump the block counter and print the
# meta/score/describe lines for the rule named "<nameprefix><counter>".
#
# When body rules were collected, the meta expression ORs them together and
# the score is 3.0.  When none were collected, a placeholder rule with the
# expression (0) and a score of 0 is printed instead, so the counter still
# advances for that block.
sub end_subrule_block {
    $num++;
    my $name = $nameprefix . $num;

    my ($expr, $score) = @rule
        ? (join(" || ", @rule), "3.0")
        : ("0", "0");

    print "
meta $name ($expr)
score $name $score
describe $name Body contains frequently-spammed text patterns
";
}