#!/usr/bin/perl
#
# split-log-into-buckets [n]
#
# Split a mass-check log read from STDIN into n files named
# split-1.log .. split-n.log in the current directory.  Every input
# line (comment lines included; none are dropped or duplicated) is
# written to exactly one bucket, chosen uniformly at random, so the
# buckets end up roughly -- not exactly -- the same size.
# n defaults to 10.

use strict;
use warnings;

# Bucket count: the first argument, truncated to an integer.  A missing,
# non-numeric or zero argument falls back to the default of 10.
my $numbuckets = 0;
if (defined $ARGV[0]) {
    # A non-numeric argument deliberately evaluates to 0 here (and thus
    # selects the default) rather than raising a warning.
    no warnings 'numeric';
    $numbuckets = int($ARGV[0] + 0);
}
$numbuckets = 10 unless $numbuckets;

# A negative count would create no files and leave nowhere to write to.
die "usage: $0 [n]   (n must be a positive integer)\n" if $numbuckets < 1;

# Open one output handle per bucket; $buckets[$i - 1] is split-$i.log.
my @buckets;
foreach my $i (1 .. $numbuckets) {
    my $name = "split-$i.log";
    print "Creating $name\n";
    open(my $fh, '>', $name)
        or die "cannot create $name: $!\n";
    push @buckets, $fh;
}

# Send each input line to a randomly chosen bucket.  rand($n) returns a
# value in [0, $n), so the truncated index is always within 0 .. $n-1.
# Printing to the handle explicitly (instead of select()ing it) leaves
# STDOUT as the default output handle.
while (my $line = <STDIN>) {
    my $fh = $buckets[ int(rand($numbuckets)) ];
    print {$fh} $line
        or die "write to bucket file failed: $!\n";
}

# Buffered write errors only surface at close time, so check each one.
foreach my $i (1 .. $numbuckets) {
    close($buckets[$i - 1])
        or die "cannot close split-$i.log: $!\n";
}