blob: ee909cc0a7c3a1f915dafb0522b7be008c163b4d [file] [log] [blame]
#!/usr/bin/perl
#
# split-log-into-buckets [n]
#
# Split a mass-check log into n evenly-sized buckets, taking messages
# evenly from all checked corpora and preserving comments.
# It does this by cycling through the buckets in order as each line
# is read.  n defaults to 10.
use strict;
use warnings;

# Number of output buckets: the first command-line argument, falling back
# to 10 when it is absent, zero, or not a number.
my $numbuckets = 0;
if (defined $ARGV[0]) {
    # Keep the lenient parsing (e.g. "abc" -> default) without emitting a
    # "not numeric" warning now that warnings are enabled.
    no warnings 'numeric';
    $numbuckets = int($ARGV[0]);
}
die "split-log-into-buckets: bucket count must be positive, got '$ARGV[0]'\n"
    if $numbuckets < 0;
$numbuckets ||= 10;

# One write handle per bucket; element $i - 1 writes to split-$i.log.
my @buckets;
foreach my $i (1 .. $numbuckets) {
    my $file = "split-$i.log";
    print "Creating $file\n";
    open(my $fh, '>', $file) or die "Cannot create $file: $!\n";
    push @buckets, $fh;
}

# Index of the bucket that receives the next input line.
my $current = 0;
while (my $line = <STDIN>) {
    # Write straight to the bucket handle instead of select()ing it, so
    # the default output handle (STDOUT) is left untouched.
    print { $buckets[$current] } $line
        or die "Cannot write to split-" . ($current + 1) . ".log: $!\n";

    # Comment lines do not advance the rotation, so a comment lands in the
    # same bucket as the line that follows it.
    next if $line =~ /^#/;
    $current = ($current + 1) % $numbuckets;
}

# Buffered write errors only surface at close time, so check each one.
foreach my $i (1 .. $numbuckets) {
    close($buckets[$i - 1]) or die "Cannot close split-$i.log: $!\n";
}