blob: 775d0b77c85020c48113ee6693febe6167c757b3 [file] [log] [blame]
#!/usr/bin/perl
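# Takes a corpus of whitespace-separated words on STDIN (or from the files
# named on the command line) and builds a vocabulary with word counts,
# writing one line per distinct word to STDOUT, most frequent word first,
# with IDs starting at 1, in the format
#
# ID WORD COUNT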
use utf8;
use warnings;
use strict;
# Decode all input as UTF-8 and encode all output as UTF-8.  ":std" applies
# the layer to STDIN/STDOUT/STDERR, and the pragma also sets the default layer
# for the files that the <> operator opens when input files are named on the
# command line -- something a plain binmode(STDIN) does not reach.
use open qw(:std :encoding(UTF-8));

# Number of occurrences of each whitespace-separated token seen in the input.
my %count;

while (my $line = <>) {
    # split ' ' skips leading whitespace and splits on runs of whitespace
    # (the trailing newline included), so no chomp is needed and no empty
    # tokens are produced.
    for my $word (split ' ', $line) {
        $count{$word}++;
    }
}

# Most frequent words first.  Ties are broken by the word itself so that the
# ID given to each word is reproducible from run to run; hash key order alone
# is not stable between invocations.
my @ranked = sort { $count{$b} <=> $count{$a} or $a cmp $b } keys %count;

# Emit "ID WORD COUNT" lines, numbering the words from 1 in rank order.
my $id = 1;
for my $word (@ranked) {
    print $id++ . " $word $count{$word}\n";
}