blob: 2d85d578bf65f094e580c643c9e37364cea240e1 [file] [log] [blame]
--- misc/mythes-1.2.0.orig/th_gen_idx.pl 2010-02-27 12:52:58.000000000 -0300
+++ misc/build/mythes-1.2.0/th_gen_idx.pl 2012-01-12 04:13:15.149371123 -0300
@@ -1,11 +1,26 @@
-#!/usr/bin/perl
-
-# perl program to take a thesaurus structured text data file
-# and create the proper sorted index file (.idx)
+:
+eval 'exec perl -wS $0 ${1+"$@"}'
+ if 0;
+#**************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
#
-# typcially invoked as follows:
-# cat th_en_US_new.dat | ./th_gen_idx.pl > th_en_US_new.idx
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
#
+#**************************************************************
sub by_entry {
my ($aent, $aoff) = split('\|',$a);
@@ -13,6 +28,27 @@ sub by_entry {
$aent cmp $bent;
}
+# FIXME: only "-o outfile" is recognized and input always comes from STDIN; an input-file option or real option parsing may be wanted
+# Return the argument that follows the first "-o" in @ARGV, or "" when
+# "-o" is absent or is the last argument.  The follower is returned
+# as-is, even if it is itself "-o".
+sub get_outfile {
+    # Stop one short of the end: a trailing "-o" has no filename after it.
+    my $last_flag_pos = $#ARGV - 1;
+    for my $pos ( 0 .. $last_flag_pos ) {
+        next unless $ARGV[$pos] eq "-o";
+        return $ARGV[ $pos + 1 ];
+    }
+    return "";
+}
+
+# Print a two-line usage summary on STDOUT, then terminate with status 99.
+sub usage {
+    my $synopsis = "$0 -o outfile < input";
+    print "usage:\n" . $synopsis . "\n";
+    exit 99;
+}
+
# main routine
my $ne = 0; # number of entries in index
my @tindex=(); # the index itself
@@ -24,6 +60,10 @@ my $nm=0; # number of meaning fo
my $meaning=""; # current meaning and synonyms
my $p; # misc uses
my $encoding; # encoding used by text file
+# Index destination: the path given after "-o" on the command line.
+my $outfile = get_outfile();
+# get_outfile() returns "" when no "-o <file>" was supplied; bail out then.
+usage() if $outfile eq "";
# top line of thesaurus provides encoding
$encoding=<STDIN>;
@@ -51,9 +91,13 @@ while ($rec=<STDIN>){
# now we have all of the information
# so sort it and then output the encoding, count and index data
@tindex = sort by_entry @tindex;
-print STDOUT "$encoding\n";
-print STDOUT "$ne\n";
+# Write the index header. Three-arg open keeps $outfile from being parsed as a mode or pipe.
+print "$outfile\n";             # print the destination path on STDOUT
+open( OUTFILE, '>', $outfile ) or die "ERROR: Can't open $outfile for writing: $!";
+print OUTFILE "$encoding\n";    # encoding taken from the top line of the input
+print OUTFILE "$ne\n";          # number of entries in the index
foreach $one (@tindex) {
- print STDOUT "$one\n";
+ print OUTFILE "$one\n";
}
+close OUTFILE or die "ERROR: Can't close $outfile: $!";    # buffered write errors are reported at close