| #!/usr/bin/env perl |
| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| # Takes a config file on STDIN, and replaces each of the file arguments with those found on the |
| # command line. Parameters not found in the config file (and thus not replaceable) are appended to |
| # the end. |
| # |
| # cat joshua.config | copy-config.pl -param1 value -param2 "multi-word value" ... |
| # |
| # Some parameters can take options. For example, there are multiple permitted "tm" lines. If you |
| # want to specify which one to replace, you can add "/N" after the name, where N is the 0-indexed |
| # index of the grammar. For example, |
| # |
| # cat joshua.config | copy-config.pl -tm0/path /path/to/grammar -tm0/owner pt |
| # |
| # This will ensure that only the first tm line gets updated. |
| # |
| # Most keys are replacement keys: specifying a value will replace what's found in the config |
| # file. The only exception is -feature-function, instances of which are appended to the output. |
| # Feature functions can't be deleted from the config file. |
| # |
| # Weights |
| |
| use strict; |
| use warnings; |
| |
| # Step 1. process command-line arguments for key/value pairs. The keys are matched next to the |
| # config file and the configfile values replaced with those found on the command-line. |
| |
| my (%params,%weights,%features); |
| while (my $key = shift @ARGV) { |
| # make sure the parameter has a leading dash |
| if ($key !~ /^-/) { |
| print STDERR "* FATAL: invalid command-line argument '$key'\n"; |
| exit 1; |
| } |
| |
| # remove leading dash |
| $key =~ s/^-+//g; |
| $key = normalize_key($key); |
| |
| # get the value and store the pair |
| my $value = shift(@ARGV); |
| |
| # -feature-function lines are gathered, other keys can be present only once |
| if ($key eq "featurefunction") { |
| $features{$value} = $value; |
| } elsif ($key eq "weights") { |
| my @tokens = split(' ', $value); |
| for (my $i = 0; $i < @tokens; $i += 2) { |
| $weights{$tokens[$i]} = $tokens[$i+1]; |
| } |
| } else { |
| $params{$key} = $value; |
| } |
| } |
| |
| # Step 2. Now read through the config file. |
| |
| my @weights_order; |
| my $tm_index = -1; |
| while (my $line = <>) { |
| if ($line =~ /^\s*$/ or $line =~ /^#/) { |
| # Comments, empty lines |
| print $line; |
| |
| } elsif ($line =~ /=/) { |
| # Regular configuration variables. |
| |
| # split on equals |
| my ($key,$value) = split(/\s*=\s*/, $line, 2); |
| |
| # remove leading and trailing spaces |
| $key =~ s/^\s+//g; |
| $value =~ s/\s+$//g; |
| |
| my $norm_key = normalize_key($key); |
| |
| # TMs get special treatment. We parse the line (supporting old format and new keyword format), |
| # and then compare to command-line args to see what gets updated |
| if ($norm_key =~ /^tm/) { |
| $tm_index++; |
| |
| # get the hash of tm values from the config file |
| my $tm_hash = parse_tm_line($value); |
| |
| # Delete TM lines if they've been requested to be deleted |
| if (exists $params{"tm${tm_index}"} and $params{"tm${tm_index}"} eq "DELETE") { |
| delete $params{"tm${tm_index}"}; |
| next; |
| } |
| |
| # check if each one was passed as a command-line argument, and if so, retrieve its new value |
| foreach my $tmkey (keys %$tm_hash) { |
| my $concat = "tm${tm_index}/${tmkey}"; |
| if (exists $params{$concat}) { |
| $tm_hash->{$tmkey} = $params{$concat}; |
| delete $params{$concat}; |
| } |
| } |
| # write out the new line (using new keyword format always) |
| $params{$norm_key} = $tm_hash->{type}; |
| foreach my $tmkey (keys %$tm_hash) { |
| next if $tmkey eq "type"; |
| $params{$norm_key} .= " -$tmkey $tm_hash->{$tmkey}"; |
| } |
| } |
| |
| # If an exact feature function line is in the config file, delete |
| # it from the command-line arguments so it doesn't get printed |
| # later. All features not found in the config file are appended. |
| if ($norm_key eq "featurefunction" and exists $features{$value}) { |
| delete $features{$value}; |
| } |
| |
| # if the parameter was found on the command line, print out its replaced value |
| if (exists $params{$norm_key}) { |
| print "$key = " . $params{$norm_key} . "\n"; |
| |
| # Deleting the parameter means it will only match the first time. Useful for duplicated keys |
| # (like multiple "tm = ..." lines) |
| delete $params{$norm_key}; |
| } else { |
| # otherwise, print out the original line |
| print $line; |
| } |
| |
| } else { |
| # Weights. Save these to print at the end, just to keep things neat. |
| chomp($line); |
| my ($name, $value) = split(' ', $line); |
| $weights{$name} = $value unless exists $weights{$name}; |
| push(@weights_order, $name); |
| } |
| } |
| |
| # print out the remaining keys for appending to the end of the file |
| if (scalar(keys(%params))) { |
| print $/; |
| foreach my $key (keys %params) { |
| next if $key =~ /^tm/; # skip unused tm flags |
| print STDERR "* WARNING: no key '$key' found in config file (appending to end)\n"; |
| print "$key = $params{$key}\n"; |
| } |
| } |
| |
| # print out the feature functions |
| map { print "feature-function = $_\n" } (keys %features); |
| print $/; |
| |
| # Print out the weights |
| foreach my $weight (@weights_order) { |
| print "$weight $weights{$weight}\n"; |
| delete $weights{$weight}; |
| } |
| foreach my $weight (keys %weights) { |
| print "$weight $weights{$weight}\n"; |
| } |
| |
| # Remove hyphens and underscores, lowercase |
| sub normalize_key { |
| my ($key) = @_; |
| |
| $key =~ s/[-_]//g; |
| $key = lc $key; |
| |
| # print STDERR "** KEY($_[0]) -> $key\n"; |
| return $key; |
| } |
| |
| # Produces a {key => value} hash from the TM line, supporting both the old format: |
| # |
| # tm = thrax pt 0 /path/to/grammar.gz |
| # |
| # and the new one |
| # |
| # tm = thrax -owner pt -maxspan 0 -path /path/to/grammar.gz |
| # |
| sub parse_tm_line { |
| my ($line) = @_; |
| |
| # line might still have keyword on it |
| $line =~ s/^tm = // if ($line =~ /^tm = /); |
| |
| my %hash; |
| my @tokens = split(' ', $line); |
| $hash{type} = shift(@tokens); |
| if ($tokens[0] =~ /^-/) { |
| while (@tokens) { |
| my $key = shift(@tokens); |
| my $value = shift(@tokens); |
| $key =~ s/^-//; |
| $hash{$key} = $value; |
| } |
| } else { |
| $hash{owner} = shift(@tokens); |
| $hash{maxspan} = shift(@tokens); |
| $hash{path} = shift(@tokens); |
| } |
| |
| return \%hash; |
| } |