| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| package org.apache.joshua.adagrad; |
| |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Set; |
| import java.util.Vector; |
| |
| import org.apache.joshua.corpus.Vocabulary; |
| import org.apache.joshua.metrics.EvaluationMetric; |
| |
// This class implements the AdaGrad algorithm (Duchi et al., 2011) for tuning feature weights against an evaluation metric.
| public class Optimizer { |
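  /*
   * Hypothetical usage sketch (not from the original source): the public static
   * configuration fields declared at the bottom of this class are assumed to be
   * set by a driver such as AdaGradCore before the optimizer runs; the concrete
   * values below are illustrative only.
   *
   *   Optimizer.sentNum = tuningSetSize;
   *   Optimizer.adagradIter = 5;      // epochs over the tuning set
   *   Optimizer.batchSize = 1;
   *   Optimizer.eta = 0.1;            // base learning rate
   *   Optimizer.regularization = 2;   // 0 = none, 1 = l1, 2 = l2
   *   Optimizer.lam = 0.01;           // regularization strength
   *   Optimizer.evalMetric = metric;  // e.g. BLEU
   *   Optimizer opt = new Optimizer(output, isOptimizable, lambda, featHash, statsHash);
   *   double[] tunedLambda = opt.runOptimizer();
   */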
  public Optimizer(Vector<String> _output, boolean[] _isOptimizable, double[] _initialLambda,
| HashMap<String, String>[] _feat_hash, HashMap<String, String>[] _stats_hash) { |
    output = _output; // per-epoch summary strings are appended here
| isOptimizable = _isOptimizable; |
    initialLambda = _initialLambda; // initial weights array
    paramDim = initialLambda.length - 1; // lambda[0] is unused (ZMERT convention)
| feat_hash = _feat_hash; // feature hash table |
| stats_hash = _stats_hash; // suff. stats hash table |
| finalLambda = new double[initialLambda.length]; |
| System.arraycopy(initialLambda, 0, finalLambda, 0, finalLambda.length); |
| } |
| |
  //run AdaGrad for adagradIter epochs over the tuning set
| public double[] runOptimizer() { |
| List<Integer> sents = new ArrayList<>(); |
| for( int i = 0; i < sentNum; ++i ) |
| sents.add(i); |
    double[] avgLambda = new double[initialLambda.length]; //only needed when averaging is required; Java zero-initializes it
| for ( int iter = 0; iter < adagradIter; ++iter ) { |
      System.arraycopy(finalLambda, 1, initialLambda, 1, paramDim); // initialLambda now holds the weights at the start of this epoch (the lazy-update default)
| if(needShuffle) |
| Collections.shuffle(sents); |
| |
| double oraMetric, oraScore, predMetric, predScore; |
| double[] oraPredScore = new double[4]; |
| double loss = 0; |
| double diff = 0; |
| double sumMetricScore = 0; |
| double sumModelScore = 0; |
| String oraFeat = ""; |
| String predFeat = ""; |
| String[] oraPredFeat = new String[2]; |
| String[] vecOraFeat; |
| String[] vecPredFeat; |
| String[] featInfo; |
| int numBatch = 0; |
| int numUpdate = 0; |
| Iterator<Integer> it; |
| Integer diffFeatId; |
| |
| //update weights |
| Integer s; |
| int sentCount = 0; |
| double prevLambda = 0; |
| double diffFeatVal = 0; |
| double oldVal = 0; |
| double gdStep = 0; |
| double Hii = 0; |
| double gradiiSquare = 0; |
| int lastUpdateTime = 0; |
| HashMap<Integer, Integer> lastUpdate = new HashMap<>(); |
| HashMap<Integer, Double> lastVal = new HashMap<>(); |
| HashMap<Integer, Double> H = new HashMap<>(); |
| while( sentCount < sentNum ) { |
| loss = 0; |
| ++numBatch; |
| HashMap<Integer, Double> featDiff = new HashMap<>(); |
| for(int b = 0; b < batchSize; ++b ) { |
| //find out oracle and prediction |
| s = sents.get(sentCount); |
| findOraPred(s, oraPredScore, oraPredFeat, finalLambda, featScale); |
| |
| //the model scores here are already scaled in findOraPred |
| oraMetric = oraPredScore[0]; |
| oraScore = oraPredScore[1]; |
| predMetric = oraPredScore[2]; |
| predScore = oraPredScore[3]; |
| oraFeat = oraPredFeat[0]; |
| predFeat = oraPredFeat[1]; |
| |
| //update the scale |
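          // featScale keeps the scaled model scores on an order of magnitude
          // comparable to the metric scores: whenever the running ratio
          // |model| / |metric| exceeds scoreRatio, featScale is reset to
          // |metric| / |model|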
          if(needScale) { //otherwise featScale remains 1.0
| sumMetricScore += Math.abs(oraMetric + predMetric); |
| //restore the original model score |
| sumModelScore += Math.abs(oraScore + predScore) / featScale; |
| |
| if(sumModelScore/sumMetricScore > scoreRatio) |
| featScale = sumMetricScore/sumModelScore; |
| } |
| |
| vecOraFeat = oraFeat.split("\\s+"); |
| vecPredFeat = predFeat.split("\\s+"); |
| |
| //accumulate difference feature vector |
| if ( b == 0 ) { |
| for (String aVecOraFeat : vecOraFeat) { |
| featInfo = aVecOraFeat.split("="); |
| diffFeatId = Integer.parseInt(featInfo[0]); |
| featDiff.put(diffFeatId, Double.parseDouble(featInfo[1])); |
| } |
| for (String aVecPredFeat : vecPredFeat) { |
| featInfo = aVecPredFeat.split("="); |
| diffFeatId = Integer.parseInt(featInfo[0]); |
| if (featDiff.containsKey(diffFeatId)) { //overlapping features |
| diff = featDiff.get(diffFeatId) - Double.parseDouble(featInfo[1]); |
| if (Math.abs(diff) > 1e-20) |
| featDiff.put(diffFeatId, diff); |
| else |
| featDiff.remove(diffFeatId); |
| } else //features only firing in the 2nd feature vector |
| featDiff.put(diffFeatId, -1.0 * Double.parseDouble(featInfo[1])); |
| } |
| } else { |
| for (String aVecOraFeat : vecOraFeat) { |
| featInfo = aVecOraFeat.split("="); |
| diffFeatId = Integer.parseInt(featInfo[0]); |
| if (featDiff.containsKey(diffFeatId)) { //overlapping features |
| diff = featDiff.get(diffFeatId) + Double.parseDouble(featInfo[1]); |
| if (Math.abs(diff) > 1e-20) |
| featDiff.put(diffFeatId, diff); |
| else |
| featDiff.remove(diffFeatId); |
| } else //features only firing in the new oracle feature vector |
| featDiff.put(diffFeatId, Double.parseDouble(featInfo[1])); |
| } |
| for (String aVecPredFeat : vecPredFeat) { |
| featInfo = aVecPredFeat.split("="); |
| diffFeatId = Integer.parseInt(featInfo[0]); |
| if (featDiff.containsKey(diffFeatId)) { //overlapping features |
| diff = featDiff.get(diffFeatId) - Double.parseDouble(featInfo[1]); |
| if (Math.abs(diff) > 1e-20) |
| featDiff.put(diffFeatId, diff); |
| else |
| featDiff.remove(diffFeatId); |
| } else //features only firing in the new prediction feature vector |
| featDiff.put(diffFeatId, -1.0 * Double.parseDouble(featInfo[1])); |
| } |
| } |
| |
| //remember the model scores here are already scaled |
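          // structured hinge loss for this pair: (metric gap between oracle and
          // prediction) minus (unscaled model-score gap); it is positive exactly
          // when the model fails to separate the oracle from the prediction by
          // the metric margin, and only positive losses trigger an update below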
          double singleLoss = evalMetric.getToBeMinimized() ?
              (predMetric - oraMetric) - (oraScore - predScore) / featScale :
              (oraMetric - predMetric) - (oraScore - predScore) / featScale;
| if(singleLoss > 0) |
| loss += singleLoss; |
| ++sentCount; |
| if( sentCount >= sentNum ) { |
| break; |
| } |
        } //for( int b = 0; b < batchSize; ++b )
| |
| |
| if( loss > 0 ) { |
| ++numUpdate; |
          //update weights (see Duchi'11, Eq.23); features with zero gradient are brought up to date lazily
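          // AdaGrad composite-mirror-descent updates implemented below, with
          //   H_ii = sqrt( sum over past steps of g_i^2 )   (stored in H)
          //   v_i  = w_i - eta * g_i / H_ii
          //   l1:  w_i <- sign(v_i) * max( |v_i| - lam * eta / H_ii, 0 )
          //   l2:  w_i <- ( H_ii * w_i - eta * g_i ) / ( lam + H_ii )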
| Set<Integer> diffFeatSet = featDiff.keySet(); |
| it = diffFeatSet.iterator(); |
| while(it.hasNext()) { //note these are all non-zero gradients! |
| diffFeatId = it.next(); |
| diffFeatVal = -1.0 * featDiff.get(diffFeatId); //gradient |
| if( regularization > 0 ) { |
            lastUpdateTime = lastUpdate.getOrDefault(diffFeatId, 0);
| if( lastUpdateTime < numUpdate - 1 ) { |
| //haven't been updated (gradient=0) for at least 2 steps |
| //lazy compute prevLambda now |
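              // closed form for k = (numUpdate - 1 - lastUpdateTime) skipped
              // zero-gradient steps:
              //   l1: |w| shrinks by lam * eta * k / H_ii, truncated at 0
              //   l2: w is multiplied by ( H_ii / (lam + H_ii) )^k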
              oldVal = lastVal.getOrDefault(diffFeatId, initialLambda[diffFeatId]);
              Hii = H.getOrDefault(diffFeatId, 0.0);
| if(Math.abs(Hii) > 1e-20) { |
| if( regularization == 1 ) |
                  prevLambda =
                      Math.signum(oldVal) * clip( Math.abs(oldVal) - lam * eta * (numUpdate - 1 - lastUpdateTime) / Hii );
| else if( regularization == 2 ) { |
| prevLambda = |
| Math.pow( Hii/(lam+Hii), (numUpdate - 1 - lastUpdateTime) ) * oldVal; |
| if(needAvg) { //fill the gap due to lazy update |
| double prevLambdaCopy = prevLambda; |
| double scale = Hii/(lam+Hii); |
| for( int t = 0; t < numUpdate - 1 - lastUpdateTime; ++t ) { |
| avgLambda[diffFeatId] += prevLambdaCopy; |
| prevLambdaCopy /= scale; |
| } |
| } |
| } |
| } else { |
| if( regularization == 1 ) |
| prevLambda = 0; |
| else if( regularization == 2 ) |
| prevLambda = oldVal; |
| } |
| } else //just updated at last time step or just started |
| prevLambda = finalLambda[diffFeatId]; |
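              // AdaGrad accumulator: H stores H_ii = sqrt(sum of squared gradients
              // seen so far for this feature), so recover the sum of squares, add
              // the new squared gradient, and take the square root again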
| if(H.get(diffFeatId) != null) { |
| gradiiSquare = H.get(diffFeatId); |
| gradiiSquare *= gradiiSquare; |
| gradiiSquare += diffFeatVal * diffFeatVal; |
| Hii = Math.sqrt(gradiiSquare); |
| } else |
| Hii = Math.abs(diffFeatVal); |
| H.put(diffFeatId, Hii); |
| //update the weight |
| if( regularization == 1 ) { |
| gdStep = prevLambda - eta * diffFeatVal / Hii; |
| finalLambda[diffFeatId] = Math.signum(gdStep) * clip( Math.abs(gdStep) - lam * eta / Hii ); |
| } else if(regularization == 2 ) { |
| finalLambda[diffFeatId] = (Hii * prevLambda - eta * diffFeatVal) / (lam + Hii); |
| if(needAvg) |
| avgLambda[diffFeatId] += finalLambda[diffFeatId]; |
| } |
| lastUpdate.put(diffFeatId, numUpdate); |
| lastVal.put(diffFeatId, finalLambda[diffFeatId]); |
| } else { //if no regularization |
| if(H.get(diffFeatId) != null) { |
| gradiiSquare = H.get(diffFeatId); |
| gradiiSquare *= gradiiSquare; |
| gradiiSquare += diffFeatVal * diffFeatVal; |
| Hii = Math.sqrt(gradiiSquare); |
| } else |
| Hii = Math.abs(diffFeatVal); |
| H.put(diffFeatId, Hii); |
| finalLambda[diffFeatId] = finalLambda[diffFeatId] - eta * diffFeatVal / Hii; |
| if(needAvg) |
| avgLambda[diffFeatId] += finalLambda[diffFeatId]; |
| } |
| } //while(it.hasNext()) |
| } //if(loss > 0) |
| else { //no loss, therefore the weight update is skipped |
| //however, the avg weights still need to be accumulated |
| if( regularization == 0 ) { |
| for( int i = 1; i < finalLambda.length; ++i ) |
| avgLambda[i] += finalLambda[i]; |
| } else if( regularization == 2 ) { |
| if(needAvg) { |
| //due to lazy update, we need to figure out the actual |
| //weight vector at this point first... |
| for( int i = 1; i < finalLambda.length; ++i ) { |
                if( lastUpdate.get(i) != null && lastUpdate.get(i) < numUpdate ) {
                  oldVal = lastVal.get(i);
                  Hii = H.get(i);
                  //lazy compute
                  avgLambda[i] +=
                      Math.pow( Hii/(lam+Hii), (numUpdate - lastUpdate.get(i)) ) * oldVal;
                } else
                  avgLambda[i] += finalLambda[i];
| } |
| } |
| } |
| } |
| } //while( sentCount < sentNum ) |
| if( regularization > 0 ) { |
| for( int i = 1; i < finalLambda.length; ++i ) { |
| //now lazy compute those weights that haven't been taken care of |
| if( lastUpdate.get(i) == null ) |
| finalLambda[i] = 0; |
| else if( lastUpdate.get(i) < numUpdate ) { |
| oldVal = lastVal.get(i); |
| Hii = H.get(i); |
| if( regularization == 1 ) |
| finalLambda[i] = |
| Math.signum(oldVal) * clip( Math.abs(oldVal) - lam * eta * (numUpdate - lastUpdate.get(i)) / Hii ); |
| else if( regularization == 2 ) { |
| finalLambda[i] = |
| Math.pow( Hii/(lam+Hii), (numUpdate - lastUpdate.get(i)) ) * oldVal; |
| if(needAvg) { //fill the gap due to lazy update |
| double prevLambdaCopy = finalLambda[i]; |
| double scale = Hii/(lam+Hii); |
| for( int t = 0; t < numUpdate - lastUpdate.get(i); ++t ) { |
| avgLambda[i] += prevLambdaCopy; |
| prevLambdaCopy /= scale; |
| } |
| } |
| } |
| } |
| if( regularization == 2 && needAvg ) { |
| if( iter == adagradIter - 1 ) |
| finalLambda[i] = avgLambda[i] / ( numBatch * adagradIter ); |
| } |
| } |
| } else { //if no regularization |
| if( iter == adagradIter - 1 && needAvg ) { |
| for( int i = 1; i < finalLambda.length; ++i ) |
| finalLambda[i] = avgLambda[i] / ( numBatch * adagradIter ); |
| } |
| } |
| |
| double initMetricScore; |
| if (iter == 0) { |
| initMetricScore = computeCorpusMetricScore(initialLambda); |
| finalMetricScore = computeCorpusMetricScore(finalLambda); |
| } else { |
| initMetricScore = finalMetricScore; |
| finalMetricScore = computeCorpusMetricScore(finalLambda); |
| } |
| // prepare the printing info |
| String result = " Initial " |
| + evalMetric.get_metricName() + "=" + String.format("%.4f", initMetricScore) + " Final " |
| + evalMetric.get_metricName() + "=" + String.format("%.4f", finalMetricScore); |
| |
| output.add(result); |
    } //for ( int iter = 0; iter < adagradIter; ++iter )
| |
| //non-optimizable weights should remain unchanged |
| ArrayList<Double> cpFixWt = new ArrayList<>(); |
| for ( int i = 1; i < isOptimizable.length; ++i ) { |
| if ( ! isOptimizable[i] ) |
| cpFixWt.add(finalLambda[i]); |
| } |
| normalizeLambda(finalLambda); |
| int countNonOpt = 0; |
| for ( int i = 1; i < isOptimizable.length; ++i ) { |
| if ( ! isOptimizable[i] ) { |
| finalLambda[i] = cpFixWt.get(countNonOpt); |
| ++countNonOpt; |
| } |
| } |
| return finalLambda; |
| } |
| |
  // non-negative truncation max(x, 0), used by the l1 proximal (truncated-gradient) step
  private double clip(double x) {
    return x > 0 ? x : 0;
  }
| |
  public double computeCorpusMetricScore(double[] lambda) {
| int suffStatsCount = evalMetric.get_suffStatsCount(); |
| double modelScore; |
| double maxModelScore; |
| Set<String> candSet; |
| String candStr; |
| String[] feat_str; |
| String[] tmpStatsVal = new String[suffStatsCount]; |
    int[] corpusStatsVal = new int[suffStatsCount]; // zero-initialized by Java
| |
| for (int i = 0; i < sentNum; i++) { |
| candSet = feat_hash[i].keySet(); |
| |
| // find out the 1-best candidate for each sentence |
| // this depends on the training mode |
| maxModelScore = NegInf; |
| for (String aCandSet : candSet) { |
| modelScore = 0.0; |
      candStr = aCandSet; // already a String
| |
| feat_str = feat_hash[i].get(candStr).split("\\s+"); |
| |
| String[] feat_info; |
| |
| for (String aFeat_str : feat_str) { |
| feat_info = aFeat_str.split("="); |
          modelScore += Double.parseDouble(feat_info[1]) * lambda[Vocabulary.id(feat_info[0])];
| } |
| |
| if (maxModelScore < modelScore) { |
| maxModelScore = modelScore; |
          tmpStatsVal = stats_hash[i].get(candStr).split("\\s+"); // save the suff stats
        }
| } |
| |
      for (int j = 0; j < suffStatsCount; j++)
        corpusStatsVal[j] += Integer.parseInt(tmpStatsVal[j]); // accumulate corpus-level suff stats
| } // for( int i=0; i<sentNum; i++ ) |
| |
| return evalMetric.score(corpusStatsVal); |
| } |
| |
| private void findOraPred(int sentId, double[] oraPredScore, String[] oraPredFeat, double[] lambda, double featScale) |
| { |
| double oraMetric=0, oraScore=0, predMetric=0, predScore=0; |
| String oraFeat="", predFeat=""; |
| double candMetric = 0, candScore = 0; //metric and model scores for each cand |
| Set<String> candSet = stats_hash[sentId].keySet(); |
| String cand = ""; |
| String feats = ""; |
| String oraCand = ""; //only used when BLEU/TER-BLEU is used as metric |
| String[] featStr; |
| String[] featInfo; |
| |
| int actualFeatId; |
| double bestOraScore; |
| double worstPredScore; |
| |
| if(oraSelectMode==1) |
| bestOraScore = NegInf; //larger score will be selected |
| else { |
| if(evalMetric.getToBeMinimized()) |
| bestOraScore = PosInf; //smaller score will be selected |
| else |
| bestOraScore = NegInf; |
| } |
| |
| if(predSelectMode==1 || predSelectMode==2) |
| worstPredScore = NegInf; //larger score will be selected |
| else { |
| if(evalMetric.getToBeMinimized()) |
| worstPredScore = NegInf; //larger score will be selected |
| else |
| worstPredScore = PosInf; |
| } |
| |
| for (String aCandSet : candSet) { |
| cand = aCandSet.toString(); |
| candMetric = computeSentMetric(sentId, cand); //compute metric score |
| |
| //start to compute model score |
| candScore = 0; |
| featStr = feat_hash[sentId].get(cand).split("\\s+"); |
| feats = ""; |
| |
| for (String aFeatStr : featStr) { |
| featInfo = aFeatStr.split("="); |
| actualFeatId = Vocabulary.id(featInfo[0]); |
| candScore += Double.parseDouble(featInfo[1]) * lambda[actualFeatId]; |
| if ((actualFeatId < isOptimizable.length && isOptimizable[actualFeatId]) |
| || actualFeatId >= isOptimizable.length) |
| feats += actualFeatId + "=" + Double.parseDouble(featInfo[1]) + " "; |
| } |
| |
| candScore *= featScale; //scale the model score |
| |
| //is this cand oracle? |
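      // oraSelectMode == 1 selects the "hope" candidate (cf. Chiang, 2012): the
      // one maximizing model score plus metric gain (or minus metric cost when
      // the metric is minimized); any other mode simply selects the metric-best
      // candidate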
| if (oraSelectMode == 1) {//"hope", b=1, r=1 |
| if (evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better |
| if (bestOraScore <= (candScore - candMetric)) { |
| bestOraScore = candScore - candMetric; |
| oraMetric = candMetric; |
| oraScore = candScore; |
| oraFeat = feats; |
| oraCand = cand; |
| } |
| } else { |
| if (bestOraScore <= (candScore + candMetric)) { |
| bestOraScore = candScore + candMetric; |
| oraMetric = candMetric; |
| oraScore = candScore; |
| oraFeat = feats; |
| oraCand = cand; |
| } |
| } |
| } else {//best metric score(ex: max BLEU), b=1, r=0 |
| if (evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better |
| if (bestOraScore >= candMetric) { |
| bestOraScore = candMetric; |
| oraMetric = candMetric; |
| oraScore = candScore; |
| oraFeat = feats; |
| oraCand = cand; |
| } |
| } else { |
| if (bestOraScore <= candMetric) { |
| bestOraScore = candMetric; |
| oraMetric = candMetric; |
| oraScore = candScore; |
| oraFeat = feats; |
| oraCand = cand; |
| } |
| } |
| } |
| |
| //is this cand prediction? |
| if (predSelectMode == 1) {//"fear" |
| if (evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better |
| if (worstPredScore <= (candScore + candMetric)) { |
| worstPredScore = candScore + candMetric; |
| predMetric = candMetric; |
| predScore = candScore; |
| predFeat = feats; |
| } |
| } else { |
| if (worstPredScore <= (candScore - candMetric)) { |
| worstPredScore = candScore - candMetric; |
| predMetric = candMetric; |
| predScore = candScore; |
| predFeat = feats; |
| } |
| } |
| } else if (predSelectMode == 2) {//model prediction(max model score) |
| if (worstPredScore <= candScore) { |
| worstPredScore = candScore; |
| predMetric = candMetric; |
| predScore = candScore; |
| predFeat = feats; |
| } |
| } else {//worst metric score(ex: min BLEU) |
| if (evalMetric.getToBeMinimized()) {//if the smaller the metric score, the better |
| if (worstPredScore <= candMetric) { |
| worstPredScore = candMetric; |
| predMetric = candMetric; |
| predScore = candScore; |
| predFeat = feats; |
| } |
| } else { |
| if (worstPredScore >= candMetric) { |
| worstPredScore = candMetric; |
| predMetric = candMetric; |
| predScore = candScore; |
| predFeat = feats; |
| } |
| } |
| } |
| } |
| |
| oraPredScore[0] = oraMetric; |
| oraPredScore[1] = oraScore; |
| oraPredScore[2] = predMetric; |
| oraPredScore[3] = predScore; |
| oraPredFeat[0] = oraFeat; |
| oraPredFeat[1] = predFeat; |
| |
| //update the BLEU metric statistics if pseudo corpus is used to compute BLEU/TER-BLEU |
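    // pseudo-corpus trick (cf. Chiang et al., 2008): keep an exponentially
    // decayed running total of sufficient statistics,
    //   bleuHistory <- R * bleuHistory + oracleStats,
    // so the sentence-level BLEU computed in computeSentMetric is smoothed by
    // recent history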
| if(evalMetric.get_metricName().equals("BLEU") && usePseudoBleu ) { |
| String statString; |
| String[] statVal_str; |
| statString = stats_hash[sentId].get(oraCand); |
| statVal_str = statString.split("\\s+"); |
| |
| for (int j = 0; j < evalMetric.get_suffStatsCount(); j++) |
| bleuHistory[sentId][j] = R*bleuHistory[sentId][j]+Integer.parseInt(statVal_str[j]); |
| } |
| |
| if(evalMetric.get_metricName().equals("TER-BLEU") && usePseudoBleu ) { |
| String statString; |
| String[] statVal_str; |
| statString = stats_hash[sentId].get(oraCand); |
| statVal_str = statString.split("\\s+"); |
| |
| for (int j = 0; j < evalMetric.get_suffStatsCount()-2; j++) |
| bleuHistory[sentId][j] = R*bleuHistory[sentId][j]+Integer.parseInt(statVal_str[j+2]); //the first 2 stats are TER stats |
| } |
| } |
| |
| // compute *sentence-level* metric score for cand |
| private double computeSentMetric(int sentId, String cand) { |
| String statString; |
| String[] statVal_str; |
| int[] statVal = new int[evalMetric.get_suffStatsCount()]; |
| |
| statString = stats_hash[sentId].get(cand); |
| statVal_str = statString.split("\\s+"); |
| |
| if(evalMetric.get_metricName().equals("BLEU") && usePseudoBleu) { |
| for (int j = 0; j < evalMetric.get_suffStatsCount(); j++) |
| statVal[j] = (int) (Integer.parseInt(statVal_str[j]) + bleuHistory[sentId][j]); |
| } else if(evalMetric.get_metricName().equals("TER-BLEU") && usePseudoBleu) { |
| for (int j = 0; j < evalMetric.get_suffStatsCount()-2; j++) |
| statVal[j+2] = (int)(Integer.parseInt(statVal_str[j+2]) + bleuHistory[sentId][j]); //only modify the BLEU stats part(TER has 2 stats) |
| } else { //in all other situations, use normal stats |
| for (int j = 0; j < evalMetric.get_suffStatsCount(); j++) |
| statVal[j] = Integer.parseInt(statVal_str[j]); |
| } |
| |
| return evalMetric.score(statVal); |
| } |
| |
| // from ZMERT |
| private void normalizeLambda(double[] origLambda) { |
| // private String[] normalizationOptions; |
| // How should a lambda[] vector be normalized (before decoding)? |
| // nO[0] = 0: no normalization |
| // nO[0] = 1: scale so that parameter nO[2] has absolute value nO[1] |
| // nO[0] = 2: scale so that the maximum absolute value is nO[1] |
| // nO[0] = 3: scale so that the minimum absolute value is nO[1] |
| // nO[0] = 4: scale so that the L-nO[1] norm equals nO[2] |
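    // e.g. normalizationOptions = {2, 1.0} rescales lambda so that
    // max_c |lambda[c]| == 1.0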
| |
| int normalizationMethod = (int) normalizationOptions[0]; |
| double scalingFactor = 1.0; |
| if (normalizationMethod == 0) { |
| scalingFactor = 1.0; |
| } else if (normalizationMethod == 1) { |
| int c = (int) normalizationOptions[2]; |
| scalingFactor = normalizationOptions[1] / Math.abs(origLambda[c]); |
| } else if (normalizationMethod == 2) { |
| double maxAbsVal = -1; |
| int maxAbsVal_c = 0; |
| for (int c = 1; c <= paramDim; ++c) { |
| if (Math.abs(origLambda[c]) > maxAbsVal) { |
| maxAbsVal = Math.abs(origLambda[c]); |
| maxAbsVal_c = c; |
| } |
| } |
| scalingFactor = normalizationOptions[1] / Math.abs(origLambda[maxAbsVal_c]); |
| |
| } else if (normalizationMethod == 3) { |
| double minAbsVal = PosInf; |
| int minAbsVal_c = 0; |
| |
| for (int c = 1; c <= paramDim; ++c) { |
| if (Math.abs(origLambda[c]) < minAbsVal) { |
| minAbsVal = Math.abs(origLambda[c]); |
| minAbsVal_c = c; |
| } |
| } |
| scalingFactor = normalizationOptions[1] / Math.abs(origLambda[minAbsVal_c]); |
| |
| } else if (normalizationMethod == 4) { |
| double pow = normalizationOptions[1]; |
| double norm = L_norm(origLambda, pow); |
| scalingFactor = normalizationOptions[2] / norm; |
| } |
| |
| for (int c = 1; c <= paramDim; ++c) { |
| origLambda[c] *= scalingFactor; |
| } |
| } |
| |
| // from ZMERT |
| private double L_norm(double[] A, double pow) { |
| // calculates the L-pow norm of A[] |
| // NOTE: this calculation ignores A[0] |
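    //   ||A||_pow = ( sum_{i >= 1} |A[i]|^pow )^(1 / pow)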
| double sum = 0.0; |
| for (int i = 1; i < A.length; ++i) |
| sum += Math.pow(Math.abs(A[i]), pow); |
| |
| return Math.pow(sum, 1 / pow); |
| } |
| |
| public static double getScale() |
| { |
| return featScale; |
| } |
| |
  public static void initBleuHistory(int sentNum, int statCount)
  {
    bleuHistory = new double[sentNum][statCount]; // zero-initialized by Java
  }
| |
| public double getMetricScore() |
| { |
| return finalMetricScore; |
| } |
| |
| private final Vector<String> output; |
| private double[] initialLambda; |
| private final double[] finalLambda; |
| private double finalMetricScore; |
| private final HashMap<String, String>[] feat_hash; |
| private final HashMap<String, String>[] stats_hash; |
| private final int paramDim; |
| private final boolean[] isOptimizable; |
| public static int sentNum; |
| public static int adagradIter; //AdaGrad internal iterations |
| public static int oraSelectMode; |
| public static int predSelectMode; |
| public static int batchSize; |
| public static int regularization; |
| public static boolean needShuffle; |
| public static boolean needScale; |
| public static double scoreRatio; |
| public static boolean needAvg; |
| public static boolean usePseudoBleu; |
  public static double featScale = 1.0; //scale the features to make the model score comparable with the metric score; updated in each epoch if necessary
| public static double eta; |
| public static double lam; |
  public static double R; //corpus decay (used only when the pseudo corpus is used to compute BLEU)
| public static EvaluationMetric evalMetric; |
| public static double[] normalizationOptions; |
| public static double[][] bleuHistory; |
| |
  private final static double NegInf = Double.NEGATIVE_INFINITY;
  private final static double PosInf = Double.POSITIVE_INFINITY;
| } |