src/main/java/org/apache/joshua/decoder/ff/FeatureVector.java - joshua - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *  http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 package org.apache.joshua.decoder.ff;

 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;

 /**
  * An implementation of a sparse feature vector, using for representing both weights and feature
  * values.
  *
  * This class is used to hold both the decoder weights and the feature values accumulated across
  * each edge. When features are read in upon decoder startup, they all start out as sparse features
  * and are stored in the hash table. After the feature functions have been loaded, the decoder
  * queries each of them for their sparse features via {@link registerDenseFeatures}. Those features
  * returned by each decoder are then *removed* from the sparse feature hash and placed in the dense
  * feature array. Therefore, when a feature registers a dense feature, it should take care to
  * query either {@link org.apache.joshua.decoder.ff.FeatureVector#getDense(int)} or
  * {@link org.apache.joshua.decoder.ff.FeatureVector#getSparse(String)} when asking for the feature
  * values later on.
  *
  * @author Matt Post post@cs.jhu.edu
  */

 public class FeatureVector {
   /*
    * A list of the dense feature names. Increased via calls to registerDenseFeatures()
    */
   public static final ArrayList<String> DENSE_FEATURE_NAMES = new ArrayList<>();

   /*
    * The values of each of the dense features, defaulting to 0.
    */
   private ArrayList<Float> denseFeatures = null;

   /*
    * Value of sparse features.
    */
   private final HashMap<String, Float> sparseFeatures;

   public FeatureVector() {
     sparseFeatures = new HashMap<>();
     denseFeatures = new ArrayList<>(DENSE_FEATURE_NAMES.size());
     for (int i = 0; i < denseFeatures.size(); i++)
       denseFeatures.set(i, 0.0f);
   }

   /**
    * This version of the constructor takes an uninitialized feature with potentially intermingled
    * labeled and unlabeled feature values, of the format:
    *
    * [feature1=]value [feature2=]value
    *
    * It produces a Feature Vector where all unlabeled features have been labeled by appending the
    * unlabeled feature index (starting at 0) to the defaultPrefix value.
    *
    * **IMPORTANT** The feature values are inverted, for historical reasons, which leads to a lot
    * of confusion. They have to be inverted here and when the score is actually computed. They
    * are inverted here (which is used to build the feature vector representation of a rule's dense
    * features) and in {@link org.apache.joshua.decoder.ff.tm.Rule#estimateRuleCost(java.util.List)}
    * , where the rule's precomputable (weighted) score is cached.
    *
    * @param featureString, the string of labeled and unlabeled features (probably straight from the
    *          grammar text file)
    * @param prefix, the prefix to use for unlabeled features (probably "tm_OWNER_")
    */
   public FeatureVector(String featureString, String prefix) {

 //    System.err.println(String.format("FEATURES_OF(%s, %s)", featureString, prefix));

     /*
      * Read through the features on this rule, adding them to the feature vector. Unlabeled features
      * are converted to a canonical form.
      *
      * Note that it's bad form to mix unlabeled features and the named feature index they are mapped
      * to, but we are being liberal in what we accept.
      *
      * IMPORTANT: Note that, for historical reasons, the sign is reversed on all *dense* scores.
      * This is the source of *no end* of confusion and should be done away with.
      */
     this();

     int denseFeatureIndex = 0;

     if (!featureString.trim().equals("")) {
       for (String token : featureString.split("\\s+")) {
         if (token.indexOf('=') == -1) {
           /*
            * If we encounter an unlabeled feature, it is the next dense feature
            */
           while (denseFeatures.size() <= denseFeatureIndex)
             denseFeatures.add(0.0f);
           denseFeatures.set(denseFeatureIndex, -Float.parseFloat(token));
           denseFeatureIndex++;
         } else {
           /*
            * Labeled features are of two types: if they start with the prefix, they are actually
            * dense feature in disguise; otherwise, they are proper sparse features.
            */
           int splitPoint = token.indexOf('=');
           if (token.startsWith(prefix)) {
 //            System.err.println(String.format("  PREFIX=%s '%s'.substring(%d,%d) = %s", prefix, token, prefix.length(), splitPoint,
 //                token.substring(prefix.length(), splitPoint)));
             int index = Integer.parseInt(token.substring(prefix.length(), splitPoint));
             while (denseFeatures.size() <= index)
               denseFeatures.add(0.0f);
             denseFeatures.set(index, 1.0f * Float.parseFloat(token.substring(splitPoint + 1)));
           } else {
             sparseFeatures.put(token.substring(0, splitPoint),
                 Float.parseFloat(token.substring(splitPoint + 1)));
           }
         }
       }
     }
   }

   /**
    * Register one or more dense features with the global weight vector. This assumes them global
    * IDs, and then returns the index of the first feature (from which the calling feature function
    * can infer them all). This *must* be called by every feature function wishing to register
    * dense features!
    *
    * @param featureFunctions {@link java.util.ArrayList} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
    */
   public void registerDenseFeatures(ArrayList<FeatureFunction> featureFunctions) {
     for (FeatureFunction feature: featureFunctions) {
       ArrayList<String> names = feature.reportDenseFeatures(denseFeatures.size());
       for (String name: names) {
         DENSE_FEATURE_NAMES.add(name);
         denseFeatures.add(getSparse(name));
         sparseFeatures.remove(name);
       }
     }
   }

   public ArrayList<Float> getDenseFeatures() {
     return denseFeatures;
   }

   public HashMap<String,Float> getSparseFeatures() {
     return sparseFeatures;
   }

   public Set<String> keySet() {
     return sparseFeatures.keySet();
   }

   public int size() {
     return sparseFeatures.size() + denseFeatures.size();
   }

   public FeatureVector clone() {
     FeatureVector newOne = new FeatureVector();
     for (String key : this.sparseFeatures.keySet())
       newOne.set(key, this.sparseFeatures.get(key));
     for (int i = 0; i < denseFeatures.size(); i++)
       newOne.set(i, getDense(i));
     return newOne;
   }

   /**
    * Subtracts the weights in the other feature vector from this one. Note that this is not set
    * subtraction; keys found in the other FeatureVector but not in this one will be initialized with
    * a value of 0.0f before subtraction.
    *
    * @param other another {@link org.apache.joshua.decoder.ff.FeatureVector} from which to subtract its score
    */
   public void subtract(FeatureVector other) {
     for (int i = 0; i < denseFeatures.size(); i++)
       denseFeatures.set(i, getDense(i) - other.getDense(i));

     for (String key : other.keySet()) {
       float oldValue = (sparseFeatures.containsKey(key)) ? sparseFeatures.get(key) : 0.0f;
       sparseFeatures.put(key, oldValue - other.getSparse(key));
     }
   }

   /**
    * Adds the weights in the other feature vector to this one. This is set union, with values shared
    * between the two being summed.
    *
    * @param other another {@link org.apache.joshua.decoder.ff.FeatureVector} from which to add its score
    */
   public void add(FeatureVector other) {
     while (denseFeatures.size() < other.denseFeatures.size())
       denseFeatures.add(0.0f);

     for (int i = 0; i < other.denseFeatures.size(); i++)
       increment(i, other.getDense(i));

     for (String key : other.keySet()) {
       if (!sparseFeatures.containsKey(key))
         sparseFeatures.put(key, other.getSparse(key));
       else
         sparseFeatures.put(key, sparseFeatures.get(key) + other.getSparse(key));
     }
   }

   /**
    * Return the weight of a feature by name, after checking to determine if it is sparse or dense.
    *
    * @param feature String name of some feature
    * @return the feature's weight
    */
   public float getWeight(String feature) {
     for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
       if (DENSE_FEATURE_NAMES.get(i).equals(feature)) {
         return getDense(i);
       }
     }
     return getSparse(feature);
   }

   /**
    * Return the weight of a sparse feature, indexed by its name.
    *
    * @param feature String name of some feature
    * @return the sparse feature's weight, or 0 if not found.
    */
   public float getSparse(String feature) {
     if (sparseFeatures.containsKey(feature))
       return sparseFeatures.get(feature);
     return 0.0f;
   }

   public boolean hasValue(String name) {
     return sparseFeatures.containsKey(name);
   }

   /**
    * Return the weight of a dense feature, indexed by its feature index, or 0.0f, if the feature
    * is not found. In other words, this is a safe way to query the dense feature vector.
    *
    * @param id int representing of some dense feature
    * @return the dense feature's value, or 0 if not found.
    */
   public float getDense(int id) {
     if (id < denseFeatures.size())
       return denseFeatures.get(id);
     return 0.0f;
   }

   public void increment(String feature, float value) {
     sparseFeatures.put(feature, getSparse(feature) + value);
   }

   public void increment(int id, float value) {
     while (id >= denseFeatures.size())
       denseFeatures.add(0.0f);
     denseFeatures.set(id, getDense(id) + value);
   }

   /**
    * Set the value of a feature. We need to first determine whether the feature is a dense or
    * sparse one, then set accordingly.
    *
    * @param feature String name of some feature
    * @param value float value to set to the featue with the associated name
    */
   public void set(String feature, float value) {
     for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
       if (DENSE_FEATURE_NAMES.get(i).equals(feature)) {
         denseFeatures.set(i, value);
         return;
       }
     }
     // No dense feature was found; assume it's sparse
     sparseFeatures.put(feature, value);
   }

   public void set(int id, float value) {
     while (id >= denseFeatures.size())
       denseFeatures.add(0.0f);
     denseFeatures.set(id, value);
   }

   public Map<String, Float> getMap() {
     Map<String, Float> allFeatures = new HashMap<>(sparseFeatures.size() + denseFeatures.size());
     allFeatures.putAll(sparseFeatures);
     for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
       allFeatures.put(DENSE_FEATURE_NAMES.get(i), getDense(i));
     }
     return allFeatures;
   }

   /**
    * Computes the inner product between this feature vector and another one.
    *
    * @param other a {@link org.apache.joshua.decoder.ff.FeatureVector} with which to compute the inner product
    * @return float value representing the computation
    */
   public float innerProduct(FeatureVector other) {
     float cost = 0.0f;
     for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++)
       cost += getDense(i) * other.getDense(i);

     for (String key : sparseFeatures.keySet())
       cost += sparseFeatures.get(key) * other.getSparse(key);

     return cost;
   }

   public void times(float value) {
     for (String key : sparseFeatures.keySet())
       sparseFeatures.put(key, sparseFeatures.get(key) * value);
   }

   /***
    * Moses distinguishes sparse features as those containing an underscore, so we have to fake it
    * to be compatible with their tuners.
    *
    * @return trimmed Moses output string
    */
   public String mosesString() {
     StringBuilder outputString = new StringBuilder();

     HashSet<String> printed_keys = new HashSet<>();

     // First print all the dense feature names in order
     for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
       outputString.append(String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i).replace("_", "-"), getDense(i)));
       printed_keys.add(DENSE_FEATURE_NAMES.get(i));
     }

     // Now print the sparse features
     ArrayList<String> keys = new ArrayList<>(sparseFeatures.keySet());
     Collections.sort(keys);
     for (String key: keys) {
       if (! printed_keys.contains(key)) {
         float value = sparseFeatures.get(key);
         if (key.equals("OOVPenalty"))
           // force moses to see it as sparse
           key = "OOV_Penalty";
         outputString.append(String.format("%s=%.3f ", key, value));
       }
     }
     return outputString.toString().trim();
   }

   /***
    * Outputs a list of feature names. All dense features are printed. Feature names are printed
    * in the order they were read in.
    */
   @Override
   public String toString() {
     StringBuilder outputString = new StringBuilder();

     HashSet<String> printed_keys = new HashSet<>();

     // First print all the dense feature names in order
     for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
       outputString.append(String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i), getDense(i)));
       printed_keys.add(DENSE_FEATURE_NAMES.get(i));
     }

     // Now print the rest of the features
     ArrayList<String> keys = new ArrayList<>(sparseFeatures.keySet());
     Collections.sort(keys);
     keys.stream().filter(key -> !printed_keys.contains(key)).forEach(
         key -> outputString.append(String.format("%s=%.3f ", key, sparseFeatures.get(key))));

     return outputString.toString().trim();
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/
	package org.apache.joshua.decoder.ff;

	import java.util.ArrayList;
	import java.util.Collections;
	import java.util.HashMap;
	import java.util.HashSet;
	import java.util.Map;
	import java.util.Set;

	/**
	* An implementation of a sparse feature vector, using for representing both weights and feature
	* values.
	*
	* This class is used to hold both the decoder weights and the feature values accumulated across
	* each edge. When features are read in upon decoder startup, they all start out as sparse features
	* and are stored in the hash table. After the feature functions have been loaded, the decoder
	* queries each of them for their sparse features via {@link registerDenseFeatures}. Those features
	* returned by each decoder are then removed from the sparse feature hash and placed in the dense
	* feature array. Therefore, when a feature registers a dense feature, it should take care to
	* query either {@link org.apache.joshua.decoder.ff.FeatureVector#getDense(int)} or
	* {@link org.apache.joshua.decoder.ff.FeatureVector#getSparse(String)} when asking for the feature
	* values later on.
	*
	* @author Matt Post post@cs.jhu.edu
	*/

	public class FeatureVector {
	/*
	* A list of the dense feature names. Increased via calls to registerDenseFeatures()
	*/
	public static final ArrayList<String> DENSE_FEATURE_NAMES = new ArrayList<>();

	/*
	* The values of each of the dense features, defaulting to 0.
	*/
	private ArrayList<Float> denseFeatures = null;

	/*
	* Value of sparse features.
	*/
	private final HashMap<String, Float> sparseFeatures;

	public FeatureVector() {
	sparseFeatures = new HashMap<>();
	denseFeatures = new ArrayList<>(DENSE_FEATURE_NAMES.size());
	for (int i = 0; i < denseFeatures.size(); i++)
	denseFeatures.set(i, 0.0f);
	}

	/**
	* This version of the constructor takes an uninitialized feature with potentially intermingled
	* labeled and unlabeled feature values, of the format:
	*
	* [feature1=]value [feature2=]value
	*
	* It produces a Feature Vector where all unlabeled features have been labeled by appending the
	* unlabeled feature index (starting at 0) to the defaultPrefix value.
	*
	* IMPORTANT The feature values are inverted, for historical reasons, which leads to a lot
	* of confusion. They have to be inverted here and when the score is actually computed. They
	* are inverted here (which is used to build the feature vector representation of a rule's dense
	* features) and in {@link org.apache.joshua.decoder.ff.tm.Rule#estimateRuleCost(java.util.List)}
	* , where the rule's precomputable (weighted) score is cached.
	*
	* @param featureString, the string of labeled and unlabeled features (probably straight from the
	* grammar text file)
	* @param prefix, the prefix to use for unlabeled features (probably "tm_OWNER_")
	*/
	public FeatureVector(String featureString, String prefix) {

	// System.err.println(String.format("FEATURES_OF(%s, %s)", featureString, prefix));

	/*
	* Read through the features on this rule, adding them to the feature vector. Unlabeled features
	* are converted to a canonical form.
	*
	* Note that it's bad form to mix unlabeled features and the named feature index they are mapped
	* to, but we are being liberal in what we accept.
	*
	* IMPORTANT: Note that, for historical reasons, the sign is reversed on all dense scores.
	* This is the source of no end of confusion and should be done away with.
	*/
	this();

	int denseFeatureIndex = 0;

	if (!featureString.trim().equals("")) {
	for (String token : featureString.split("\\s+")) {
	if (token.indexOf('=') == -1) {
	/*
	* If we encounter an unlabeled feature, it is the next dense feature
	*/
	while (denseFeatures.size() <= denseFeatureIndex)
	denseFeatures.add(0.0f);
	denseFeatures.set(denseFeatureIndex, -Float.parseFloat(token));
	denseFeatureIndex++;
	} else {
	/*
	* Labeled features are of two types: if they start with the prefix, they are actually
	* dense feature in disguise; otherwise, they are proper sparse features.
	*/
	int splitPoint = token.indexOf('=');
	if (token.startsWith(prefix)) {
	// System.err.println(String.format(" PREFIX=%s '%s'.substring(%d,%d) = %s", prefix, token, prefix.length(), splitPoint,
	// token.substring(prefix.length(), splitPoint)));
	int index = Integer.parseInt(token.substring(prefix.length(), splitPoint));
	while (denseFeatures.size() <= index)
	denseFeatures.add(0.0f);
	denseFeatures.set(index, 1.0f * Float.parseFloat(token.substring(splitPoint + 1)));
	} else {
	sparseFeatures.put(token.substring(0, splitPoint),
	Float.parseFloat(token.substring(splitPoint + 1)));
	}
	}
	}
	}
	}

	/**
	* Register one or more dense features with the global weight vector. This assumes them global
	* IDs, and then returns the index of the first feature (from which the calling feature function
	* can infer them all). This must be called by every feature function wishing to register
	* dense features!
	*
	* @param featureFunctions {@link java.util.ArrayList} of {@link org.apache.joshua.decoder.ff.FeatureFunction}'s
	*/
	public void registerDenseFeatures(ArrayList<FeatureFunction> featureFunctions) {
	for (FeatureFunction feature: featureFunctions) {
	ArrayList<String> names = feature.reportDenseFeatures(denseFeatures.size());
	for (String name: names) {
	DENSE_FEATURE_NAMES.add(name);
	denseFeatures.add(getSparse(name));
	sparseFeatures.remove(name);
	}
	}
	}

	public ArrayList<Float> getDenseFeatures() {
	return denseFeatures;
	}

	public HashMap<String,Float> getSparseFeatures() {
	return sparseFeatures;
	}

	public Set<String> keySet() {
	return sparseFeatures.keySet();
	}

	public int size() {
	return sparseFeatures.size() + denseFeatures.size();
	}

	public FeatureVector clone() {
	FeatureVector newOne = new FeatureVector();
	for (String key : this.sparseFeatures.keySet())
	newOne.set(key, this.sparseFeatures.get(key));
	for (int i = 0; i < denseFeatures.size(); i++)
	newOne.set(i, getDense(i));
	return newOne;
	}

	/**
	* Subtracts the weights in the other feature vector from this one. Note that this is not set
	* subtraction; keys found in the other FeatureVector but not in this one will be initialized with
	* a value of 0.0f before subtraction.
	*
	* @param other another {@link org.apache.joshua.decoder.ff.FeatureVector} from which to subtract its score
	*/
	public void subtract(FeatureVector other) {
	for (int i = 0; i < denseFeatures.size(); i++)
	denseFeatures.set(i, getDense(i) - other.getDense(i));

	for (String key : other.keySet()) {
	float oldValue = (sparseFeatures.containsKey(key)) ? sparseFeatures.get(key) : 0.0f;
	sparseFeatures.put(key, oldValue - other.getSparse(key));
	}
	}

	/**
	* Adds the weights in the other feature vector to this one. This is set union, with values shared
	* between the two being summed.
	*
	* @param other another {@link org.apache.joshua.decoder.ff.FeatureVector} from which to add its score
	*/
	public void add(FeatureVector other) {
	while (denseFeatures.size() < other.denseFeatures.size())
	denseFeatures.add(0.0f);

	for (int i = 0; i < other.denseFeatures.size(); i++)
	increment(i, other.getDense(i));

	for (String key : other.keySet()) {
	if (!sparseFeatures.containsKey(key))
	sparseFeatures.put(key, other.getSparse(key));
	else
	sparseFeatures.put(key, sparseFeatures.get(key) + other.getSparse(key));
	}
	}

	/**
	* Return the weight of a feature by name, after checking to determine if it is sparse or dense.
	*
	* @param feature String name of some feature
	* @return the feature's weight
	*/
	public float getWeight(String feature) {
	for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
	if (DENSE_FEATURE_NAMES.get(i).equals(feature)) {
	return getDense(i);
	}
	}
	return getSparse(feature);
	}

	/**
	* Return the weight of a sparse feature, indexed by its name.
	*
	* @param feature String name of some feature
	* @return the sparse feature's weight, or 0 if not found.
	*/
	public float getSparse(String feature) {
	if (sparseFeatures.containsKey(feature))
	return sparseFeatures.get(feature);
	return 0.0f;
	}

	public boolean hasValue(String name) {
	return sparseFeatures.containsKey(name);
	}

	/**
	* Return the weight of a dense feature, indexed by its feature index, or 0.0f, if the feature
	* is not found. In other words, this is a safe way to query the dense feature vector.
	*
	* @param id int representing of some dense feature
	* @return the dense feature's value, or 0 if not found.
	*/
	public float getDense(int id) {
	if (id < denseFeatures.size())
	return denseFeatures.get(id);
	return 0.0f;
	}

	public void increment(String feature, float value) {
	sparseFeatures.put(feature, getSparse(feature) + value);
	}

	public void increment(int id, float value) {
	while (id >= denseFeatures.size())
	denseFeatures.add(0.0f);
	denseFeatures.set(id, getDense(id) + value);
	}

	/**
	* Set the value of a feature. We need to first determine whether the feature is a dense or
	* sparse one, then set accordingly.
	*
	* @param feature String name of some feature
	* @param value float value to set to the featue with the associated name
	*/
	public void set(String feature, float value) {
	for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
	if (DENSE_FEATURE_NAMES.get(i).equals(feature)) {
	denseFeatures.set(i, value);
	return;
	}
	}
	// No dense feature was found; assume it's sparse
	sparseFeatures.put(feature, value);
	}

	public void set(int id, float value) {
	while (id >= denseFeatures.size())
	denseFeatures.add(0.0f);
	denseFeatures.set(id, value);
	}

	public Map<String, Float> getMap() {
	Map<String, Float> allFeatures = new HashMap<>(sparseFeatures.size() + denseFeatures.size());
	allFeatures.putAll(sparseFeatures);
	for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
	allFeatures.put(DENSE_FEATURE_NAMES.get(i), getDense(i));
	}
	return allFeatures;
	}

	/**
	* Computes the inner product between this feature vector and another one.
	*
	* @param other a {@link org.apache.joshua.decoder.ff.FeatureVector} with which to compute the inner product
	* @return float value representing the computation
	*/
	public float innerProduct(FeatureVector other) {
	float cost = 0.0f;
	for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++)
	cost += getDense(i) * other.getDense(i);

	for (String key : sparseFeatures.keySet())
	cost += sparseFeatures.get(key) * other.getSparse(key);

	return cost;
	}

	public void times(float value) {
	for (String key : sparseFeatures.keySet())
	sparseFeatures.put(key, sparseFeatures.get(key) * value);
	}

	/***
	* Moses distinguishes sparse features as those containing an underscore, so we have to fake it
	* to be compatible with their tuners.
	*
	* @return trimmed Moses output string
	*/
	public String mosesString() {
	StringBuilder outputString = new StringBuilder();

	HashSet<String> printed_keys = new HashSet<>();

	// First print all the dense feature names in order
	for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
	outputString.append(String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i).replace("_", "-"), getDense(i)));
	printed_keys.add(DENSE_FEATURE_NAMES.get(i));
	}

	// Now print the sparse features
	ArrayList<String> keys = new ArrayList<>(sparseFeatures.keySet());
	Collections.sort(keys);
	for (String key: keys) {
	if (! printed_keys.contains(key)) {
	float value = sparseFeatures.get(key);
	if (key.equals("OOVPenalty"))
	// force moses to see it as sparse
	key = "OOV_Penalty";
	outputString.append(String.format("%s=%.3f ", key, value));
	}
	}
	return outputString.toString().trim();
	}

	/***
	* Outputs a list of feature names. All dense features are printed. Feature names are printed
	* in the order they were read in.
	*/
	@Override
	public String toString() {
	StringBuilder outputString = new StringBuilder();

	HashSet<String> printed_keys = new HashSet<>();

	// First print all the dense feature names in order
	for (int i = 0; i < DENSE_FEATURE_NAMES.size(); i++) {
	outputString.append(String.format("%s=%.3f ", DENSE_FEATURE_NAMES.get(i), getDense(i)));
	printed_keys.add(DENSE_FEATURE_NAMES.get(i));
	}

	// Now print the rest of the features
	ArrayList<String> keys = new ArrayList<>(sparseFeatures.keySet());
	Collections.sort(keys);
	keys.stream().filter(key -> !printed_keys.contains(key)).forEach(
	key -> outputString.append(String.format("%s=%.3f ", key, sparseFeatures.get(key))));

	return outputString.toString().trim();
	}
	}