/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.opennlp.ml.perceptron;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.opennlp.ml.model.AbstractModel;
import org.apache.opennlp.ml.model.DataIndexer;
import org.apache.opennlp.ml.model.Event;
import org.apache.opennlp.ml.model.IndexHashTable;
import org.apache.opennlp.ml.model.MutableContext;
import org.apache.opennlp.ml.model.OnePassDataIndexer;
import org.apache.opennlp.ml.model.Sequence;
import org.apache.opennlp.ml.model.SequenceStream;
import org.apache.opennlp.ml.model.SequenceStreamEventStream;
import org.apache.opennlp.ml.model.TwoPassDataIndexer;

/**
* Trains models for sequences using the perceptron algorithm. Each outcome is represented as
* a binary perceptron classifier. This supports standard (integer) weighting as well as
* averaged weighting. Sequence information is used in a simplified way compared to that described in:
* Discriminative Training Methods for Hidden Markov Models: Theory and Experiments
* with the Perceptron Algorithm. Michael Collins, EMNLP 2002.
* Specifically, updates are applied only to tokens which were incorrectly tagged by a sequence tagger,
* rather than to all features across the sequence which differ from the training sequence.
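*
* A minimal usage sketch; the SequenceStream implementation, iteration count, and cutoff
* below are illustrative assumptions rather than required values:
* <pre>{@code
* SequenceStream sequences = ...; // application-specific stream of training sequences
* SimplePerceptronSequenceTrainer trainer = new SimplePerceptronSequenceTrainer();
* AbstractModel model = trainer.trainModel(100, sequences, 5, true); // 100 iterations, cutoff 5, averaged
* }</pre>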
*/
public class SimplePerceptronSequenceTrainer {
private boolean printMessages = true;
private int iterations;
private SequenceStream sequenceStream;
/** Number of events in the event set. */
private int numEvents;
/** Number of predicates. */
private int numPreds;
private int numOutcomes;
/** The outcome observed for each event i, as an index into outcomeLabels. */
private int[] outcomeList;
private String[] outcomeLabels;
double[] modelDistribution;
/** Stores the average parameter values of each predicate during iteration. */
private MutableContext[] averageParams;
/** Mapping between predicate names and their integer indices in params. */
private IndexHashTable<String> pmap;
private Map<String,Integer> omap;
/** Stores the estimated parameter value of each predicate during iteration. */
private MutableContext[] params;
private boolean useAverage;
private int[][][] updates;
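/** Indices into updates[predicate][outcome][]: the parameter value at the last update,
and the iteration and sequence index at which that update was made. */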
private int VALUE = 0;
private int ITER = 1;
private int EVENT = 2;
private int[] allOutcomesPattern;
private String[] predLabels;
int numSequences;
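
/**
* Trains a perceptron sequence model on the events generated from the given sequence stream.
*
* @param iterations the number of training iterations to perform
* @param sequenceStream the stream of training sequences
* @param cutoff the minimum number of times a predicate must occur to be included in the model
* @param useAverage whether the returned model should use the averaged parameters
* @return the trained model
* @throws IOException if the sequence stream cannot be read
*/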
public AbstractModel trainModel(int iterations, SequenceStream sequenceStream, int cutoff, boolean useAverage) throws IOException {
this.iterations = iterations;
this.sequenceStream = sequenceStream;
DataIndexer di = new OnePassDataIndexer(new SequenceStreamEventStream(sequenceStream),cutoff,false);
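// Count the training sequences; the sequence count is needed for parameter averaging.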
numSequences = 0;
for (Sequence s : sequenceStream) {
numSequences++;
}
outcomeList = di.getOutcomeList();
predLabels = di.getPredLabels();
pmap = new IndexHashTable<String>(predLabels, 0.7d);
display("Incorporating indexed data for training... \n");
this.useAverage = useAverage;
numEvents = di.getNumEvents();
this.iterations = iterations;
outcomeLabels = di.getOutcomeLabels();
omap = new HashMap<String,Integer>();
for (int oli=0;oli<outcomeLabels.length;oli++) {
omap.put(outcomeLabels[oli], oli);
}
outcomeList = di.getOutcomeList();
numPreds = predLabels.length;
numOutcomes = outcomeLabels.length;
if (useAverage) {
updates = new int[numPreds][numOutcomes][3];
}
display("done.\n");
display("\tNumber of Event Tokens: " + numEvents + "\n");
display("\t Number of Outcomes: " + numOutcomes + "\n");
display("\t Number of Predicates: " + numPreds + "\n");
params = new MutableContext[numPreds];
if (useAverage) averageParams = new MutableContext[numPreds];
allOutcomesPattern= new int[numOutcomes];
for (int oi = 0; oi < numOutcomes; oi++) {
allOutcomesPattern[oi] = oi;
}
for (int pi = 0; pi < numPreds; pi++) {
params[pi]=new MutableContext(allOutcomesPattern,new double[numOutcomes]);
if (useAverage) averageParams[pi] = new MutableContext(allOutcomesPattern,new double[numOutcomes]);
for (int aoi=0;aoi<numOutcomes;aoi++) {
params[pi].setParameter(aoi, 0.0);
if (useAverage) averageParams[pi].setParameter(aoi, 0.0);
}
}
modelDistribution = new double[numOutcomes];
display("Computing model parameters...\n");
findParameters(iterations);
display("...done.\n");
/*************** Create and return the model ******************/
String[] updatedPredLabels = predLabels;
/*
String[] updatedPredLabels = new String[pmap.size()];
for (String pred : pmap.keySet()) {
updatedPredLabels[pmap.get(pred)]=pred;
}
*/
if (useAverage) {
return new PerceptronModel(averageParams, updatedPredLabels, outcomeLabels);
}
else {
return new PerceptronModel(params, updatedPredLabels, outcomeLabels);
}
}
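
/** Runs the specified number of training iterations and then reports training-set accuracy. */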
private void findParameters(int iterations) {
display("Performing " + iterations + " iterations.\n");
for (int i = 1; i <= iterations; i++) {
if (i < 10)
display("  " + i + ": ");
else if (i < 100)
display(" " + i + ": ");
else
display(i + ": ");
nextIteration(i);
}
if (useAverage) {
trainingStats(averageParams);
}
else {
trainingStats(params);
}
}
private void display(String s) {
if (printMessages)
System.out.print(s);
}
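
/**
* Performs a single perceptron training pass over the sequence stream. For each sequence
* that the current model tags incorrectly, parameters are updated by the difference between
* the feature counts of the training events and those of the tagger's events.
*/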
public void nextIteration(int iteration) {
iteration--; //move to 0-based index
int numCorrect = 0;
int oei=0;
int si=0;
Map<String,Float>[] featureCounts = new Map[numOutcomes];
for (int oi=0;oi<numOutcomes;oi++) {
featureCounts[oi] = new HashMap<String,Float>();
}
PerceptronModel model = new PerceptronModel(params,predLabels,pmap,outcomeLabels);
for (Sequence sequence : sequenceStream) {
Event[] taggerEvents = sequenceStream.updateContext(sequence, model);
Event[] events = sequence.getEvents();
boolean update = false;
for (int ei=0;ei<events.length;ei++,oei++) {
if (!taggerEvents[ei].getOutcome().equals(events[ei].getOutcome())) {
update = true;
//break;
}
else {
numCorrect++;
}
}
if (update) {
for (int oi=0;oi<numOutcomes;oi++) {
featureCounts[oi].clear();
}
//System.err.print("train:");for (int ei=0;ei<events.length;ei++) {System.err.print(" "+events[ei].getOutcome());} System.err.println();
//training feature count computation
for (int ei=0;ei<events.length;ei++,oei++) {
String[] contextStrings = events[ei].getContext();
float values[] = events[ei].getValues();
int oi = omap.get(events[ei].getOutcome());
for (int ci=0;ci<contextStrings.length;ci++) {
float value = 1;
if (values != null) {
value = values[ci];
}
Float c = featureCounts[oi].get(contextStrings[ci]);
if (c == null) {
c = value;
}
else {
c+=value;
}
featureCounts[oi].put(contextStrings[ci], c);
}
}
//evaluation feature count computation
//System.err.print("test: ");for (int ei=0;ei<taggerEvents.length;ei++) {System.err.print(" "+taggerEvents[ei].getOutcome());} System.err.println();
for (Event taggerEvent : taggerEvents) {
String[] contextStrings = taggerEvent.getContext();
float values[] = taggerEvent.getValues();
int oi = omap.get(taggerEvent.getOutcome());
for (int ci = 0; ci < contextStrings.length; ci++) {
float value = 1;
if (values != null) {
value = values[ci];
}
Float c = featureCounts[oi].get(contextStrings[ci]);
if (c == null) {
c = -1*value;
}
else {
c-=value;
}
if (c == 0f) {
featureCounts[oi].remove(contextStrings[ci]);
}
else {
featureCounts[oi].put(contextStrings[ci], c);
}
}
}
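// Update parameters by the accumulated difference between training and tagger feature counts,
// recording when each parameter last changed so the average can be computed lazily.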
for (int oi=0;oi<numOutcomes;oi++) {
for (String feature : featureCounts[oi].keySet()) {
int pi = pmap.get(feature);
if (pi != -1) {
//System.err.println(si+" "+outcomeLabels[oi]+" "+feature+" "+featureCounts[oi].get(feature));
params[pi].updateParameter(oi, featureCounts[oi].get(feature));
if (useAverage) {
if (updates[pi][oi][VALUE] != 0) {
averageParams[pi].updateParameter(oi,updates[pi][oi][VALUE]*(numSequences*(iteration-updates[pi][oi][ITER])+(si-updates[pi][oi][EVENT])));
//System.err.println("p avp["+pi+"]."+oi+"="+averageParams[pi].getParameters()[oi]);
}
//System.err.println("p updates["+pi+"]["+oi+"]=("+updates[pi][oi][ITER]+","+updates[pi][oi][EVENT]+","+updates[pi][oi][VALUE]+") + ("+iteration+","+oei+","+params[pi].getParameters()[oi]+") -> "+averageParams[pi].getParameters()[oi]);
updates[pi][oi][VALUE] = (int) params[pi].getParameters()[oi];
updates[pi][oi][ITER] = iteration;
updates[pi][oi][EVENT] = si;
}
}
}
}
model = new PerceptronModel(params,predLabels,pmap,outcomeLabels);
}
si++;
}
//finish average computation
double totIterations = (double) iterations*si;
if (useAverage && iteration == iterations-1) {
for (int pi = 0; pi < numPreds; pi++) {
double[] predParams = averageParams[pi].getParameters();
for (int oi = 0;oi<numOutcomes;oi++) {
if (updates[pi][oi][VALUE] != 0) {
predParams[oi] += updates[pi][oi][VALUE]*(numSequences*(iterations-updates[pi][oi][ITER])-updates[pi][oi][EVENT]);
}
if (predParams[oi] != 0) {
predParams[oi] /=totIterations;
averageParams[pi].setParameter(oi, predParams[oi]);
//System.err.println("updates["+pi+"]["+oi+"]=("+updates[pi][oi][ITER]+","+updates[pi][oi][EVENT]+","+updates[pi][oi][VALUE]+") + ("+iterations+","+0+","+params[pi].getParameters()[oi]+") -> "+averageParams[pi].getParameters()[oi]);
}
}
}
}
display(". ("+numCorrect+"/"+numEvents+") "+((double) numCorrect / numEvents) + "\n");
}
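
/** Computes and prints the tagging accuracy of the given parameters over the training sequences. */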
private void trainingStats(MutableContext[] params) {
int numCorrect = 0;
int oei=0;
for (Sequence sequence : sequenceStream) {
Event[] taggerEvents = sequenceStream.updateContext(sequence, new PerceptronModel(params,predLabels,pmap,outcomeLabels));
for (int ei=0;ei<taggerEvents.length;ei++,oei++) {
int max = omap.get(taggerEvents[ei].getOutcome());
if (max == outcomeList[oei]) {
numCorrect ++;
}
}
}
display(". ("+numCorrect+"/"+numEvents+") "+((double) numCorrect / numEvents) + "\n");
}
}