opennlp-ml/src/main/java/org/apache/opennlp/ml/model/AbstractModelReader.java - opennlp-sandbox - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 package org.apache.opennlp.ml.model;

 import java.io.File;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.StringTokenizer;
 import java.util.zip.GZIPInputStream;


 /**
  * Base class for readers that rebuild a model from stored data. All primitive reads are
  * delegated to a {@link DataReader}; subclasses supply {@link #checkModelType()} and
  * {@link #constructModel()} and can use the protected helpers below to read the common
  * sections (outcomes, outcome patterns, predicates, parameters).
  */
 public abstract class AbstractModelReader {

   /**
    * The number of predicates contained in the model. Set by {@link #getPredicates()} and
    * used by {@link #getParameters(int[][])} to size its result, so predicates must be read
    * before parameters.
    */
   protected int NUM_PREDS;

   /** The source all {@code readInt}/{@code readDouble}/{@code readUTF} calls are delegated to. */
   protected DataReader dataReader;

   /**
    * Opens the given file and selects a data reader based on the file name. A trailing
    * {@code .gz} means the content is gzip-compressed; after stripping that suffix, a name
    * ending in {@code .bin} selects {@link BinaryFileDataReader} and any other name selects
    * {@link PlainTextFileDataReader}.
    *
    * @param f the model file to read.
    * @throws IOException if the file can not be opened or its gzip header can not be read.
    */
   public AbstractModelReader(File f) throws IOException {
     String filename = f.getName();
     InputStream input;
     // handle the zipped/not zipped distinction
     if (filename.endsWith(".gz")) {
       FileInputStream fileIn = new FileInputStream(f);
       try {
         input = new GZIPInputStream(fileIn);
       }
       catch (IOException e) {
         // GZIPInputStream reads the header eagerly; if that fails, nothing else holds a
         // reference to the file stream, so release it here before propagating.
         try {
           fileIn.close();
         }
         catch (IOException ignored) {
           // the header failure is the error worth reporting
         }
         throw e;
       }
       filename = filename.substring(0, filename.length() - 3);
     }
     else {
       input = new FileInputStream(f);
     }

     // handle the different formats
     if (filename.endsWith(".bin")) {
       this.dataReader = new BinaryFileDataReader(input);
     }
     else {  // anything else is treated as plain text (".txt")
       this.dataReader = new PlainTextFileDataReader(input);
     }
   }

   /**
    * Creates a reader on top of an already constructed data reader.
    *
    * @param dataReader the source to read the model from.
    */
   public AbstractModelReader(DataReader dataReader) {
     this.dataReader = dataReader;
   }

   /**
    * Reads the next int from the underlying data reader.
    *
    * @return the value read.
    * @throws java.io.IOException if the underlying data reader fails.
    */
   public int readInt() throws java.io.IOException {
     return dataReader.readInt();
   }

   /**
    * Reads the next double from the underlying data reader.
    *
    * @return the value read.
    * @throws java.io.IOException if the underlying data reader fails.
    */
   public double readDouble() throws java.io.IOException {
     return dataReader.readDouble();
   }

   /**
    * Reads the next string from the underlying data reader.
    *
    * @return the value read.
    * @throws java.io.IOException if the underlying data reader fails.
    */
   public String readUTF() throws java.io.IOException {
     return dataReader.readUTF();
   }

   /**
    * Reads the model: calls {@link #checkModelType()} first and then returns the result of
    * {@link #constructModel()}.
    *
    * @return the model built by {@link #constructModel()}.
    * @throws IOException if either step fails.
    */
   public AbstractModel getModel() throws IOException {
     checkModelType();
     return constructModel();
   }

   /**
    * Hook invoked by {@link #getModel()} before the model is constructed. Presumably verifies
    * that the stored data is of the type the subclass handles; the details are up to the
    * implementation.
    *
    * @throws java.io.IOException if the check fails or the data can not be read.
    */
   public abstract void checkModelType() throws java.io.IOException;

   /**
    * Hook invoked by {@link #getModel()} to build the model from the remaining data.
    *
    * @return the constructed model.
    * @throws java.io.IOException if the data can not be read.
    */
   public abstract AbstractModel constructModel() throws java.io.IOException;

   /**
    * Reads the outcome labels: an int count followed by that many strings.
    *
    * @return the outcome labels in the order they were read.
    * @throws java.io.IOException if the data can not be read.
    */
   protected String[] getOutcomes() throws java.io.IOException {
     int numOutcomes = readInt();
     String[] outcomeLabels = new String[numOutcomes];
     for (int i = 0; i < numOutcomes; i++) {
       outcomeLabels[i] = readUTF();
     }
     return outcomeLabels;
   }

   /**
    * Reads the outcome patterns: an int count followed by that many strings, each holding
    * space-separated integers.
    *
    * @return one int array per pattern, in the order read. See
    * {@link #getParameters(int[][])} for how the entries are interpreted.
    * @throws java.io.IOException if the data can not be read.
    */
   protected int[][] getOutcomePatterns() throws java.io.IOException {
     int numOCTypes = readInt();
     int[][] outcomePatterns = new int[numOCTypes][];
     for (int i = 0; i < numOCTypes; i++) {
       StringTokenizer tok = new StringTokenizer(readUTF(), " ");
       int[] infoInts = new int[tok.countTokens()];
       for (int j = 0; tok.hasMoreTokens(); j++) {
         infoInts[j] = Integer.parseInt(tok.nextToken());
       }
       outcomePatterns[i] = infoInts;
     }
     return outcomePatterns;
   }

   /**
    * Reads the predicate labels: an int count, stored in {@link #NUM_PREDS}, followed by
    * that many strings.
    *
    * @return the predicate labels in the order they were read.
    * @throws java.io.IOException if the data can not be read.
    */
   protected String[] getPredicates() throws java.io.IOException {
     NUM_PREDS = readInt();
     String[] predLabels = new String[NUM_PREDS];
     for (int i = 0; i < NUM_PREDS; i++) {
       predLabels[i] = readUTF();
     }
     return predLabels;
   }

   /**
    * Reads the parameters from a file and populates an array of context objects.
    * @param outcomePatterns The outcomes patterns for the model.  The first index refers to which
    * outcome pattern (a set of outcomes that occurs with a context) is being specified.  The
    * second index specifies the number of contexts which use this pattern at index 0, and the
    * index of each outcome which makes up this pattern in indices 1-n.
    * @return An array of {@link #NUM_PREDS} context objects. Contexts which use the same
    * pattern share a single outcome index array.
    * @throws java.io.IOException when the model file does not match the outcome patterns or can not be read.
    */
   protected Context[] getParameters(int[][] outcomePatterns) throws java.io.IOException {
     Context[] params = new Context[NUM_PREDS];
     int pid = 0;
     for (int i = 0; i < outcomePatterns.length; i++) {
       int[] pattern = outcomePatterns[i];
       // An empty row has no context count at index 0; report it as bad model data
       // instead of failing on a negative array size below.
       if (pattern.length == 0) {
         throw new IOException("Outcome pattern " + i
             + " is empty; expected a context count followed by outcome indices");
       }
       //construct outcome pattern: everything after the leading context count
       int numOutcomes = pattern.length - 1;
       int[] outcomePattern = new int[numOutcomes];
       System.arraycopy(pattern, 1, outcomePattern, 0, numOutcomes);
       //populate parameters for each context which uses this outcome pattern.
       for (int j = 0; j < pattern[0]; j++) {
         // The patterns describe more contexts than there are predicates: the model data
         // is inconsistent, so fail with the documented exception type.
         if (pid >= params.length) {
           throw new IOException("Outcome patterns describe more contexts than the "
               + params.length + " predicates in the model");
         }
         double[] contextParameters = new double[numOutcomes];
         for (int k = 0; k < numOutcomes; k++) {
           contextParameters[k] = readDouble();
         }
         params[pid] = new Context(outcomePattern, contextParameters);
         pid++;
       }
     }
     return params;
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	package org.apache.opennlp.ml.model;

	import java.io.File;
	import java.io.FileInputStream;
	import java.io.IOException;
	import java.io.InputStream;
	import java.util.StringTokenizer;
	import java.util.zip.GZIPInputStream;


	/**
	 * Base class for readers that rebuild a model from stored data. All primitive reads are
	 * delegated to a {@link DataReader}; subclasses supply {@link #checkModelType()} and
	 * {@link #constructModel()} and can use the protected helpers below to read the common
	 * sections (outcomes, outcome patterns, predicates, parameters).
	 */
	public abstract class AbstractModelReader {

	  /**
	   * The number of predicates contained in the model. Set by {@link #getPredicates()} and
	   * used by {@link #getParameters(int[][])} to size its result, so predicates must be read
	   * before parameters.
	   */
	  protected int NUM_PREDS;

	  /** The source all {@code readInt}/{@code readDouble}/{@code readUTF} calls are delegated to. */
	  protected DataReader dataReader;

	  /**
	   * Opens the given file and selects a data reader based on the file name. A trailing
	   * {@code .gz} means the content is gzip-compressed; after stripping that suffix, a name
	   * ending in {@code .bin} selects {@link BinaryFileDataReader} and any other name selects
	   * {@link PlainTextFileDataReader}.
	   *
	   * @param f the model file to read.
	   * @throws IOException if the file can not be opened or its gzip header can not be read.
	   */
	  public AbstractModelReader(File f) throws IOException {
	    String filename = f.getName();
	    InputStream input;
	    // handle the zipped/not zipped distinction
	    if (filename.endsWith(".gz")) {
	      FileInputStream fileIn = new FileInputStream(f);
	      try {
	        input = new GZIPInputStream(fileIn);
	      }
	      catch (IOException e) {
	        // GZIPInputStream reads the header eagerly; if that fails, nothing else holds a
	        // reference to the file stream, so release it here before propagating.
	        try {
	          fileIn.close();
	        }
	        catch (IOException ignored) {
	          // the header failure is the error worth reporting
	        }
	        throw e;
	      }
	      filename = filename.substring(0, filename.length() - 3);
	    }
	    else {
	      input = new FileInputStream(f);
	    }

	    // handle the different formats
	    if (filename.endsWith(".bin")) {
	      this.dataReader = new BinaryFileDataReader(input);
	    }
	    else { // anything else is treated as plain text (".txt")
	      this.dataReader = new PlainTextFileDataReader(input);
	    }
	  }

	  /**
	   * Creates a reader on top of an already constructed data reader.
	   *
	   * @param dataReader the source to read the model from.
	   */
	  public AbstractModelReader(DataReader dataReader) {
	    this.dataReader = dataReader;
	  }

	  /**
	   * Reads the next int from the underlying data reader.
	   *
	   * @return the value read.
	   * @throws java.io.IOException if the underlying data reader fails.
	   */
	  public int readInt() throws java.io.IOException {
	    return dataReader.readInt();
	  }

	  /**
	   * Reads the next double from the underlying data reader.
	   *
	   * @return the value read.
	   * @throws java.io.IOException if the underlying data reader fails.
	   */
	  public double readDouble() throws java.io.IOException {
	    return dataReader.readDouble();
	  }

	  /**
	   * Reads the next string from the underlying data reader.
	   *
	   * @return the value read.
	   * @throws java.io.IOException if the underlying data reader fails.
	   */
	  public String readUTF() throws java.io.IOException {
	    return dataReader.readUTF();
	  }

	  /**
	   * Reads the model: calls {@link #checkModelType()} first and then returns the result of
	   * {@link #constructModel()}.
	   *
	   * @return the model built by {@link #constructModel()}.
	   * @throws IOException if either step fails.
	   */
	  public AbstractModel getModel() throws IOException {
	    checkModelType();
	    return constructModel();
	  }

	  /**
	   * Hook invoked by {@link #getModel()} before the model is constructed. Presumably verifies
	   * that the stored data is of the type the subclass handles; the details are up to the
	   * implementation.
	   *
	   * @throws java.io.IOException if the check fails or the data can not be read.
	   */
	  public abstract void checkModelType() throws java.io.IOException;

	  /**
	   * Hook invoked by {@link #getModel()} to build the model from the remaining data.
	   *
	   * @return the constructed model.
	   * @throws java.io.IOException if the data can not be read.
	   */
	  public abstract AbstractModel constructModel() throws java.io.IOException;

	  /**
	   * Reads the outcome labels: an int count followed by that many strings.
	   *
	   * @return the outcome labels in the order they were read.
	   * @throws java.io.IOException if the data can not be read.
	   */
	  protected String[] getOutcomes() throws java.io.IOException {
	    int numOutcomes = readInt();
	    String[] outcomeLabels = new String[numOutcomes];
	    for (int i = 0; i < numOutcomes; i++) {
	      outcomeLabels[i] = readUTF();
	    }
	    return outcomeLabels;
	  }

	  /**
	   * Reads the outcome patterns: an int count followed by that many strings, each holding
	   * space-separated integers.
	   *
	   * @return one int array per pattern, in the order read. See
	   * {@link #getParameters(int[][])} for how the entries are interpreted.
	   * @throws java.io.IOException if the data can not be read.
	   */
	  protected int[][] getOutcomePatterns() throws java.io.IOException {
	    int numOCTypes = readInt();
	    int[][] outcomePatterns = new int[numOCTypes][];
	    for (int i = 0; i < numOCTypes; i++) {
	      StringTokenizer tok = new StringTokenizer(readUTF(), " ");
	      int[] infoInts = new int[tok.countTokens()];
	      for (int j = 0; tok.hasMoreTokens(); j++) {
	        infoInts[j] = Integer.parseInt(tok.nextToken());
	      }
	      outcomePatterns[i] = infoInts;
	    }
	    return outcomePatterns;
	  }

	  /**
	   * Reads the predicate labels: an int count, stored in {@link #NUM_PREDS}, followed by
	   * that many strings.
	   *
	   * @return the predicate labels in the order they were read.
	   * @throws java.io.IOException if the data can not be read.
	   */
	  protected String[] getPredicates() throws java.io.IOException {
	    NUM_PREDS = readInt();
	    String[] predLabels = new String[NUM_PREDS];
	    for (int i = 0; i < NUM_PREDS; i++) {
	      predLabels[i] = readUTF();
	    }
	    return predLabels;
	  }

	  /**
	   * Reads the parameters from a file and populates an array of context objects.
	   * @param outcomePatterns The outcomes patterns for the model. The first index refers to which
	   * outcome pattern (a set of outcomes that occurs with a context) is being specified. The
	   * second index specifies the number of contexts which use this pattern at index 0, and the
	   * index of each outcome which makes up this pattern in indices 1-n.
	   * @return An array of {@link #NUM_PREDS} context objects. Contexts which use the same
	   * pattern share a single outcome index array.
	   * @throws java.io.IOException when the model file does not match the outcome patterns or can not be read.
	   */
	  protected Context[] getParameters(int[][] outcomePatterns) throws java.io.IOException {
	    Context[] params = new Context[NUM_PREDS];
	    int pid = 0;
	    for (int i = 0; i < outcomePatterns.length; i++) {
	      int[] pattern = outcomePatterns[i];
	      // An empty row has no context count at index 0; report it as bad model data
	      // instead of failing on a negative array size below.
	      if (pattern.length == 0) {
	        throw new IOException("Outcome pattern " + i
	            + " is empty; expected a context count followed by outcome indices");
	      }
	      //construct outcome pattern: everything after the leading context count
	      int numOutcomes = pattern.length - 1;
	      int[] outcomePattern = new int[numOutcomes];
	      System.arraycopy(pattern, 1, outcomePattern, 0, numOutcomes);
	      //populate parameters for each context which uses this outcome pattern.
	      for (int j = 0; j < pattern[0]; j++) {
	        // The patterns describe more contexts than there are predicates: the model data
	        // is inconsistent, so fail with the documented exception type.
	        if (pid >= params.length) {
	          throw new IOException("Outcome patterns describe more contexts than the "
	              + params.length + " predicates in the model");
	        }
	        double[] contextParameters = new double[numOutcomes];
	        for (int k = 0; k < numOutcomes; k++) {
	          contextParameters[k] = readDouble();
	        }
	        params[pid] = new Context(outcomePattern, contextParameters);
	        pid++;
	      }
	    }
	    return params;
	  }

	}