/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.horn.bsp;

import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.apache.hama.HamaConfiguration;
import org.apache.hama.bsp.BSPJob;
import org.apache.hama.commons.math.DenseDoubleVector;
import org.apache.hama.commons.math.DoubleFunction;
import org.apache.hama.commons.math.DoubleMatrix;
import org.apache.hama.commons.math.DoubleVector;
import org.apache.hama.commons.math.FunctionFactory;
import org.apache.hama.ml.util.FeatureTransformer;
import com.google.common.base.Preconditions;

/**
 * AutoEncoder is a model used for dimensionality reduction and feature
 * learning. It is a special kind of {@link NeuralNetwork} that consists of
 * three layers of neurons, where the first and third layers contain the same
 * number of neurons.
 *
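 * <p>
 * A minimal usage sketch (the dimensions and model path below are
 * hypothetical, chosen only for illustration):
 *
 * <pre>{@code
 * AutoEncoder encoder = new AutoEncoder(64, 16); // compress 64 features to 16
 * encoder.setModelPath("/tmp/autoencoder.model"); // hypothetical path
 * DoubleVector instance = new DenseDoubleVector(new double[64]);
 * encoder.trainOnline(instance); // one online training step
 * DoubleVector code = encoder.encode(instance); // compressed representation
 * DoubleVector reconstruction = encoder.decode(code); // back to feature space
 * }</pre>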
 */
public class AutoEncoder {

  private final SmallLayeredNeuralNetwork model;

  /**
   * Initialize the autoencoder.
   *
   * @param inputDimensions The number of dimensions for the input feature.
   * @param compressedDimensions The number of dimensions for the compressed
   *          information.
   */
  public AutoEncoder(int inputDimensions, int compressedDimensions) {
    model = new SmallLayeredNeuralNetwork();
    // Input layer, sized to the raw feature dimension.
    model.addLayer(inputDimensions, false,
        FunctionFactory.createDoubleFunction("Sigmoid"));
    // Hidden layer, holding the compressed representation.
    model.addLayer(compressedDimensions, false,
        FunctionFactory.createDoubleFunction("Sigmoid"));
    // Output layer, sized to reconstruct the original input.
    model.addLayer(inputDimensions, true,
        FunctionFactory.createDoubleFunction("Sigmoid"));
    // An autoencoder learns to reproduce its own input, so training is
    // unsupervised and the cost is the squared reconstruction error.
    model
        .setLearningStyle(AbstractLayeredNeuralNetwork.LearningStyle.UNSUPERVISED);
    model.setCostFunction(FunctionFactory
        .createDoubleDoubleFunction("SquaredError"));
  }

  public AutoEncoder(HamaConfiguration conf, String modelPath) {
    model = new SmallLayeredNeuralNetwork(conf, modelPath);
  }

  public AutoEncoder setModelPath(String modelPath) {
    model.setModelPath(modelPath);
    return this;
  }

  /**
   * Train the autoencoder with the given data. Note that the training data is
   * pre-processed, where the features are transformed by the configured
   * {@link FeatureTransformer} before they reach the model.
   *
   * @param conf The Hama configuration.
   * @param dataInputPath The path of the training data.
   * @param trainingParams The parameters for the training process.
   * @throws InterruptedException
   * @throws IOException
   * @throws ClassNotFoundException
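   *
   * <p>
   * A hypothetical invocation sketch; the parameter key and input path below
   * are illustrative only, not values the underlying trainer is known to read:
   *
   * <pre>{@code
   * Map<String, String> params = new HashMap<String, String>();
   * params.put("training.max.iterations", "100"); // hypothetical key
   * BSPJob job = encoder.train(new HamaConfiguration(),
   *     new Path("/data/instances"), params); // hypothetical path
   * }</pre>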
   */
  public BSPJob train(HamaConfiguration conf, Path dataInputPath,
      Map<String, String> trainingParams) throws ClassNotFoundException,
      IOException, InterruptedException {
    // dataInputPath and trainingParams are not forwarded here; the underlying
    // model is driven by the supplied configuration.
    return model.train(conf);
  }

  /**
   * Train the model with one instance.
   *
   * @param trainingInstance The single instance to train on.
   */
  public void trainOnline(DoubleVector trainingInstance) {
    model.trainOnline(trainingInstance);
  }

  /**
   * Get the matrix M used to encode the input features.
   *
   * @return the matrix that encodes the input features.
   */
  public DoubleMatrix getEncodeWeightMatrix() {
    return model.getWeightsByLayer(0);
  }

  /**
   * Get the matrix M used to decode the compressed information.
   *
   * @return the matrix that decodes the compressed information.
   */
  public DoubleMatrix getDecodeWeightMatrix() {
    return model.getWeightsByLayer(1);
  }

  /**
   * Transform the input instance with the weights of the given layer: a bias
   * term is prepended, the layer's weight matrix is applied, and the result is
   * passed through the layer's squashing function, i.e.
   * {@code y = squash(W * [1; x])}.
   *
   * @param inputInstance The instance to transform.
   * @param inputLayer 0 to encode with the input layer's weights, 1 to decode
   *          with the hidden layer's weights.
   * @return The transformed vector.
   */
  private DoubleVector transform(DoubleVector inputInstance, int inputLayer) {
    // Prepend a constant 1 as the bias term.
    DoubleVector internalInstance = new DenseDoubleVector(
        inputInstance.getDimension() + 1);
    internalInstance.set(0, 1);
    for (int i = 0; i < inputInstance.getDimension(); ++i) {
      internalInstance.set(i + 1, inputInstance.get(i));
    }
    DoubleFunction squashingFunction = model.getSquashingFunction(inputLayer);
    DoubleMatrix weightMatrix = null;
    if (inputLayer == 0) {
      weightMatrix = this.getEncodeWeightMatrix();
    } else {
      weightMatrix = this.getDecodeWeightMatrix();
    }
    // vec = squash(W * [1; x])
    DoubleVector vec = weightMatrix.multiplyVectorUnsafe(internalInstance);
    vec = vec.applyToElements(squashingFunction);
    return vec;
  }

  /**
   * Encode the input instance into its compressed representation.
   *
   * @param inputInstance The instance to encode; its dimension must equal the
   *          model's input dimension.
   * @return a new vector containing the encoded input instance.
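   *
   * <p>
   * A minimal sketch, assuming a hypothetical {@code AutoEncoder(3, 2)} model:
   *
   * <pre>{@code
   * DoubleVector x = new DenseDoubleVector(new double[] { 0.1, 0.5, 0.9 });
   * DoubleVector code = encoder.encode(x); // compressed representation
   * }</pre>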
   */
  public DoubleVector encode(DoubleVector inputInstance) {
    Preconditions.checkArgument(
        inputInstance.getDimension() == model.getLayerSize(0) - 1,
        String.format(
            "The dimension of input instance is %d, but the model requires dimension %d.",
            inputInstance.getDimension(), model.getLayerSize(0) - 1));
    return this.transform(inputInstance, 0);
  }

  /**
   * Decode the compressed information back into the feature space.
   *
   * @param inputInstance The compressed instance to decode; its dimension must
   *          equal the model's hidden-layer dimension.
   * @return a new vector containing the decoded input instance.
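   *
   * <p>
   * A minimal sketch, continuing the {@link #encode(DoubleVector)} example:
   *
   * <pre>{@code
   * DoubleVector reconstruction = encoder.decode(code); // back to 3 dimensions
   * }</pre>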
   */
  public DoubleVector decode(DoubleVector inputInstance) {
    Preconditions.checkArgument(
        inputInstance.getDimension() == model.getLayerSize(1) - 1,
        String.format(
            "The dimension of input instance is %d, but the model requires dimension %d.",
            inputInstance.getDimension(), model.getLayerSize(1) - 1));
    return this.transform(inputInstance, 1);
  }

  /**
   * Get the output of the model for the given feature instance. For an
   * autoencoder, the output is the reconstruction of the input.
   *
   * @param inputInstance The feature instance.
   * @return a new vector with the output of the model for the given instance.
   */
  public DoubleVector getOutput(DoubleVector inputInstance) {
    return model.getOutput(inputInstance);
  }

  /**
   * Set the feature transformer that pre-processes input features.
   *
   * @param featureTransformer The feature transformer to use.
   */
  public void setFeatureTransformer(FeatureTransformer featureTransformer) {
    this.model.setFeatureTransformer(featureTransformer);
  }

}