blob: 4b1eb207244f1c5b620ecde296501c7b75a1a464 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.math4.ml.neuralnet;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import org.apache.commons.math4.exception.NoDataException;
import org.apache.commons.math4.ml.distance.DistanceMeasure;
import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D;
import org.apache.commons.math4.util.Pair;
/**
* Utilities for network maps.
*
* @since 3.3
*/
public class MapUtils {
/**
* Class contains only static methods.
*/
private MapUtils() {}
/**
* Finds the neuron that best matches the given features.
*
* @param features Data.
* @param neurons List of neurons to scan. If the list is empty
* {@code null} will be returned.
* @param distance Distance function. The neuron's features are
* passed as the first argument to {@link DistanceMeasure#compute(double[],double[])}.
* @return the neuron whose features are closest to the given data.
* @throws org.apache.commons.math4.exception.DimensionMismatchException
* if the size of the input is not compatible with the neurons features
* size.
*/
public static Neuron findBest(double[] features,
Iterable<Neuron> neurons,
DistanceMeasure distance) {
return new MapRanking(neurons, distance).rank(features, 1).get(0);
}
/**
* Finds the two neurons that best match the given features.
*
* @param features Data.
* @param neurons List of neurons to scan. If the list is empty
* {@code null} will be returned.
* @param distance Distance function. The neuron's features are
* passed as the first argument to {@link DistanceMeasure#compute(double[],double[])}.
* @return the two neurons whose features are closest to the given data.
* @throws org.apache.commons.math4.exception.DimensionMismatchException
* if the size of the input is not compatible with the neurons features
* size.
*/
public static Pair<Neuron, Neuron> findBestAndSecondBest(double[] features,
Iterable<Neuron> neurons,
DistanceMeasure distance) {
final List<Neuron> list = new MapRanking(neurons, distance).rank(features, 2);
return new Pair<>(list.get(0), list.get(1));
}
/**
* Creates a list of neurons sorted in increased order of the distance
* to the given {@code features}.
*
* @param features Data.
* @param neurons List of neurons to scan. If it is empty, an empty array
* will be returned.
* @param distance Distance function.
* @return the neurons, sorted in increasing order of distance in data
* space.
* @throws org.apache.commons.math4.exception.DimensionMismatchException
* if the size of the input is not compatible with the neurons features
* size.
*
* @see #findBest(double[],Iterable,DistanceMeasure)
* @see #findBestAndSecondBest(double[],Iterable,DistanceMeasure)
*
* @since 3.6
*/
public static Neuron[] sort(double[] features,
Iterable<Neuron> neurons,
DistanceMeasure distance) {
return new MapRanking(neurons, distance).rank(features).toArray(new Neuron[0]);
}
/**
* Computes the <a href="http://en.wikipedia.org/wiki/U-Matrix">
* U-matrix</a> of a two-dimensional map.
*
* @param map Network.
* @param distance Function to use for computing the average
* distance from a neuron to its neighbours.
* @return the matrix of average distances.
*/
public static double[][] computeU(NeuronSquareMesh2D map,
DistanceMeasure distance) {
final int numRows = map.getNumberOfRows();
final int numCols = map.getNumberOfColumns();
final double[][] uMatrix = new double[numRows][numCols];
final Network net = map.getNetwork();
for (int i = 0; i < numRows; i++) {
for (int j = 0; j < numCols; j++) {
final Neuron neuron = map.getNeuron(i, j);
final Collection<Neuron> neighbours = net.getNeighbours(neuron);
final double[] features = neuron.getFeatures();
double d = 0;
int count = 0;
for (Neuron n : neighbours) {
++count;
d += distance.compute(features, n.getFeatures());
}
uMatrix[i][j] = d / count;
}
}
return uMatrix;
}
/**
* Computes the "hit" histogram of a two-dimensional map.
*
* @param data Feature vectors.
* @param map Network.
* @param distance Function to use for determining the best matching unit.
* @return the number of hits for each neuron in the map.
*/
public static int[][] computeHitHistogram(Iterable<double[]> data,
NeuronSquareMesh2D map,
DistanceMeasure distance) {
final HashMap<Neuron, Integer> hit = new HashMap<>();
final Network net = map.getNetwork();
for (double[] f : data) {
final Neuron best = findBest(f, net, distance);
final Integer count = hit.get(best);
if (count == null) {
hit.put(best, 1);
} else {
hit.put(best, count + 1);
}
}
// Copy the histogram data into a 2D map.
final int numRows = map.getNumberOfRows();
final int numCols = map.getNumberOfColumns();
final int[][] histo = new int[numRows][numCols];
for (int i = 0; i < numRows; i++) {
for (int j = 0; j < numCols; j++) {
final Neuron neuron = map.getNeuron(i, j);
final Integer count = hit.get(neuron);
if (count == null) {
histo[i][j] = 0;
} else {
histo[i][j] = count;
}
}
}
return histo;
}
/**
* Computes the quantization error.
* The quantization error is the average distance between a feature vector
* and its "best matching unit" (closest neuron).
*
* @param data Feature vectors.
* @param neurons List of neurons to scan.
* @param distance Distance function.
* @return the error.
* @throws NoDataException if {@code data} is empty.
*/
public static double computeQuantizationError(Iterable<double[]> data,
Iterable<Neuron> neurons,
DistanceMeasure distance) {
double d = 0;
int count = 0;
for (double[] f : data) {
++count;
d += distance.compute(f, findBest(f, neurons, distance).getFeatures());
}
if (count == 0) {
throw new NoDataException();
}
return d / count;
}
/**
* Computes the topographic error.
* The topographic error is the proportion of data for which first and
* second best matching units are not adjacent in the map.
*
* @param data Feature vectors.
* @param net Network.
* @param distance Distance function.
* @return the error.
* @throws NoDataException if {@code data} is empty.
*/
public static double computeTopographicError(Iterable<double[]> data,
Network net,
DistanceMeasure distance) {
int notAdjacentCount = 0;
int count = 0;
for (double[] f : data) {
++count;
final Pair<Neuron, Neuron> p = findBestAndSecondBest(f, net, distance);
if (!net.getNeighbours(p.getFirst()).contains(p.getSecond())) {
// Increment count if first and second best matching units
// are not neighbours.
++notAdjacentCount;
}
}
if (count == 0) {
throw new NoDataException();
}
return ((double) notAdjacentCount) / count;
}
}