| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.commons.math4.ml.neuralnet; |
| |
| import java.util.Collection; |
| import java.util.HashMap; |
| import java.util.List; |
| |
| import org.apache.commons.math4.exception.NoDataException; |
| import org.apache.commons.math4.ml.distance.DistanceMeasure; |
| import org.apache.commons.math4.ml.neuralnet.twod.NeuronSquareMesh2D; |
| import org.apache.commons.math4.util.Pair; |
| |
| /** |
| * Utilities for network maps. |
| * |
| * @since 3.3 |
| */ |
| public class MapUtils { |
| /** |
| * Class contains only static methods. |
| */ |
| private MapUtils() {} |
| |
| /** |
| * Finds the neuron that best matches the given features. |
| * |
| * @param features Data. |
| * @param neurons List of neurons to scan. If the list is empty |
| * {@code null} will be returned. |
| * @param distance Distance function. The neuron's features are |
| * passed as the first argument to {@link DistanceMeasure#compute(double[],double[])}. |
| * @return the neuron whose features are closest to the given data. |
| * @throws org.apache.commons.math4.exception.DimensionMismatchException |
| * if the size of the input is not compatible with the neurons features |
| * size. |
| */ |
| public static Neuron findBest(double[] features, |
| Iterable<Neuron> neurons, |
| DistanceMeasure distance) { |
| return new MapRanking(neurons, distance).rank(features, 1).get(0); |
| } |
| |
| /** |
| * Finds the two neurons that best match the given features. |
| * |
| * @param features Data. |
| * @param neurons List of neurons to scan. If the list is empty |
| * {@code null} will be returned. |
| * @param distance Distance function. The neuron's features are |
| * passed as the first argument to {@link DistanceMeasure#compute(double[],double[])}. |
| * @return the two neurons whose features are closest to the given data. |
| * @throws org.apache.commons.math4.exception.DimensionMismatchException |
| * if the size of the input is not compatible with the neurons features |
| * size. |
| */ |
| public static Pair<Neuron, Neuron> findBestAndSecondBest(double[] features, |
| Iterable<Neuron> neurons, |
| DistanceMeasure distance) { |
| final List<Neuron> list = new MapRanking(neurons, distance).rank(features, 2); |
| return new Pair<>(list.get(0), list.get(1)); |
| } |
| |
| /** |
| * Creates a list of neurons sorted in increased order of the distance |
| * to the given {@code features}. |
| * |
| * @param features Data. |
| * @param neurons List of neurons to scan. If it is empty, an empty array |
| * will be returned. |
| * @param distance Distance function. |
| * @return the neurons, sorted in increasing order of distance in data |
| * space. |
| * @throws org.apache.commons.math4.exception.DimensionMismatchException |
| * if the size of the input is not compatible with the neurons features |
| * size. |
| * |
| * @see #findBest(double[],Iterable,DistanceMeasure) |
| * @see #findBestAndSecondBest(double[],Iterable,DistanceMeasure) |
| * |
| * @since 3.6 |
| */ |
| public static Neuron[] sort(double[] features, |
| Iterable<Neuron> neurons, |
| DistanceMeasure distance) { |
| return new MapRanking(neurons, distance).rank(features).toArray(new Neuron[0]); |
| } |
| |
| /** |
| * Computes the <a href="http://en.wikipedia.org/wiki/U-Matrix"> |
| * U-matrix</a> of a two-dimensional map. |
| * |
| * @param map Network. |
| * @param distance Function to use for computing the average |
| * distance from a neuron to its neighbours. |
| * @return the matrix of average distances. |
| */ |
| public static double[][] computeU(NeuronSquareMesh2D map, |
| DistanceMeasure distance) { |
| final int numRows = map.getNumberOfRows(); |
| final int numCols = map.getNumberOfColumns(); |
| final double[][] uMatrix = new double[numRows][numCols]; |
| |
| final Network net = map.getNetwork(); |
| |
| for (int i = 0; i < numRows; i++) { |
| for (int j = 0; j < numCols; j++) { |
| final Neuron neuron = map.getNeuron(i, j); |
| final Collection<Neuron> neighbours = net.getNeighbours(neuron); |
| final double[] features = neuron.getFeatures(); |
| |
| double d = 0; |
| int count = 0; |
| for (Neuron n : neighbours) { |
| ++count; |
| d += distance.compute(features, n.getFeatures()); |
| } |
| |
| uMatrix[i][j] = d / count; |
| } |
| } |
| |
| return uMatrix; |
| } |
| |
| /** |
| * Computes the "hit" histogram of a two-dimensional map. |
| * |
| * @param data Feature vectors. |
| * @param map Network. |
| * @param distance Function to use for determining the best matching unit. |
| * @return the number of hits for each neuron in the map. |
| */ |
| public static int[][] computeHitHistogram(Iterable<double[]> data, |
| NeuronSquareMesh2D map, |
| DistanceMeasure distance) { |
| final HashMap<Neuron, Integer> hit = new HashMap<>(); |
| final Network net = map.getNetwork(); |
| |
| for (double[] f : data) { |
| final Neuron best = findBest(f, net, distance); |
| final Integer count = hit.get(best); |
| if (count == null) { |
| hit.put(best, 1); |
| } else { |
| hit.put(best, count + 1); |
| } |
| } |
| |
| // Copy the histogram data into a 2D map. |
| final int numRows = map.getNumberOfRows(); |
| final int numCols = map.getNumberOfColumns(); |
| final int[][] histo = new int[numRows][numCols]; |
| |
| for (int i = 0; i < numRows; i++) { |
| for (int j = 0; j < numCols; j++) { |
| final Neuron neuron = map.getNeuron(i, j); |
| final Integer count = hit.get(neuron); |
| if (count == null) { |
| histo[i][j] = 0; |
| } else { |
| histo[i][j] = count; |
| } |
| } |
| } |
| |
| return histo; |
| } |
| |
| /** |
| * Computes the quantization error. |
| * The quantization error is the average distance between a feature vector |
| * and its "best matching unit" (closest neuron). |
| * |
| * @param data Feature vectors. |
| * @param neurons List of neurons to scan. |
| * @param distance Distance function. |
| * @return the error. |
| * @throws NoDataException if {@code data} is empty. |
| */ |
| public static double computeQuantizationError(Iterable<double[]> data, |
| Iterable<Neuron> neurons, |
| DistanceMeasure distance) { |
| double d = 0; |
| int count = 0; |
| for (double[] f : data) { |
| ++count; |
| d += distance.compute(f, findBest(f, neurons, distance).getFeatures()); |
| } |
| |
| if (count == 0) { |
| throw new NoDataException(); |
| } |
| |
| return d / count; |
| } |
| |
| /** |
| * Computes the topographic error. |
| * The topographic error is the proportion of data for which first and |
| * second best matching units are not adjacent in the map. |
| * |
| * @param data Feature vectors. |
| * @param net Network. |
| * @param distance Distance function. |
| * @return the error. |
| * @throws NoDataException if {@code data} is empty. |
| */ |
| public static double computeTopographicError(Iterable<double[]> data, |
| Network net, |
| DistanceMeasure distance) { |
| int notAdjacentCount = 0; |
| int count = 0; |
| for (double[] f : data) { |
| ++count; |
| final Pair<Neuron, Neuron> p = findBestAndSecondBest(f, net, distance); |
| if (!net.getNeighbours(p.getFirst()).contains(p.getSecond())) { |
| // Increment count if first and second best matching units |
| // are not neighbours. |
| ++notAdjacentCount; |
| } |
| } |
| |
| if (count == 0) { |
| throw new NoDataException(); |
| } |
| |
| return ((double) notAdjacentCount) / count; |
| } |
| } |