| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.commons.math3.stat.clustering; |
| |
| |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.List; |
| import java.util.Random; |
| |
| import org.apache.commons.math3.exception.NumberIsTooSmallException; |
| import org.junit.Assert; |
| import org.junit.Test; |
| |
| @Deprecated |
| public class KMeansPlusPlusClustererTest { |
| |
| @Test |
| public void dimension2() { |
| KMeansPlusPlusClusterer<EuclideanIntegerPoint> transformer = |
| new KMeansPlusPlusClusterer<EuclideanIntegerPoint>(new Random(1746432956321l)); |
| EuclideanIntegerPoint[] points = new EuclideanIntegerPoint[] { |
| |
| // first expected cluster |
| new EuclideanIntegerPoint(new int[] { -15, 3 }), |
| new EuclideanIntegerPoint(new int[] { -15, 4 }), |
| new EuclideanIntegerPoint(new int[] { -15, 5 }), |
| new EuclideanIntegerPoint(new int[] { -14, 3 }), |
| new EuclideanIntegerPoint(new int[] { -14, 5 }), |
| new EuclideanIntegerPoint(new int[] { -13, 3 }), |
| new EuclideanIntegerPoint(new int[] { -13, 4 }), |
| new EuclideanIntegerPoint(new int[] { -13, 5 }), |
| |
| // second expected cluster |
| new EuclideanIntegerPoint(new int[] { -1, 0 }), |
| new EuclideanIntegerPoint(new int[] { -1, -1 }), |
| new EuclideanIntegerPoint(new int[] { 0, -1 }), |
| new EuclideanIntegerPoint(new int[] { 1, -1 }), |
| new EuclideanIntegerPoint(new int[] { 1, -2 }), |
| |
| // third expected cluster |
| new EuclideanIntegerPoint(new int[] { 13, 3 }), |
| new EuclideanIntegerPoint(new int[] { 13, 4 }), |
| new EuclideanIntegerPoint(new int[] { 14, 4 }), |
| new EuclideanIntegerPoint(new int[] { 14, 7 }), |
| new EuclideanIntegerPoint(new int[] { 16, 5 }), |
| new EuclideanIntegerPoint(new int[] { 16, 6 }), |
| new EuclideanIntegerPoint(new int[] { 17, 4 }), |
| new EuclideanIntegerPoint(new int[] { 17, 7 }) |
| |
| }; |
| List<Cluster<EuclideanIntegerPoint>> clusters = |
| transformer.cluster(Arrays.asList(points), 3, 5, 10); |
| |
| Assert.assertEquals(3, clusters.size()); |
| boolean cluster1Found = false; |
| boolean cluster2Found = false; |
| boolean cluster3Found = false; |
| for (Cluster<EuclideanIntegerPoint> cluster : clusters) { |
| int[] center = cluster.getCenter().getPoint(); |
| if (center[0] < 0) { |
| cluster1Found = true; |
| Assert.assertEquals(8, cluster.getPoints().size()); |
| Assert.assertEquals(-14, center[0]); |
| Assert.assertEquals( 4, center[1]); |
| } else if (center[1] < 0) { |
| cluster2Found = true; |
| Assert.assertEquals(5, cluster.getPoints().size()); |
| Assert.assertEquals( 0, center[0]); |
| Assert.assertEquals(-1, center[1]); |
| } else { |
| cluster3Found = true; |
| Assert.assertEquals(8, cluster.getPoints().size()); |
| Assert.assertEquals(15, center[0]); |
| Assert.assertEquals(5, center[1]); |
| } |
| } |
| Assert.assertTrue(cluster1Found); |
| Assert.assertTrue(cluster2Found); |
| Assert.assertTrue(cluster3Found); |
| |
| } |
| |
| /** |
| * JIRA: MATH-305 |
| * |
| * Two points, one cluster, one iteration |
| */ |
| @Test |
| public void testPerformClusterAnalysisDegenerate() { |
| KMeansPlusPlusClusterer<EuclideanIntegerPoint> transformer = new KMeansPlusPlusClusterer<EuclideanIntegerPoint>( |
| new Random(1746432956321l)); |
| EuclideanIntegerPoint[] points = new EuclideanIntegerPoint[] { |
| new EuclideanIntegerPoint(new int[] { 1959, 325100 }), |
| new EuclideanIntegerPoint(new int[] { 1960, 373200 }), }; |
| List<Cluster<EuclideanIntegerPoint>> clusters = transformer.cluster(Arrays.asList(points), 1, 1); |
| Assert.assertEquals(1, clusters.size()); |
| Assert.assertEquals(2, (clusters.get(0).getPoints().size())); |
| EuclideanIntegerPoint pt1 = new EuclideanIntegerPoint(new int[] { 1959, 325100 }); |
| EuclideanIntegerPoint pt2 = new EuclideanIntegerPoint(new int[] { 1960, 373200 }); |
| Assert.assertTrue(clusters.get(0).getPoints().contains(pt1)); |
| Assert.assertTrue(clusters.get(0).getPoints().contains(pt2)); |
| |
| } |
| |
| @Test |
| public void testCertainSpace() { |
| KMeansPlusPlusClusterer.EmptyClusterStrategy[] strategies = { |
| KMeansPlusPlusClusterer.EmptyClusterStrategy.LARGEST_VARIANCE, |
| KMeansPlusPlusClusterer.EmptyClusterStrategy.LARGEST_POINTS_NUMBER, |
| KMeansPlusPlusClusterer.EmptyClusterStrategy.FARTHEST_POINT |
| }; |
| for (KMeansPlusPlusClusterer.EmptyClusterStrategy strategy : strategies) { |
| KMeansPlusPlusClusterer<EuclideanIntegerPoint> transformer = |
| new KMeansPlusPlusClusterer<EuclideanIntegerPoint>(new Random(1746432956321l), strategy); |
| int numberOfVariables = 27; |
| // initialise testvalues |
| int position1 = 1; |
| int position2 = position1 + numberOfVariables; |
| int position3 = position2 + numberOfVariables; |
| int position4 = position3 + numberOfVariables; |
| // testvalues will be multiplied |
| int multiplier = 1000000; |
| |
| EuclideanIntegerPoint[] breakingPoints = new EuclideanIntegerPoint[numberOfVariables]; |
| // define the space which will break the cluster algorithm |
| for (int i = 0; i < numberOfVariables; i++) { |
| int points[] = { position1, position2, position3, position4 }; |
| // multiply the values |
| for (int j = 0; j < points.length; j++) { |
| points[j] *= multiplier; |
| } |
| EuclideanIntegerPoint euclideanIntegerPoint = new EuclideanIntegerPoint(points); |
| breakingPoints[i] = euclideanIntegerPoint; |
| position1 += numberOfVariables; |
| position2 += numberOfVariables; |
| position3 += numberOfVariables; |
| position4 += numberOfVariables; |
| } |
| |
| for (int n = 2; n < 27; ++n) { |
| List<Cluster<EuclideanIntegerPoint>> clusters = |
| transformer.cluster(Arrays.asList(breakingPoints), n, 100); |
| Assert.assertEquals(n, clusters.size()); |
| int sum = 0; |
| for (Cluster<EuclideanIntegerPoint> cluster : clusters) { |
| sum += cluster.getPoints().size(); |
| } |
| Assert.assertEquals(numberOfVariables, sum); |
| } |
| } |
| |
| } |
| |
| /** |
| * A helper class for testSmallDistances(). This class is similar to EuclideanIntegerPoint, but |
| * it defines a different distanceFrom() method that tends to return distances less than 1. |
| */ |
| private class CloseIntegerPoint implements Clusterable<CloseIntegerPoint> { |
| public CloseIntegerPoint(EuclideanIntegerPoint point) { |
| euclideanPoint = point; |
| } |
| |
| public double distanceFrom(CloseIntegerPoint p) { |
| return euclideanPoint.distanceFrom(p.euclideanPoint) * 0.001; |
| } |
| |
| public CloseIntegerPoint centroidOf(Collection<CloseIntegerPoint> p) { |
| Collection<EuclideanIntegerPoint> euclideanPoints = |
| new ArrayList<EuclideanIntegerPoint>(); |
| for (CloseIntegerPoint point : p) { |
| euclideanPoints.add(point.euclideanPoint); |
| } |
| return new CloseIntegerPoint(euclideanPoint.centroidOf(euclideanPoints)); |
| } |
| |
| @Override |
| public boolean equals(Object o) { |
| if (!(o instanceof CloseIntegerPoint)) { |
| return false; |
| } |
| CloseIntegerPoint p = (CloseIntegerPoint) o; |
| |
| return euclideanPoint.equals(p.euclideanPoint); |
| } |
| |
| @Override |
| public int hashCode() { |
| return euclideanPoint.hashCode(); |
| } |
| |
| private EuclideanIntegerPoint euclideanPoint; |
| } |
| |
| /** |
| * Test points that are very close together. See issue MATH-546. |
| */ |
| @Test |
| public void testSmallDistances() { |
| // Create a bunch of CloseIntegerPoints. Most are identical, but one is different by a |
| // small distance. |
| int[] repeatedArray = { 0 }; |
| int[] uniqueArray = { 1 }; |
| CloseIntegerPoint repeatedPoint = |
| new CloseIntegerPoint(new EuclideanIntegerPoint(repeatedArray)); |
| CloseIntegerPoint uniquePoint = |
| new CloseIntegerPoint(new EuclideanIntegerPoint(uniqueArray)); |
| |
| Collection<CloseIntegerPoint> points = new ArrayList<CloseIntegerPoint>(); |
| final int NUM_REPEATED_POINTS = 10 * 1000; |
| for (int i = 0; i < NUM_REPEATED_POINTS; ++i) { |
| points.add(repeatedPoint); |
| } |
| points.add(uniquePoint); |
| |
| // Ask a KMeansPlusPlusClusterer to run zero iterations (i.e., to simply choose initial |
| // cluster centers). |
| final long RANDOM_SEED = 0; |
| final int NUM_CLUSTERS = 2; |
| final int NUM_ITERATIONS = 0; |
| KMeansPlusPlusClusterer<CloseIntegerPoint> clusterer = |
| new KMeansPlusPlusClusterer<CloseIntegerPoint>(new Random(RANDOM_SEED)); |
| List<Cluster<CloseIntegerPoint>> clusters = |
| clusterer.cluster(points, NUM_CLUSTERS, NUM_ITERATIONS); |
| |
| // Check that one of the chosen centers is the unique point. |
| boolean uniquePointIsCenter = false; |
| for (Cluster<CloseIntegerPoint> cluster : clusters) { |
| if (cluster.getCenter().equals(uniquePoint)) { |
| uniquePointIsCenter = true; |
| } |
| } |
| Assert.assertTrue(uniquePointIsCenter); |
| } |
| |
| /** |
| * 2 variables cannot be clustered into 3 clusters. See issue MATH-436. |
| */ |
| @Test(expected=NumberIsTooSmallException.class) |
| public void testPerformClusterAnalysisToManyClusters() { |
| KMeansPlusPlusClusterer<EuclideanIntegerPoint> transformer = |
| new KMeansPlusPlusClusterer<EuclideanIntegerPoint>( |
| new Random(1746432956321l)); |
| |
| EuclideanIntegerPoint[] points = new EuclideanIntegerPoint[] { |
| new EuclideanIntegerPoint(new int[] { |
| 1959, 325100 |
| }), new EuclideanIntegerPoint(new int[] { |
| 1960, 373200 |
| }) |
| }; |
| |
| transformer.cluster(Arrays.asList(points), 3, 1); |
| |
| } |
| |
| } |