commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/ml/clustering/ElkanKMeansPlusPlusClusterer.java - commons-math - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.commons.math4.legacy.ml.clustering;

 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;

 import org.apache.commons.rng.UniformRandomProvider;
 import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
 import org.apache.commons.math4.legacy.ml.distance.DistanceMeasure;
 import org.apache.commons.math4.legacy.stat.descriptive.moment.VectorialMean;

 /**
  * Implementation of k-means++ algorithm.
  * It is based on
  * <blockquote>
  *  Elkan, Charles.
  *  "Using the triangle inequality to accelerate k-means."
  *  ICML. Vol. 3. 2003.
  * </blockquote>
  *
  * <p>
  * Algorithm uses triangle inequality to speed up computation, by reducing
  * the amount of distances calculations.  Towards the last iterations of
  * the algorithm, points which already assigned to some cluster are unlikely
  * to move to a new cluster; updates of cluster centers are also usually
  * relatively small.
  * Triangle inequality is thus used to determine the cases where distance
  * computation could be skipped since center move only a little, without
  * affecting points partitioning.
  *
  * <p>
  * For initial centers seeding, we apply the algorithm described in
  * <blockquote>
  *  Arthur, David, and Sergei Vassilvitskii.
  *  "k-means++: The advantages of careful seeding."
  *  Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete algorithms.
  *  Society for Industrial and Applied Mathematics, 2007.
  * </blockquote>
  *
  * @param <T> Type of the points to cluster.
  */
 public class ElkanKMeansPlusPlusClusterer<T extends Clusterable>
     extends KMeansPlusPlusClusterer<T> {

     /**
      * @param k Clustering parameter.
      */
     public ElkanKMeansPlusPlusClusterer(int k) {
         super(k);
     }

     /**
      * @param k Clustering parameter.
      * @param maxIterations Allowed number of iterations.
      * @param measure Distance measure.
      * @param random Random generator.
      */
     public ElkanKMeansPlusPlusClusterer(int k,
                                         int maxIterations,
                                         DistanceMeasure measure,
                                         UniformRandomProvider random) {
         super(k, maxIterations, measure, random);
     }

     /**
      * @param k Clustering parameter.
      * @param maxIterations Allowed number of iterations.
      * @param measure Distance measure.
      * @param random Random generator.
      * @param emptyStrategy Strategy for handling empty clusters that
      * may appear during algorithm progress.
      */
     public ElkanKMeansPlusPlusClusterer(int k,
                                         int maxIterations,
                                         DistanceMeasure measure,
                                         UniformRandomProvider random,
                                         EmptyClusterStrategy emptyStrategy) {
         super(k, maxIterations, measure, random, emptyStrategy);
     }

     /** {@inheritDoc} */
     @Override
     public List<CentroidCluster<T>> cluster(final Collection<T> points) {
         final int k = getNumberOfClusters();

         // Number of clusters has to be smaller or equal the number of data points.
         if (points.size() < k) {
             throw new NumberIsTooSmallException(points.size(), k, false);
         }

         final List<T> pointsList = new ArrayList<>(points);
         final int n = points.size();
         final int dim = pointsList.get(0).getPoint().length;

         // Keep minimum intra cluster distance, e.g. for given cluster c s[c] is
         // the distance to the closest cluster c' or s[c] = 1/2 * min_{c'!=c} dist(c', c)
         final double[] s = new double[k];
         Arrays.fill(s, Double.MAX_VALUE);
         // Store the matrix of distances between all cluster centers, e.g. dcc[c1][c2] = distance(c1, c2)
         final double[][] dcc = new double[k][k];

         // For each point keeps the upper bound distance to the cluster center.
         final double[] u = new double[n];
         Arrays.fill(u, Double.MAX_VALUE);

         // For each point and for each cluster keeps the lower bound for the distance between the point and cluster
         final double[][] l = new double[n][k];

         // Seed initial set of cluster centers.
         final double[][] centers = seed(pointsList);

         // Points partitioning induced by cluster centers, e.g. for point xi the value of partitions[xi] indicates
         // the cluster or index of the cluster center which is closest to xi. partitions[xi] = min_{c} distance(xi, c).
         final int[] partitions = partitionPoints(pointsList, centers, u, l);

         final double[] deltas = new double[k];
         VectorialMean[] means = new VectorialMean[k];
         for (int it = 0, max = getMaxIterations();
              it < max;
              it++) {
             int changes = 0;
             // Step I.
             // Compute inter-cluster distances.
             updateIntraCentersDistances(centers, dcc, s);

             for (int xi = 0; xi < n; xi++) {
                 boolean r = true;

                 // Step II.
                 if (u[xi] <= s[partitions[xi]]) {
                     continue;
                 }

                 for (int c = 0; c < k; c++) {
                     // Check condition III.
                     if (isSkipNext(partitions, u, l, dcc, xi, c)) {
                         continue;
                     }

                     final double[] x = pointsList.get(xi).getPoint();

                     // III(a)
                     if (r) {
                         u[xi] = distance(x, centers[partitions[xi]]);
                         l[xi][partitions[xi]] = u[xi];
                         r = false;
                     }
                     // III(b)
                     if (u[xi] > l[xi][c] || u[xi] > dcc[partitions[xi]][c]) {
                         l[xi][c] = distance(x, centers[c]);
                         if (l[xi][c] < u[xi]) {
                             partitions[xi] = c;
                             u[xi] = l[xi][c];
                             ++changes;
                         }
                     }
                 }
             }

             // Stopping criterion.
             if (changes == 0 &&
                 it != 0) { // First iteration needed (to update bounds).
                 break;
             }

             // Step IV.
             Arrays.fill(means, null);
             for (int i = 0; i < n; i++) {
                 if (means[partitions[i]] == null) {
                     means[partitions[i]] = new VectorialMean(dim);
                 }
                 means[partitions[i]].increment(pointsList.get(i).getPoint());
             }

             for (int i = 0; i < k; i++) {
                 deltas[i] = distance(centers[i], means[i].getResult());
                 centers[i] = means[i].getResult();
             }

             updateBounds(partitions, u, l, deltas);
         }

         return buildResults(pointsList, partitions, centers);
     }

     /**
      * kmeans++ seeding which provides guarantee of resulting with log(k) approximation
      * for final clustering results
      * <p>
      * Arthur, David, and Sergei Vassilvitskii. "k-means++: The advantages of careful seeding."
      * Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete algorithms.
      * Society for Industrial and Applied Mathematics, 2007.
      *
      * @param points input data points
      * @return an array of initial clusters centers
      *
      */
     private double[][] seed(final List<T> points) {
         final int k = getNumberOfClusters();
         final UniformRandomProvider random = getRandomGenerator();

         final double[][] result = new double[k][];
         final int n = points.size();
         final int pointIndex = random.nextInt(n);

         final double[] minDistances = new double[n];

         int idx = 0;
         result[idx] = points.get(pointIndex).getPoint();

         double sumSqDist = 0;

         for (int i = 0; i < n; i++) {
             final double d = distance(result[idx], points.get(i).getPoint());
             minDistances[i] = d * d;
             sumSqDist += minDistances[i];
         }

         while (++idx < k) {
             final double p = sumSqDist * random.nextDouble();
             int next = 0;
             for (double cdf = 0; cdf < p; next++) {
                 cdf += minDistances[next];
             }

             result[idx] = points.get(next - 1).getPoint();
             for (int i = 0; i < n; i++) {
                 final double d = distance(result[idx], points.get(i).getPoint());
                 sumSqDist -= minDistances[i];
                 minDistances[i] = Math.min(minDistances[i], d * d);
                 sumSqDist += minDistances[i];
             }
         }

         return result;
     }


     /**
      * Once initial centers are chosen, we can actually go through data points and assign points to the
      * cluster based on the distance between initial centers and points.
      *
      * @param pointsList data points list
      * @param centers current clusters centers
      * @param u points upper bounds
      * @param l lower bounds for points to clusters centers
      *
      * @return initial assignment of points into clusters
      */
     private int[] partitionPoints(List<T> pointsList,
                                   double[][] centers,
                                   double[] u,
                                   double[][] l) {
         final int k = getNumberOfClusters();
         final int n = pointsList.size();
         // Points assignments vector.
         final int[] assignments = new int[n];
         Arrays.fill(assignments, -1);
         // Need to assign points to the clusters for the first time and intitialize the lower bound l(x, c)
         for (int i = 0; i < n; i++) {
             final double[] x = pointsList.get(i).getPoint();
             for (int j = 0; j < k; j++) {
                 l[i][j] = distance(x, centers[j]); // l(x, c) = d(x, c)
                 if (u[i] > l[i][j]) {
                     u[i] = l[i][j]; // u(x) = min_c d(x, c)
                     assignments[i] = j; // c(x) = argmin_c d(x, c)
                 }
             }
         }
         return assignments;
     }

     /**
      * Updated distances between clusters centers and for each cluster
      * pick the closest neighbour and keep distance to it.
      *
      * @param centers cluster centers
      * @param dcc matrix of distance between clusters centers, e.g.
      * {@code dcc[i][j] = distance(centers[i], centers[j])}
      * @param s For a given cluster, {@code s[si]} holds distance value
      * to the closest cluster center.
      */
     private void updateIntraCentersDistances(double[][] centers,
                                              double[][] dcc,
                                              double[] s) {
         final int k = getNumberOfClusters();
         for (int i = 0; i < k; i++) {
             // Since distance(xi, xj) == distance(xj, xi), we need to update
             // only upper or lower triangle of the distances matrix and mirror
             // to the lower of upper triangle accordingly, trace has to be all
             // zeros, since distance(xi, xi) == 0.
             for (int j = i + 1; j < k; j++) {
                 dcc[i][j] = 0.5 * distance(centers[i], centers[j]);
                 dcc[j][i] = dcc[i][j];
                 if (dcc[i][j] < s[i]) {
                     s[i] = dcc[i][j];
                 }
                 if (dcc[j][i] < s[j]) {
                     s[j] = dcc[j][i];
                 }
             }
         }
     }

     /**
      * For given points and and cluster, check condition (3) of Elkan algorithm.
      *
      * <ul>
      *  <li>c is not the cluster xi assigned to</li>
      *  <li>{@code u[xi] > l[xi][x]} upper bound for point xi is greater than
      *   lower bound between xi and some cluster c</li>
      *  <li>{@code u[xi] > 1/2 * d(c(xi), c)} upper bound is greater than
      *   distance between center of xi's cluster and c</li>
      * </ul>
      *
      * @param partitions current partition of points into clusters
      * @param u upper bounds for points
      * @param l lower bounds for distance between cluster centers and points
      * @param dcc matrix of distance between clusters centers
      * @param xi index of the point
      * @param c index of the cluster
      * @return true if conditions above satisfied false otherwise
      */
     private static boolean isSkipNext(int[] partitions,
                                       double[] u,
                                       double[][] l,
                                       double[][] dcc,
                                       int xi,
                                       int c) {
         return c == partitions[xi] ||
                u[xi] <= l[xi][c] ||
                u[xi] <= dcc[partitions[xi]][c];
     }

     /**
      * Once kmeans iterations have been converged and no more movements, we can build up the final
      * resulted list of cluster centroids ({@link CentroidCluster}) and assign input points based
      * on the converged partitioning.
      *
      * @param pointsList list of data points
      * @param partitions current partition of points into clusters
      * @param centers cluster centers
      * @return cluster partitioning
      */
     private List<CentroidCluster<T>> buildResults(List<T> pointsList,
                                                   int[] partitions,
                                                   double[][] centers) {
         final int k = getNumberOfClusters();
         final List<CentroidCluster<T>> result = new ArrayList<>();
         for (int i = 0; i < k; i++) {
             final CentroidCluster<T> cluster = new CentroidCluster<>(new DoublePoint(centers[i]));
             result.add(cluster);
         }
         for (int i = 0; i < pointsList.size(); i++) {
             result.get(partitions[i]).addPoint(pointsList.get(i));
         }
         return result;
     }

     /**
      * Based on the distance that cluster center has moved we need to update our upper and lower bound.
      * Worst case assumption, the center of the assigned to given cluster moves away from the point, while
      * centers of over clusters become closer.
      *
      * @param partitions current points assiments to the clusters
      * @param u points upper bounds
      * @param l lower bounds for distances between point and corresponding cluster
      * @param deltas the movement delta for each cluster center
      */
     private void updateBounds(int[] partitions,
                               double[] u,
                               double[][] l,
                               double[] deltas) {
         final int k = getNumberOfClusters();
         for (int i = 0; i < partitions.length; i++) {
             u[i] += deltas[partitions[i]];
             for (int j = 0; j < k; j++) {
                 l[i][j] = Math.max(0, l[i][j] - deltas[j]);
             }
         }
     }

     /**
      * @param a Coordinates.
      * @param b Coordinates.
      * @return the distance between {@code a} and {@code b}.
      */
     private double distance(final double[] a,
                             final double[] b) {
         return getDistanceMeasure().compute(a, b);
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.commons.math4.legacy.ml.clustering;

	import java.util.ArrayList;
	import java.util.Arrays;
	import java.util.Collection;
	import java.util.List;

	import org.apache.commons.rng.UniformRandomProvider;
	import org.apache.commons.math4.legacy.exception.NumberIsTooSmallException;
	import org.apache.commons.math4.legacy.ml.distance.DistanceMeasure;
	import org.apache.commons.math4.legacy.stat.descriptive.moment.VectorialMean;

	/**
	* Implementation of k-means++ algorithm.
	* It is based on
	* <blockquote>
	* Elkan, Charles.
	* "Using the triangle inequality to accelerate k-means."
	* ICML. Vol. 3. 2003.
	* </blockquote>
	*
	* <p>
	* Algorithm uses triangle inequality to speed up computation, by reducing
	* the amount of distances calculations. Towards the last iterations of
	* the algorithm, points which already assigned to some cluster are unlikely
	* to move to a new cluster; updates of cluster centers are also usually
	* relatively small.
	* Triangle inequality is thus used to determine the cases where distance
	* computation could be skipped since center move only a little, without
	* affecting points partitioning.
	*
	* <p>
	* For initial centers seeding, we apply the algorithm described in
	* <blockquote>
	* Arthur, David, and Sergei Vassilvitskii.
	* "k-means++: The advantages of careful seeding."
	* Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete algorithms.
	* Society for Industrial and Applied Mathematics, 2007.
	* </blockquote>
	*
	* @param <T> Type of the points to cluster.
	*/
	public class ElkanKMeansPlusPlusClusterer<T extends Clusterable>
	extends KMeansPlusPlusClusterer<T> {

	/**
	* @param k Clustering parameter.
	*/
	public ElkanKMeansPlusPlusClusterer(int k) {
	super(k);
	}

	/**
	* @param k Clustering parameter.
	* @param maxIterations Allowed number of iterations.
	* @param measure Distance measure.
	* @param random Random generator.
	*/
	public ElkanKMeansPlusPlusClusterer(int k,
	int maxIterations,
	DistanceMeasure measure,
	UniformRandomProvider random) {
	super(k, maxIterations, measure, random);
	}

	/**
	* @param k Clustering parameter.
	* @param maxIterations Allowed number of iterations.
	* @param measure Distance measure.
	* @param random Random generator.
	* @param emptyStrategy Strategy for handling empty clusters that
	* may appear during algorithm progress.
	*/
	public ElkanKMeansPlusPlusClusterer(int k,
	int maxIterations,
	DistanceMeasure measure,
	UniformRandomProvider random,
	EmptyClusterStrategy emptyStrategy) {
	super(k, maxIterations, measure, random, emptyStrategy);
	}

	/** {@inheritDoc} */
	@Override
	public List<CentroidCluster<T>> cluster(final Collection<T> points) {
	final int k = getNumberOfClusters();

	// Number of clusters has to be smaller or equal the number of data points.
	if (points.size() < k) {
	throw new NumberIsTooSmallException(points.size(), k, false);
	}

	final List<T> pointsList = new ArrayList<>(points);
	final int n = points.size();
	final int dim = pointsList.get(0).getPoint().length;

	// Keep minimum intra cluster distance, e.g. for given cluster c s[c] is
	// the distance to the closest cluster c' or s[c] = 1/2 * min_{c'!=c} dist(c', c)
	final double[] s = new double[k];
	Arrays.fill(s, Double.MAX_VALUE);
	// Store the matrix of distances between all cluster centers, e.g. dcc[c1][c2] = distance(c1, c2)
	final double[][] dcc = new double[k][k];

	// For each point keeps the upper bound distance to the cluster center.
	final double[] u = new double[n];
	Arrays.fill(u, Double.MAX_VALUE);

	// For each point and for each cluster keeps the lower bound for the distance between the point and cluster
	final double[][] l = new double[n][k];

	// Seed initial set of cluster centers.
	final double[][] centers = seed(pointsList);

	// Points partitioning induced by cluster centers, e.g. for point xi the value of partitions[xi] indicates
	// the cluster or index of the cluster center which is closest to xi. partitions[xi] = min_{c} distance(xi, c).
	final int[] partitions = partitionPoints(pointsList, centers, u, l);

	final double[] deltas = new double[k];
	VectorialMean[] means = new VectorialMean[k];
	for (int it = 0, max = getMaxIterations();
	it < max;
	it++) {
	int changes = 0;
	// Step I.
	// Compute inter-cluster distances.
	updateIntraCentersDistances(centers, dcc, s);

	for (int xi = 0; xi < n; xi++) {
	boolean r = true;

	// Step II.
	if (u[xi] <= s[partitions[xi]]) {
	continue;
	}

	for (int c = 0; c < k; c++) {
	// Check condition III.
	if (isSkipNext(partitions, u, l, dcc, xi, c)) {
	continue;
	}

	final double[] x = pointsList.get(xi).getPoint();

	// III(a)
	if (r) {
	u[xi] = distance(x, centers[partitions[xi]]);
	l[xi][partitions[xi]] = u[xi];
	r = false;
	}
	// III(b)
	if (u[xi] > l[xi][c] \|\| u[xi] > dcc[partitions[xi]][c]) {
	l[xi][c] = distance(x, centers[c]);
	if (l[xi][c] < u[xi]) {
	partitions[xi] = c;
	u[xi] = l[xi][c];
	++changes;
	}
	}
	}
	}

	// Stopping criterion.
	if (changes == 0 &&
	it != 0) { // First iteration needed (to update bounds).
	break;
	}

	// Step IV.
	Arrays.fill(means, null);
	for (int i = 0; i < n; i++) {
	if (means[partitions[i]] == null) {
	means[partitions[i]] = new VectorialMean(dim);
	}
	means[partitions[i]].increment(pointsList.get(i).getPoint());
	}

	for (int i = 0; i < k; i++) {
	deltas[i] = distance(centers[i], means[i].getResult());
	centers[i] = means[i].getResult();
	}

	updateBounds(partitions, u, l, deltas);
	}

	return buildResults(pointsList, partitions, centers);
	}

	/**
	* kmeans++ seeding which provides guarantee of resulting with log(k) approximation
	* for final clustering results
	* <p>
	* Arthur, David, and Sergei Vassilvitskii. "k-means++: The advantages of careful seeding."
	* Proceedings of the eighteenth annual ACM-SIAM symposium on Discrete algorithms.
	* Society for Industrial and Applied Mathematics, 2007.
	*
	* @param points input data points
	* @return an array of initial clusters centers
	*
	*/
	private double[][] seed(final List<T> points) {
	final int k = getNumberOfClusters();
	final UniformRandomProvider random = getRandomGenerator();

	final double[][] result = new double[k][];
	final int n = points.size();
	final int pointIndex = random.nextInt(n);

	final double[] minDistances = new double[n];

	int idx = 0;
	result[idx] = points.get(pointIndex).getPoint();

	double sumSqDist = 0;

	for (int i = 0; i < n; i++) {
	final double d = distance(result[idx], points.get(i).getPoint());
	minDistances[i] = d * d;
	sumSqDist += minDistances[i];
	}

	while (++idx < k) {
	final double p = sumSqDist * random.nextDouble();
	int next = 0;
	for (double cdf = 0; cdf < p; next++) {
	cdf += minDistances[next];
	}

	result[idx] = points.get(next - 1).getPoint();
	for (int i = 0; i < n; i++) {
	final double d = distance(result[idx], points.get(i).getPoint());
	sumSqDist -= minDistances[i];
	minDistances[i] = Math.min(minDistances[i], d * d);
	sumSqDist += minDistances[i];
	}
	}

	return result;
	}


	/**
	* Once initial centers are chosen, we can actually go through data points and assign points to the
	* cluster based on the distance between initial centers and points.
	*
	* @param pointsList data points list
	* @param centers current clusters centers
	* @param u points upper bounds
	* @param l lower bounds for points to clusters centers
	*
	* @return initial assignment of points into clusters
	*/
	private int[] partitionPoints(List<T> pointsList,
	double[][] centers,
	double[] u,
	double[][] l) {
	final int k = getNumberOfClusters();
	final int n = pointsList.size();
	// Points assignments vector.
	final int[] assignments = new int[n];
	Arrays.fill(assignments, -1);
	// Need to assign points to the clusters for the first time and intitialize the lower bound l(x, c)
	for (int i = 0; i < n; i++) {
	final double[] x = pointsList.get(i).getPoint();
	for (int j = 0; j < k; j++) {
	l[i][j] = distance(x, centers[j]); // l(x, c) = d(x, c)
	if (u[i] > l[i][j]) {
	u[i] = l[i][j]; // u(x) = min_c d(x, c)
	assignments[i] = j; // c(x) = argmin_c d(x, c)
	}
	}
	}
	return assignments;
	}

	/**
	* Updated distances between clusters centers and for each cluster
	* pick the closest neighbour and keep distance to it.
	*
	* @param centers cluster centers
	* @param dcc matrix of distance between clusters centers, e.g.
	* {@code dcc[i][j] = distance(centers[i], centers[j])}
	* @param s For a given cluster, {@code s[si]} holds distance value
	* to the closest cluster center.
	*/
	private void updateIntraCentersDistances(double[][] centers,
	double[][] dcc,
	double[] s) {
	final int k = getNumberOfClusters();
	for (int i = 0; i < k; i++) {
	// Since distance(xi, xj) == distance(xj, xi), we need to update
	// only upper or lower triangle of the distances matrix and mirror
	// to the lower of upper triangle accordingly, trace has to be all
	// zeros, since distance(xi, xi) == 0.
	for (int j = i + 1; j < k; j++) {
	dcc[i][j] = 0.5 * distance(centers[i], centers[j]);
	dcc[j][i] = dcc[i][j];
	if (dcc[i][j] < s[i]) {
	s[i] = dcc[i][j];
	}
	if (dcc[j][i] < s[j]) {
	s[j] = dcc[j][i];
	}
	}
	}
	}

	/**
	* For given points and and cluster, check condition (3) of Elkan algorithm.
	*
	* <ul>
	* <li>c is not the cluster xi assigned to</li>
	* <li>{@code u[xi] > l[xi][x]} upper bound for point xi is greater than
	* lower bound between xi and some cluster c</li>
	* <li>{@code u[xi] > 1/2 * d(c(xi), c)} upper bound is greater than
	* distance between center of xi's cluster and c</li>
	* </ul>
	*
	* @param partitions current partition of points into clusters
	* @param u upper bounds for points
	* @param l lower bounds for distance between cluster centers and points
	* @param dcc matrix of distance between clusters centers
	* @param xi index of the point
	* @param c index of the cluster
	* @return true if conditions above satisfied false otherwise
	*/
	private static boolean isSkipNext(int[] partitions,
	double[] u,
	double[][] l,
	double[][] dcc,
	int xi,
	int c) {
	return c == partitions[xi] \|\|
	u[xi] <= l[xi][c] \|\|
	u[xi] <= dcc[partitions[xi]][c];
	}

	/**
	* Once kmeans iterations have been converged and no more movements, we can build up the final
	* resulted list of cluster centroids ({@link CentroidCluster}) and assign input points based
	* on the converged partitioning.
	*
	* @param pointsList list of data points
	* @param partitions current partition of points into clusters
	* @param centers cluster centers
	* @return cluster partitioning
	*/
	private List<CentroidCluster<T>> buildResults(List<T> pointsList,
	int[] partitions,
	double[][] centers) {
	final int k = getNumberOfClusters();
	final List<CentroidCluster<T>> result = new ArrayList<>();
	for (int i = 0; i < k; i++) {
	final CentroidCluster<T> cluster = new CentroidCluster<>(new DoublePoint(centers[i]));
	result.add(cluster);
	}
	for (int i = 0; i < pointsList.size(); i++) {
	result.get(partitions[i]).addPoint(pointsList.get(i));
	}
	return result;
	}

	/**
	* Based on the distance that cluster center has moved we need to update our upper and lower bound.
	* Worst case assumption, the center of the assigned to given cluster moves away from the point, while
	* centers of over clusters become closer.
	*
	* @param partitions current points assiments to the clusters
	* @param u points upper bounds
	* @param l lower bounds for distances between point and corresponding cluster
	* @param deltas the movement delta for each cluster center
	*/
	private void updateBounds(int[] partitions,
	double[] u,
	double[][] l,
	double[] deltas) {
	final int k = getNumberOfClusters();
	for (int i = 0; i < partitions.length; i++) {
	u[i] += deltas[partitions[i]];
	for (int j = 0; j < k; j++) {
	l[i][j] = Math.max(0, l[i][j] - deltas[j]);
	}
	}
	}

	/**
	* @param a Coordinates.
	* @param b Coordinates.
	* @return the distance between {@code a} and {@code b}.
	*/
	private double distance(final double[] a,
	final double[] b) {
	return getDistanceMeasure().compute(a, b);
	}
	}