| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.samoa.moa.cluster; |
| |
| import java.util.Arrays; |
| |
| import org.apache.samoa.instances.Instance; |
| |
| /* micro cluster, as defined by Aggarwal et al, On Clustering Massive Data Streams: A Summarization Praradigm |
| * in the book Data streams : models and algorithms, by Charu C Aggarwal |
| * @article{ |
| title = {Data Streams: Models and Algorithms}, |
| author = {Aggarwal, Charu C.}, |
| year = {2007}, |
| publisher = {Springer Science+Business Media, LLC}, |
| url = {http://ebooks.ulb.tu-darmstadt.de/11157/}, |
| institution = {eBooks [http://ebooks.ulb.tu-darmstadt.de/perl/oai2] (Germany)}, |
| } |
| |
| DEFINITION A micro-clusterfor a set of d-dimensionalpoints Xi,. .Xi, |
| with t i m e s t a m p s ~. . .T,, is the (2-d+3)tuple (CF2", CFlX CF2t, CFlt, n), |
| wherein CF2" and CFlX each correspond to a vector of d entries. The definition of each of these entries is as follows: |
| |
| o For each dimension, the sum of the squares of the data values is maintained |
| in CF2". Thus, CF2" contains d values. The p-th entry of CF2" is equal to |
| \sum_j=1^n(x_i_j)^2 |
| |
| o For each dimension, the sum of the data values is maintained in C F l X . |
| Thus, CFIX contains d values. The p-th entry of CFIX is equal to |
| \sum_j=1^n x_i_j |
| |
| o The sum of the squares of the time stamps Ti,. .Tin maintained in CF2t |
| |
| o The sum of the time stamps Ti, . . .Tin maintained in CFlt. |
| |
| o The number of data points is maintained in n. |
| |
| */ |
| public abstract class CFCluster extends SphereCluster { |
| |
| private static final long serialVersionUID = 1L; |
| |
| protected double radiusFactor = 1.8; |
| |
| /** |
| * Number of points in the cluster. |
| */ |
| protected double N; |
| /** |
| * Linear sum of all the points added to the cluster. |
| */ |
| public double[] LS; |
| /** |
| * Squared sum of all the points added to the cluster. |
| */ |
| public double[] SS; |
| |
| /** |
| * Instantiates an empty kernel with the given dimensionality. |
| * |
| * @param dimensions |
| * The number of dimensions of the points that can be in this kernel. |
| */ |
| public CFCluster(Instance instance, int dimensions) { |
| this(instance.toDoubleArray(), dimensions); |
| } |
| |
| protected CFCluster(int dimensions) { |
| this.N = 0; |
| this.LS = new double[dimensions]; |
| this.SS = new double[dimensions]; |
| Arrays.fill(this.LS, 0.0); |
| Arrays.fill(this.SS, 0.0); |
| } |
| |
| public CFCluster(double[] center, int dimensions) { |
| this.N = 1; |
| this.LS = center; |
| this.SS = new double[dimensions]; |
| for (int i = 0; i < SS.length; i++) { |
| SS[i] = Math.pow(center[i], 2); |
| } |
| } |
| |
| public CFCluster(CFCluster cluster) { |
| this.N = cluster.N; |
| this.LS = Arrays.copyOf(cluster.LS, cluster.LS.length); |
| this.SS = Arrays.copyOf(cluster.SS, cluster.SS.length); |
| } |
| |
| public void add(CFCluster cluster) { |
| this.N += cluster.N; |
| addVectors(this.LS, cluster.LS); |
| addVectors(this.SS, cluster.SS); |
| } |
| |
| public abstract CFCluster getCF(); |
| |
| /** |
| * @return this kernels' center |
| */ |
| @Override |
| public double[] getCenter() { |
| assert (this.N > 0); |
| double res[] = new double[this.LS.length]; |
| for (int i = 0; i < res.length; i++) { |
| res[i] = this.LS[i] / N; |
| } |
| return res; |
| } |
| |
| @Override |
| public abstract double getInclusionProbability(Instance instance); |
| |
| /** |
| * See interface <code>Cluster</code> |
| * |
| * @return The radius of the cluster. |
| */ |
| @Override |
| public abstract double getRadius(); |
| |
| /** |
| * See interface <code>Cluster</code> |
| * |
| * @return The weight. |
| * @see Cluster#getWeight() |
| */ |
| @Override |
| public double getWeight() { |
| return N; |
| } |
| |
| public void setN(double N) { |
| this.N = N; |
| } |
| |
| public double getN() { |
| return N; |
| } |
| |
| /** |
| * Adds the second array to the first array element by element. The arrays must have the same length. |
| * |
| * @param a1 |
| * Vector to which the second vector is added. |
| * @param a2 |
| * Vector to be added. This vector does not change. |
| */ |
| public static void addVectors(double[] a1, double[] a2) { |
| assert (a1 != null); |
| assert (a2 != null); |
| assert (a1.length == a2.length) : "Adding two arrays of different " |
| + "length"; |
| |
| for (int i = 0; i < a1.length; i++) { |
| a1[i] += a2[i]; |
| } |
| } |
| } |