blob: a65d9a13aab041bd1a50bc51fc709d82a46bb482 [file] [log] [blame]
package com.yahoo.labs.samoa.moa.streams.clustering;
/*
* #%L
* SAMOA
* %%
* Copyright (C) 2010 RWTH Aachen University, Germany
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.LinkedList;
import java.util.Random;
import java.util.Vector;
import com.yahoo.labs.samoa.moa.cluster.Clustering;
import com.yahoo.labs.samoa.moa.cluster.SphereCluster;
import com.yahoo.labs.samoa.moa.core.AutoExpandVector;
import com.yahoo.labs.samoa.moa.core.InstanceExample;
import com.yahoo.labs.samoa.instances.InstancesHeader;
import com.yahoo.labs.samoa.moa.core.ObjectRepository;
import com.yahoo.labs.samoa.moa.core.DataPoint;
import com.github.javacliparser.FlagOption;
import com.github.javacliparser.FloatOption;
import com.github.javacliparser.IntOption;
import com.yahoo.labs.samoa.moa.streams.InstanceStream;
import com.yahoo.labs.samoa.moa.tasks.TaskMonitor;
import com.yahoo.labs.samoa.instances.Attribute;
import com.yahoo.labs.samoa.instances.DenseInstance;
import com.yahoo.labs.samoa.moa.core.FastVector;
import com.yahoo.labs.samoa.instances.Instance;
import com.yahoo.labs.samoa.instances.Instances;
public class RandomRBFGeneratorEvents extends ClusteringStream {
private transient Vector listeners;
private static final long serialVersionUID = 1L;
public IntOption modelRandomSeedOption = new IntOption("modelRandomSeed",
'm', "Seed for random generation of model.", 1);
public IntOption instanceRandomSeedOption = new IntOption(
"instanceRandomSeed", 'i',
"Seed for random generation of instances.", 5);
public IntOption numClusterOption = new IntOption("numCluster", 'K',
"The average number of centroids in the model.", 5, 1, Integer.MAX_VALUE);
public IntOption numClusterRangeOption = new IntOption("numClusterRange", 'k',
"Deviation of the number of centroids in the model.", 3, 0, Integer.MAX_VALUE);
public FloatOption kernelRadiiOption = new FloatOption("kernelRadius", 'R',
"The average radii of the centroids in the model.", 0.07, 0, 1);
public FloatOption kernelRadiiRangeOption = new FloatOption("kernelRadiusRange", 'r',
"Deviation of average radii of the centroids in the model.", 0, 0, 1);
public FloatOption densityRangeOption = new FloatOption("densityRange", 'd',
"Offset of the average weight a cluster has. Value of 0 means all cluster " +
"contain the same amount of points.", 0, 0, 1);
public IntOption speedOption = new IntOption("speed", 'V',
"Kernels move a predefined distance of 0.01 every X points", 500, 1, Integer.MAX_VALUE);
public IntOption speedRangeOption = new IntOption("speedRange", 'v',
"Speed/Velocity point offset", 0, 0, Integer.MAX_VALUE);
public FloatOption noiseLevelOption = new FloatOption("noiseLevel", 'N',
"Noise level", 0.1, 0, 1);
public FlagOption noiseInClusterOption = new FlagOption("noiseInCluster", 'n',
"Allow noise to be placed within a cluster");
public IntOption eventFrequencyOption = new IntOption("eventFrequency", 'E',
"Event frequency. Enable at least one of the events below and set numClusterRange!", 30000, 0, Integer.MAX_VALUE);
public FlagOption eventMergeSplitOption = new FlagOption("eventMergeSplitOption", 'M',
"Enable merging and splitting of clusters. Set eventFrequency and numClusterRange!");
public FlagOption eventDeleteCreateOption = new FlagOption("eventDeleteCreate", 'C',
"Enable emering and disapperaing of clusters. Set eventFrequency and numClusterRange!");
private double merge_threshold = 0.7;
private int kernelMovePointFrequency = 10;
private double maxDistanceMoveThresholdByStep = 0.01;
private int maxOverlapFitRuns = 50;
private double eventFrequencyRange = 0;
private boolean debug = false;
private AutoExpandVector<GeneratorCluster> kernels;
protected Random instanceRandom;
protected InstancesHeader streamHeader;
private int numGeneratedInstances;
private int numActiveKernels;
private int nextEventCounter;
private int nextEventChoice = -1;
private int clusterIdCounter;
private GeneratorCluster mergeClusterA;
private GeneratorCluster mergeClusterB;
private boolean mergeKernelsOverlapping = false;
private class GeneratorCluster implements Serializable{
//TODO: points is redundant to microclusterpoints, we need to come
//up with a good strategy that microclusters get updated and
//rebuild if needed. Idea: Sort microclusterpoints by timestamp and let
// microclusterdecay hold the timestamp for when the last point in a
//microcluster gets kicked, then we rebuild... or maybe not... could be
//same as searching for point to be kicked. more likely is we rebuild
//fewer times then insert.
private static final long serialVersionUID = -6301649898961112942L;
SphereCluster generator;
int kill = -1;
boolean merging = false;
double[] moveVector;
int totalMovementSteps;
int currentMovementSteps;
boolean isSplitting = false;
LinkedList<DataPoint> points = new LinkedList<DataPoint>();
ArrayList<SphereCluster> microClusters = new ArrayList<SphereCluster>();
ArrayList<ArrayList<DataPoint>> microClustersPoints = new ArrayList();
ArrayList<Integer> microClustersDecay = new ArrayList();
public GeneratorCluster(int label) {
boolean outofbounds = true;
int tryCounter = 0;
while(outofbounds && tryCounter < maxOverlapFitRuns){
tryCounter++;
outofbounds = false;
double[] center = new double [numAttsOption.getValue()];
double radius = kernelRadiiOption.getValue()+(instanceRandom.nextBoolean()?-1:1)*kernelRadiiRangeOption.getValue()*instanceRandom.nextDouble();
while(radius <= 0){
radius = kernelRadiiOption.getValue()+(instanceRandom.nextBoolean()?-1:1)*kernelRadiiRangeOption.getValue()*instanceRandom.nextDouble();
}
for (int j = 0; j < numAttsOption.getValue(); j++) {
center[j] = instanceRandom.nextDouble();
if(center[j]- radius < 0 || center[j] + radius > 1){
outofbounds = true;
break;
}
}
generator = new SphereCluster(center, radius);
}
if(tryCounter < maxOverlapFitRuns){
generator.setId(label);
double avgWeight = 1.0/numClusterOption.getValue();
double weight = avgWeight + (instanceRandom.nextBoolean()?-1:1)*avgWeight*densityRangeOption.getValue()*instanceRandom.nextDouble();
generator.setWeight(weight);
setDesitnation(null);
}
else{
generator = null;
kill = 0;
System.out.println("Tried "+maxOverlapFitRuns+" times to create kernel. Reduce average radii." );
}
}
public GeneratorCluster(int label, SphereCluster cluster) {
this.generator = cluster;
cluster.setId(label);
setDesitnation(null);
}
public int getWorkID(){
for(int c = 0; c < kernels.size(); c++){
if(kernels.get(c).equals(this))
return c;
}
return -1;
}
private void updateKernel(){
if(kill == 0){
kernels.remove(this);
}
if(kill > 0){
kill--;
}
//we could be lot more precise if we would keep track of timestamps of points
//then we could remove all old points and rebuild the cluster on up to date point base
//BUT worse the effort??? so far we just want to avoid overlap with this, so its more
//konservative as needed. Only needs to change when we need a thighter representation
for (int m = 0; m < microClusters.size(); m++) {
if(numGeneratedInstances-microClustersDecay.get(m) > decayHorizonOption.getValue()){
microClusters.remove(m);
microClustersPoints.remove(m);
microClustersDecay.remove(m);
}
}
if(!points.isEmpty() && numGeneratedInstances-points.getFirst().getTimestamp() >= decayHorizonOption.getValue()){
// if(debug)
// System.out.println("Cleaning up macro cluster "+generator.getId());
points.removeFirst();
}
}
private void addInstance(Instance instance){
DataPoint point = new DataPoint(instance, numGeneratedInstances);
points.add(point);
int minMicroIndex = -1;
double minHullDist = Double.MAX_VALUE;
boolean inserted = false;
//we favour more recently build clusters so we can remove earlier cluster sooner
for (int m = microClusters.size()-1; m >=0 ; m--) {
SphereCluster micro = microClusters.get(m);
double hulldist = micro.getCenterDistance(point)-micro.getRadius();
//point fits into existing cluster
if(hulldist <= 0){
microClustersPoints.get(m).add(point);
microClustersDecay.set(m, numGeneratedInstances);
inserted = true;
break;
}
//if not, check if its at least the closest cluster
else{
if(hulldist < minHullDist){
minMicroIndex = m;
minHullDist = hulldist;
}
}
}
//Reseting index choice for alternative cluster building
int alt = 1;
if(alt == 1)
minMicroIndex = -1;
if(!inserted){
//add to closest cluster and expand cluster
if(minMicroIndex!=-1){
microClustersPoints.get(minMicroIndex).add(point);
//we should keep the miniball instances and just check in
//new points instead of rebuilding the whole thing
SphereCluster s = new SphereCluster(microClustersPoints.get(minMicroIndex),numAttsOption.getValue());
//check if current microcluster is bigger then generating cluster
if(s.getRadius() > generator.getRadius()){
//remove previously added point
microClustersPoints.get(minMicroIndex).remove(microClustersPoints.get(minMicroIndex).size()-1);
minMicroIndex = -1;
}
else{
microClusters.set(minMicroIndex, s);
microClustersDecay.set(minMicroIndex, numGeneratedInstances);
}
}
//minMicroIndex might have been reset above
//create new micro cluster
if(minMicroIndex == -1){
ArrayList<DataPoint> microPoints = new ArrayList<DataPoint>();
microPoints.add(point);
SphereCluster s;
if(alt == 0)
s = new SphereCluster(microPoints,numAttsOption.getValue());
else
s = new SphereCluster(generator.getCenter(),generator.getRadius(),1);
microClusters.add(s);
microClustersPoints.add(microPoints);
microClustersDecay.add(numGeneratedInstances);
int id = 0;
while(id < kernels.size()){
if(kernels.get(id) == this)
break;
id++;
}
s.setGroundTruth(id);
}
}
}
private void move(){
if(currentMovementSteps < totalMovementSteps){
currentMovementSteps++;
if( moveVector == null){
return;
}
else{
double[] center = generator.getCenter();
boolean outofbounds = true;
while(outofbounds){
double radius = generator.getRadius();
outofbounds = false;
center = generator.getCenter();
for ( int d = 0; d < center.length; d++ ) {
center[d]+= moveVector[d];
if(center[d]- radius < 0 || center[d] + radius > 1){
outofbounds = true;
setDesitnation(null);
break;
}
}
}
generator.setCenter(center);
}
}
else{
if(!merging){
setDesitnation(null);
isSplitting = false;
}
}
}
void setDesitnation(double[] destination){
if(destination == null){
destination = new double [numAttsOption.getValue()];
for (int j = 0; j < numAttsOption.getValue(); j++) {
destination[j] = instanceRandom.nextDouble();
}
}
double[] center = generator.getCenter();
int dim = center.length;
double[] v = new double[dim];
for ( int d = 0; d < dim; d++ ) {
v[d]=destination[d]-center[d];
}
setMoveVector(v);
}
void setMoveVector(double[] vector){
//we are ignoring the steps, otherwise we have to change
//speed of the kernels, do we want that?
moveVector = vector;
int speedInPoints = speedOption.getValue();
if(speedRangeOption.getValue() > 0)
speedInPoints +=(instanceRandom.nextBoolean()?-1:1)*instanceRandom.nextInt(speedRangeOption.getValue());
if(speedInPoints < 1) speedInPoints = speedOption.getValue();
double length = 0;
for ( int d = 0; d < moveVector.length; d++ ) {
length+=Math.pow(vector[d],2);
}
length = Math.sqrt(length);
totalMovementSteps = (int)(length/(maxDistanceMoveThresholdByStep*kernelMovePointFrequency)*speedInPoints);
for ( int d = 0; d < moveVector.length; d++ ) {
moveVector[d]/=(double)totalMovementSteps;
}
currentMovementSteps = 0;
// if(debug){
// System.out.println("Setting new direction for C"+generator.getId()+": distance "
// +length+" in "+totalMovementSteps+" steps");
// }
}
private String tryMerging(GeneratorCluster merge){
String message = "";
double overlapDegree = generator.overlapRadiusDegree(merge.generator);
if(overlapDegree > merge_threshold){
SphereCluster mcluster = merge.generator;
double radius = Math.max(generator.getRadius(), mcluster.getRadius());
generator.combine(mcluster);
// //adjust radius, get bigger and bigger with high dim data
generator.setRadius(radius);
// double[] center = generator.getCenter();
// double[] mcenter = mcluster.getCenter();
// double weight = generator.getWeight();
// double mweight = generator.getWeight();
//// for (int i = 0; i < center.length; i++) {
//// center[i] = (center[i] * weight + mcenter[i] * mweight) / (mweight + weight);
//// }
// generator.setWeight(weight + mweight);
message = "Clusters merging: "+mergeClusterB.generator.getId()+" into "+mergeClusterA.generator.getId();
//clean up and restet merging stuff
//mark kernel so it gets killed when it doesn't contain any more instances
merge.kill = decayHorizonOption.getValue();
//set weight to 0 so no new instances will be created in the cluster
mcluster.setWeight(0.0);
normalizeWeights();
numActiveKernels--;
mergeClusterB = mergeClusterA = null;
merging = false;
mergeKernelsOverlapping = false;
}
else{
if(overlapDegree > 0 && !mergeKernelsOverlapping){
mergeKernelsOverlapping = true;
message = "Merge overlapping started";
}
}
return message;
}
private String splitKernel(){
isSplitting = true;
//todo radius range
double radius = kernelRadiiOption.getValue();
double avgWeight = 1.0/numClusterOption.getValue();
double weight = avgWeight + avgWeight*densityRangeOption.getValue()*instanceRandom.nextDouble();
SphereCluster spcluster = null;
double[] center = generator.getCenter();
spcluster = new SphereCluster(center, radius, weight);
if(spcluster !=null){
GeneratorCluster gc = new GeneratorCluster(clusterIdCounter++, spcluster);
gc.isSplitting = true;
kernels.add(gc);
normalizeWeights();
numActiveKernels++;
return "Split from Kernel "+generator.getId();
}
else{
System.out.println("Tried to split new kernel from C"+generator.getId()+
". Not enough room for new cluster, decrease average radii, number of clusters or enable overlap.");
return "";
}
}
private String fadeOut(){
kill = decayHorizonOption.getValue();
generator.setWeight(0.0);
numActiveKernels--;
normalizeWeights();
return "Fading out C"+generator.getId();
}
}
public RandomRBFGeneratorEvents() {
noiseInClusterOption.set();
// eventDeleteCreateOption.set();
// eventMergeSplitOption.set();
}
public InstancesHeader getHeader() {
return streamHeader;
}
public long estimatedRemainingInstances() {
return -1;
}
public boolean hasMoreInstances() {
return true;
}
public boolean isRestartable() {
return true;
}
@Override
public void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {
monitor.setCurrentActivity("Preparing random RBF...", -1.0);
generateHeader();
restart();
}
public void restart() {
instanceRandom = new Random(instanceRandomSeedOption.getValue());
nextEventCounter = eventFrequencyOption.getValue();
nextEventChoice = getNextEvent();
numActiveKernels = 0;
numGeneratedInstances = 0;
clusterIdCounter = 0;
mergeClusterA = mergeClusterB = null;
kernels = new AutoExpandVector<GeneratorCluster>();
initKernels();
}
protected void generateHeader() { // 2013/06/02: Noise label
ArrayList<Attribute> attributes = new ArrayList<Attribute>();
for (int i = 0; i < this.numAttsOption.getValue(); i++) {
attributes.add(new Attribute("att" + (i + 1)));
}
ArrayList<String> classLabels = new ArrayList<String>();
for (int i = 0; i < this.numClusterOption.getValue(); i++) {
classLabels.add("class" + (i + 1));
}
if (noiseLevelOption.getValue() > 0) classLabels.add("noise"); // The last label = "noise"
attributes.add(new Attribute("class", classLabels));
streamHeader = new InstancesHeader(new Instances(getCLICreationString(InstanceStream.class), attributes, 0));
streamHeader.setClassIndex(streamHeader.numAttributes() - 1);
}
protected void initKernels() {
for (int i = 0; i < numClusterOption.getValue(); i++) {
kernels.add(new GeneratorCluster(clusterIdCounter));
numActiveKernels++;
clusterIdCounter++;
}
normalizeWeights();
}
public InstanceExample nextInstance() {
numGeneratedInstances++;
eventScheduler();
//make room for the classlabel
double[] values_new = new double [numAttsOption.getValue()]; //+1
double[] values = null;
int clusterChoice = -1;
if(instanceRandom.nextDouble() > noiseLevelOption.getValue()){
clusterChoice = chooseWeightedElement();
values = kernels.get(clusterChoice).generator.sample(instanceRandom).toDoubleArray();
}
else{
//get ranodm noise point
values = getNoisePoint();
}
if(Double.isNaN(values[0])){
System.out.println("Instance corrupted:"+numGeneratedInstances);
}
System.arraycopy(values, 0, values_new, 0, values.length);
Instance inst = new DenseInstance(1.0, values_new);
inst.setDataset(getHeader());
if(clusterChoice == -1){
// 2013/06/02 (Yunsu Kim)
// Noise instance has the last class value instead of "-1"
// Preventing ArrayIndexOutOfBoundsException in WriteStreamToARFFFile
inst.setClassValue(numClusterOption.getValue());
}
else{
inst.setClassValue(kernels.get(clusterChoice).generator.getId());
//Do we need micro cluster representation if have overlapping clusters?
//if(!overlappingOption.isSet())
kernels.get(clusterChoice).addInstance(inst);
}
// System.out.println(numGeneratedInstances+": Overlap is"+updateOverlaps());
return new InstanceExample(inst);
}
public Clustering getGeneratingClusters(){
Clustering clustering = new Clustering();
for (int c = 0; c < kernels.size(); c++) {
clustering.add(kernels.get(c).generator);
}
return clustering;
}
public Clustering getMicroClustering(){
Clustering clustering = new Clustering();
int id = 0;
for (int c = 0; c < kernels.size(); c++) {
for (int m = 0; m < kernels.get(c).microClusters.size(); m++) {
kernels.get(c).microClusters.get(m).setId(id);
kernels.get(c).microClusters.get(m).setGroundTruth(kernels.get(c).generator.getId());
clustering.add(kernels.get(c).microClusters.get(m));
id++;
}
}
//System.out.println("numMicroKernels "+clustering.size());
return clustering;
}
/**************************** EVENTS ******************************************/
private void eventScheduler(){
for ( int i = 0; i < kernels.size(); i++ ) {
kernels.get(i).updateKernel();
}
nextEventCounter--;
//only move kernels every 10 points, performance reasons????
//should this be randomized as well???
if(nextEventCounter%kernelMovePointFrequency == 0){
//move kernels
for ( int i = 0; i < kernels.size(); i++ ) {
kernels.get(i).move();
//overlapControl();
}
}
if(eventFrequencyOption.getValue() == 0){
return;
}
String type ="";
String message ="";
boolean eventFinished = false;
switch(nextEventChoice){
case 0:
if(numActiveKernels > 1 && numActiveKernels > numClusterOption.getValue() - numClusterRangeOption.getValue()){
message = mergeKernels(nextEventCounter);
type = "Merge";
}
if(mergeClusterA==null && mergeClusterB==null && message.startsWith("Clusters merging")){
eventFinished = true;
}
break;
case 1:
if(nextEventCounter<=0){
if(numActiveKernels < numClusterOption.getValue() + numClusterRangeOption.getValue()){
type = "Split";
message = splitKernel();
}
eventFinished = true;
}
break;
case 2:
if(nextEventCounter<=0){
if(numActiveKernels > 1 && numActiveKernels > numClusterOption.getValue() - numClusterRangeOption.getValue()){
message = fadeOut();
type = "Delete";
}
eventFinished = true;
}
break;
case 3:
if(nextEventCounter<=0){
if(numActiveKernels < numClusterOption.getValue() + numClusterRangeOption.getValue()){
message = fadeIn();
type = "Create";
}
eventFinished = true;
}
break;
}
if (eventFinished){
nextEventCounter = (int)(eventFrequencyOption.getValue()+(instanceRandom.nextBoolean()?-1:1)*eventFrequencyOption.getValue()*eventFrequencyRange*instanceRandom.nextDouble());
nextEventChoice = getNextEvent();
//System.out.println("Next event choice: "+nextEventChoice);
}
if(!message.isEmpty()){
message+=" (numKernels = "+numActiveKernels+" at "+numGeneratedInstances+")";
if(!type.equals("Merge") || message.startsWith("Clusters merging"))
fireClusterChange(numGeneratedInstances, type, message);
}
}
private int getNextEvent() {
int choice = -1;
boolean lowerLimit = numActiveKernels <= numClusterOption.getValue() - numClusterRangeOption.getValue();
boolean upperLimit = numActiveKernels >= numClusterOption.getValue() + numClusterRangeOption.getValue();
if(!lowerLimit || !upperLimit){
int mode = -1;
if(eventDeleteCreateOption.isSet() && eventMergeSplitOption.isSet()){
mode = instanceRandom.nextInt(2);
}
if(mode==0 || (mode==-1 && eventMergeSplitOption.isSet())){
//have we reached a limit? if not free choice
if(!lowerLimit && !upperLimit)
choice = instanceRandom.nextInt(2);
else
//we have a limit. if lower limit, choose split
if(lowerLimit)
choice = 1;
//otherwise we reached upper level, choose merge
else
choice = 0;
}
if(mode==1 || (mode==-1 && eventDeleteCreateOption.isSet())){
//have we reached a limit? if not free choice
if(!lowerLimit && !upperLimit)
choice = instanceRandom.nextInt(2)+2;
else
//we have a limit. if lower limit, choose create
if(lowerLimit)
choice = 3;
//otherwise we reached upper level, choose delete
else
choice = 2;
}
}
return choice;
}
private String fadeOut(){
int id = instanceRandom.nextInt(kernels.size());
while(kernels.get(id).kill!=-1)
id = instanceRandom.nextInt(kernels.size());
String message = kernels.get(id).fadeOut();
return message;
}
private String fadeIn(){
GeneratorCluster gc = new GeneratorCluster(clusterIdCounter++);
kernels.add(gc);
numActiveKernels++;
normalizeWeights();
return "Creating new cluster";
}
private String changeWeight(boolean increase){
double changeRate = 0.1;
int id = instanceRandom.nextInt(kernels.size());
while(kernels.get(id).kill!=-1)
id = instanceRandom.nextInt(kernels.size());
int sign = 1;
if(!increase)
sign = -1;
double weight_old = kernels.get(id).generator.getWeight();
double delta = sign*numActiveKernels*instanceRandom.nextDouble()*changeRate;
kernels.get(id).generator.setWeight(weight_old+delta);
normalizeWeights();
String message;
if(increase)
message = "Increase ";
else
message = "Decrease ";
message+=" weight on Cluster "+id+" from "+weight_old+" to "+(weight_old+delta);
return message;
}
private String changeRadius(boolean increase){
double maxChangeRate = 0.1;
int id = instanceRandom.nextInt(kernels.size());
while(kernels.get(id).kill!=-1)
id = instanceRandom.nextInt(kernels.size());
int sign = 1;
if(!increase)
sign = -1;
double r_old = kernels.get(id).generator.getRadius();
double r_new =r_old+sign*r_old*instanceRandom.nextDouble()*maxChangeRate;
if(r_new >= 0.5) return "Radius to big";
kernels.get(id).generator.setRadius(r_new);
String message;
if(increase)
message = "Increase ";
else
message = "Decrease ";
message+=" radius on Cluster "+id+" from "+r_old+" to "+r_new;
return message;
}
private String splitKernel(){
int id = instanceRandom.nextInt(kernels.size());
while(kernels.get(id).kill!=-1)
id = instanceRandom.nextInt(kernels.size());
String message = kernels.get(id).splitKernel();
return message;
}
private String mergeKernels(int steps){
if(numActiveKernels >1 && ((mergeClusterA == null && mergeClusterB == null))){
//choose clusters to merge
double diseredDist = steps / speedOption.getValue() * maxDistanceMoveThresholdByStep;
double minDist = Double.MAX_VALUE;
// System.out.println("DisredDist:"+(2*diseredDist));
for(int i = 0; i < kernels.size(); i++){
for(int j = 0; j < i; j++){
if(kernels.get(i).kill!=-1 || kernels.get(j).kill!=-1){
continue;
}
else{
double kernelDist = kernels.get(i).generator.getCenterDistance(kernels.get(j).generator);
double d = kernelDist-2*diseredDist;
// System.out.println("Dist:"+i+" / "+j+" "+d);
if(Math.abs(d) < minDist &&
(minDist != Double.MAX_VALUE || d>0 || Math.abs(d) < 0.001)){
minDist = Math.abs(d);
mergeClusterA = kernels.get(i);
mergeClusterB = kernels.get(j);
}
}
}
}
if(mergeClusterA!=null && mergeClusterB!=null){
double[] merge_point = mergeClusterA.generator.getCenter();
double[] v = mergeClusterA.generator.getDistanceVector(mergeClusterB.generator);
for (int i = 0; i < v.length; i++) {
merge_point[i]= merge_point[i]+v[i]*0.5;
}
mergeClusterA.merging = true;
mergeClusterB.merging = true;
mergeClusterA.setDesitnation(merge_point);
mergeClusterB.setDesitnation(merge_point);
if(debug){
System.out.println("Center1"+Arrays.toString(mergeClusterA.generator.getCenter()));
System.out.println("Center2"+Arrays.toString(mergeClusterB.generator.getCenter()));
System.out.println("Vector"+Arrays.toString(v));
System.out.println("Try to merge cluster "+mergeClusterA.generator.getId()+
" into "+mergeClusterB.generator.getId()+
" at "+Arrays.toString(merge_point)+
" time "+numGeneratedInstances);
}
return "Init merge";
}
}
if(mergeClusterA != null && mergeClusterB != null){
//movekernels will move the kernels close to each other,
//we just need to check and merge here if they are close enough
return mergeClusterA.tryMerging(mergeClusterB);
}
return "";
}
/************************* TOOLS **************************************/
public void getDescription(StringBuilder sb, int indent) {
}
private double[] getNoisePoint(){
double [] sample = new double [numAttsOption.getValue()];
boolean incluster = true;
int counter = 20;
while(incluster){
for (int j = 0; j < numAttsOption.getValue(); j++) {
sample[j] = instanceRandom.nextDouble();
}
incluster = false;
if(!noiseInClusterOption.isSet() && counter > 0){
counter--;
for(int c = 0; c < kernels.size(); c++){
for(int m = 0; m < kernels.get(c).microClusters.size(); m++){
Instance inst = new DenseInstance(1, sample);
if(kernels.get(c).microClusters.get(m).getInclusionProbability(inst) > 0){
incluster = true;
break;
}
}
if(incluster)
break;
}
}
}
// double [] sample = new double [numAttsOption.getValue()];
// for (int j = 0; j < numAttsOption.getValue(); j++) {
// sample[j] = instanceRandom.nextDouble();
// }
return sample;
}
private int chooseWeightedElement() {
double r = instanceRandom.nextDouble();
// Determine index of choosen element
int i = 0;
while (r > 0.0) {
r -= kernels.get(i).generator.getWeight();
i++;
}
--i; // Overcounted once
//System.out.println(i);
return i;
}
private void normalizeWeights(){
double sumWeights = 0.0;
for (int i = 0; i < kernels.size(); i++) {
sumWeights+=kernels.get(i).generator.getWeight();
}
for (int i = 0; i < kernels.size(); i++) {
kernels.get(i).generator.setWeight(kernels.get(i).generator.getWeight()/sumWeights);
}
}
/*************** EVENT Listener *********************/
// should go into the superclass of the generator, create new one for cluster streams?
/** Add a listener */
synchronized public void addClusterChangeListener(ClusterEventListener l) {
if (listeners == null)
listeners = new Vector();
listeners.addElement(l);
}
/** Remove a listener */
synchronized public void removeClusterChangeListener(ClusterEventListener l) {
if (listeners == null)
listeners = new Vector();
listeners.removeElement(l);
}
/** Fire a ClusterChangeEvent to all registered listeners */
protected void fireClusterChange(long timestamp, String type, String message) {
// if we have no listeners, do nothing...
if (listeners != null && !listeners.isEmpty()) {
// create the event object to send
ClusterEvent event =
new ClusterEvent(this, timestamp, type , message);
// make a copy of the listener list in case
// anyone adds/removes listeners
Vector targets;
synchronized (this) {
targets = (Vector) listeners.clone();
}
// walk through the listener list and
// call the sunMoved method in each
Enumeration e = targets.elements();
while (e.hasMoreElements()) {
ClusterEventListener l = (ClusterEventListener) e.nextElement();
l.changeCluster(event);
}
}
}
@Override
public String getPurposeString() {
return "Generates a random radial basis function stream.";
}
public String getParameterString(){
return "";
}
}