package com.yahoo.labs.samoa.evaluation.measures;
/*
* #%L
* SAMOA
* %%
* Copyright (C) 2010 RWTH Aachen University, Germany
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.yahoo.labs.samoa.instances.Instance;
import com.yahoo.labs.samoa.moa.cluster.Clustering;
import com.yahoo.labs.samoa.moa.cluster.SphereCluster;
import com.yahoo.labs.samoa.moa.evaluation.MeasureCollection;
import com.yahoo.labs.samoa.moa.core.DataPoint;
import java.util.ArrayList;
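
/**
 * General clustering quality measures for SAMOA's stream clustering evaluation.
 * Given a found clustering, the ground-truth clustering, and the evaluated
 * DataPoints, this MeasureCollection reports the number of clusters and classes
 * together with the coverage-based measures GPrecision, GRecall, and Redundancy.
 * A point counts as covered by a cluster once its inclusion probability reaches
 * pointInclusionProbThreshold.
 */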
public class General extends MeasureCollection {

    private int numPoints;
    private int numFClusters;
    private int numDims;
    private double pointInclusionProbThreshold = 0.8;
    private Clustering clustering;
    private ArrayList<DataPoint> points;

    public General() {
        super();
    }

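    /**
     * Measure names reported by this collection; Overlap and Compactness appear
     * only in the commented-out variants below and are currently not reported.
     */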
    @Override
    protected String[] getNames() {
        //String[] names = {"GPrecision","GRecall","Redundancy","Overlap","numCluster","numClasses","Compactness"};
        return new String[]{"GPrecision", "GRecall", "Redundancy", "numCluster", "numClasses"};
    }

    // @Override
    // protected boolean[] getDefaultEnabled() {
    //     boolean[] defaults = {false, false, false, false, false, false};
    //     return defaults;
    // }

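    /**
     * Computes the coverage-based measures for the given window of points.
     * A point is covered by a cluster if its inclusion probability is at least
     * pointInclusionProbThreshold; points with class value -1 are treated as noise.
     * GPrecision is the fraction of covered points that are non-noise, GRecall the
     * fraction of non-noise points that are covered, and Redundancy the fraction of
     * all points covered by more than one cluster.
     */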
    @Override
    public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) throws Exception {
        this.points = points;
        this.clustering = clustering;
        numPoints = points.size();
        numFClusters = clustering.size();
        numDims = points.get(0).numAttributes() - 1;

        int totalRedundancy = 0;
        int trueCoverage = 0;
        int totalCoverage = 0;
        int numNoise = 0;

        for (int p = 0; p < numPoints; p++) {
            // count how many found clusters contain point p
            int coverage = 0;
            for (int c = 0; c < numFClusters; c++) {
                if (clustering.get(c).getInclusionProbability(points.get(p)) >= pointInclusionProbThreshold) {
                    coverage++;
                }
            }
            if (points.get(p).classValue() == -1) {
                numNoise++;
            } else {
                if (coverage > 0) trueCoverage++;   // non-noise points covered by the clustering
            }
            if (coverage > 0) totalCoverage++;      // points covered by the clustering (incl. noise)
            if (coverage > 1) totalRedundancy++;    // points covered by more than one cluster (incl. noise)
        }

        addValue("numCluster", clustering.size());
        addValue("numClasses", trueClustering.size());
        addValue("Redundancy", ((double) totalRedundancy / (double) numPoints));
        addValue("GPrecision", (totalCoverage == 0 ? 0 : ((double) trueCoverage / (double) totalCoverage)));
        addValue("GRecall", ((double) trueCoverage / (double) (numPoints - numNoise)));

        // if (isEnabled(3)) {
        //     addValue("Compactness", computeCompactness());
        // }
        // if (isEnabled(3)) {
        //     addValue("Overlap", computeOverlap());
        // }
    }

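    /**
     * Currently unused (its addValue call above is commented out). Estimates the
     * fraction of clusters whose sphere overlaps another cluster's sphere; returns
     * NaN if any cluster is not a SphereCluster.
     */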
    private double computeOverlap() {
        for (int c = 0; c < numFClusters; c++) {
            if (!(clustering.get(c) instanceof SphereCluster)) {
                System.out.println("Overlap only supports SphereCluster. Found: " + clustering.get(c).getClass());
                return Double.NaN;
            }
        }

        // mark pairs of clusters whose spheres overlap
        boolean[] overlap = new boolean[numFClusters];
        for (int c0 = 0; c0 < numFClusters; c0++) {
            if (overlap[c0]) continue;
            SphereCluster s0 = (SphereCluster) clustering.get(c0);
            for (int c1 = c0 + 1; c1 < numFClusters; c1++) {
                SphereCluster s1 = (SphereCluster) clustering.get(c1);
                if (s0.overlapRadiusDegree(s1) > 0) {
                    overlap[c0] = overlap[c1] = true;
                }
            }
        }

        double totalOverlap = 0;
        for (int c0 = 0; c0 < numFClusters; c0++) {
            if (overlap[c0])
                totalOverlap++;
        }
        // if (totalOverlap / (double) numFClusters > .8) RunVisualizer.pause();
        if (numFClusters > 0) totalOverlap /= (double) numFClusters;
        return totalOverlap;
    }

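    /**
     * Currently unused (its addValue call above is commented out). For each
     * SphereCluster, relates the radius of the minimal enclosing sphere of its
     * contained points to the cluster's own radius (capped at 1) and returns the
     * average over all clusters; NaN if any cluster is not a SphereCluster.
     */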
    private double computeCompactness() {
        if (numFClusters == 0) return 0;
        for (int c = 0; c < numFClusters; c++) {
            if (!(clustering.get(c) instanceof SphereCluster)) {
                System.out.println("Compactness only supports SphereCluster. Found: " + clustering.get(c).getClass());
                return Double.NaN;
            }
        }

        //TODO weight radius by number of dimensions
        double totalCompactness = 0;
        for (int c = 0; c < numFClusters; c++) {
            // collect the points contained in cluster c
            ArrayList<Instance> containedPoints = new ArrayList<Instance>();
            for (int p = 0; p < numPoints; p++) {
                if (clustering.get(c).getInclusionProbability(points.get(p)) >= pointInclusionProbThreshold) {
                    containedPoints.add(points.get(p));
                }
            }

            double compactness = 0;
            if (containedPoints.size() > 1) {
                // compare the cluster radius with the radius of the minimal enclosing sphere
                SphereCluster minEnclosingCluster = new SphereCluster(containedPoints, numDims);
                double minRadius = minEnclosingCluster.getRadius();
                double cfRadius = ((SphereCluster) clustering.get(c)).getRadius();
                if (Math.abs(minRadius - cfRadius) < 0.1e-10) {
                    compactness = 1;
                } else if (minRadius < cfRadius) {
                    compactness = minRadius / cfRadius;
                } else {
                    System.out.println("Optimal radius bigger than real one (" + (minRadius - cfRadius) + "), this is really wrong");
                    compactness = 1;
                }
            } else {
                // cluster contains at most one point; only a zero radius counts as compact
                double cfRadius = ((SphereCluster) clustering.get(c)).getRadius();
                if (cfRadius == 0) compactness = 1;
            }
            //weight by weight of cluster???
            totalCompactness += compactness;
            clustering.get(c).setMeasureValue("Compactness", Double.toString(compactness));
        }
        return (totalCompactness / numFClusters);
    }
}