blob: f5f565b90570bd42f12af9334b5a6454ac2b086f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.samoa.evaluation.measures;
import java.util.ArrayList;
import org.apache.samoa.instances.Instance;
import org.apache.samoa.moa.cluster.Clustering;
import org.apache.samoa.moa.cluster.SphereCluster;
import org.apache.samoa.moa.core.DataPoint;
import org.apache.samoa.moa.evaluation.MeasureCollection;
/**
 * General-purpose clustering evaluation measures: GPrecision, GRecall,
 * Redundancy, and the cluster/class counts. A point counts as "covered" by a
 * found cluster when its inclusion probability meets
 * {@link #pointInclusionProbThreshold}.
 *
 * <p>The Compactness and Overlap measures are implemented below but currently
 * disabled (not listed in {@link #getNames()}); both require every found
 * cluster to be a {@link SphereCluster}.
 */
public class General extends MeasureCollection {

  // State cached by evaluateClustering() for use by the helper measures.
  private int numPoints;      // number of data points in the evaluated window
  private int numFClusters;   // number of found clusters
  private int numDims;        // attribute count minus the class attribute
  // Minimum inclusion probability for a point to count as inside a cluster.
  private double pointInclusionProbThreshold = 0.8;
  private Clustering clustering;
  private ArrayList<DataPoint> points;

  public General() {
    super();
  }

  @Override
  protected String[] getNames() {
    // "Overlap" and "Compactness" are intentionally omitted; see the disabled
    // helper methods computeOverlap() / computeCompactness() below.
    return new String[] { "GPrecision", "GRecall", "Redundancy", "numCluster", "numClasses" };
  }

  /**
   * Evaluates the found {@code clustering} against the ground truth.
   *
   * @param clustering the found clustering to evaluate
   * @param trueClustering the ground-truth clustering (only its size is used)
   * @param points the data points of the current window; must be non-empty
   * @throws Exception declared by the MeasureCollection contract
   */
  @Override
  public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points)
      throws Exception {
    this.points = points;
    this.clustering = clustering;
    numPoints = points.size();
    numFClusters = clustering.size();
    numDims = points.get(0).numAttributes() - 1;

    int totalRedundancy = 0; // points covered by more than one cluster (incl. noise)
    int trueCoverage = 0;    // non-noise points covered by at least one cluster
    int totalCoverage = 0;   // points covered by at least one cluster (incl. noise)
    int numNoise = 0;        // points labeled as noise (class value -1)

    for (int p = 0; p < numPoints; p++) {
      int coverage = 0;
      for (int c = 0; c < numFClusters; c++) {
        // Is point p contained in cluster c?
        if (clustering.get(c).getInclusionProbability(points.get(p)) >= pointInclusionProbThreshold) {
          coverage++;
        }
      }
      if (points.get(p).classValue() == -1) {
        numNoise++;
      } else if (coverage > 0) {
        trueCoverage++;
      }
      if (coverage > 0) {
        totalCoverage++;
      }
      if (coverage > 1) {
        totalRedundancy++;
      }
    }

    int numNonNoise = numPoints - numNoise;
    addValue("numCluster", clustering.size());
    addValue("numClasses", trueClustering.size());
    addValue("Redundancy", ((double) totalRedundancy / (double) numPoints));
    addValue("GPrecision", (totalCoverage == 0 ? 0 : ((double) trueCoverage / (double) totalCoverage)));
    // Guard the denominator like GPrecision does: when every point is noise
    // the original expression divided by zero and produced NaN/Infinity.
    addValue("GRecall", (numNonNoise == 0 ? 0 : ((double) trueCoverage / (double) numNonNoise)));
  }

  /**
   * Fraction of found clusters that overlap (radius-wise) with at least one
   * other found cluster. Currently unused; see {@link #getNames()}.
   *
   * @return overlap fraction in [0,1], or {@code Double.NaN} if any found
   *         cluster is not a {@link SphereCluster}
   */
  private double computeOverlap() {
    for (int c = 0; c < numFClusters; c++) {
      if (!(clustering.get(c) instanceof SphereCluster)) {
        System.out.println("Overlap only supports Sphere Cluster. Found: " + clustering.get(c).getClass());
        return Double.NaN;
      }
    }

    boolean[] overlap = new boolean[numFClusters];
    for (int c0 = 0; c0 < numFClusters; c0++) {
      if (overlap[c0])
        continue;
      SphereCluster s0 = (SphereCluster) clustering.get(c0);
      // Start at c0 + 1: the original started at c0 and skipped the diagonal
      // with a continue, which did the same work less clearly.
      for (int c1 = c0 + 1; c1 < numFClusters; c1++) {
        SphereCluster s1 = (SphereCluster) clustering.get(c1);
        if (s0.overlapRadiusDegree(s1) > 0) {
          overlap[c0] = overlap[c1] = true;
        }
      }
    }

    double totalOverlap = 0;
    for (int c0 = 0; c0 < numFClusters; c0++) {
      if (overlap[c0])
        totalOverlap++;
    }
    if (numFClusters > 0)
      totalOverlap /= (double) numFClusters;
    return totalOverlap;
  }

  /**
   * Average per-cluster compactness: the ratio of the minimal enclosing radius
   * of a cluster's covered points to the cluster's reported radius (1 = ideal).
   * Currently unused; see {@link #getNames()}.
   *
   * @return mean compactness in [0,1], or {@code Double.NaN} if any found
   *         cluster is not a {@link SphereCluster}
   */
  private double computeCompactness() {
    if (numFClusters == 0)
      return 0;

    for (int c = 0; c < numFClusters; c++) {
      if (!(clustering.get(c) instanceof SphereCluster)) {
        System.out.println("Compactness only supports Sphere Cluster. Found: " + clustering.get(c).getClass());
        return Double.NaN;
      }
    }

    // TODO weight radius by number of dimensions
    double totalCompactness = 0;
    for (int c = 0; c < numFClusters; c++) {
      // Collect the points this cluster covers under the inclusion threshold.
      ArrayList<Instance> containedPoints = new ArrayList<Instance>();
      for (int p = 0; p < numPoints; p++) {
        if (clustering.get(c).getInclusionProbability(points.get(p)) >= pointInclusionProbThreshold) {
          containedPoints.add(points.get(p));
        }
      }

      double compactness = 0;
      if (containedPoints.size() > 1) {
        SphereCluster minEnclosingCluster = new SphereCluster(containedPoints, numDims);
        double minRadius = minEnclosingCluster.getRadius();
        double cfRadius = ((SphereCluster) clustering.get(c)).getRadius();
        if (Math.abs(minRadius - cfRadius) < 0.1e-10) {
          // Radii are equal within a 1e-11 tolerance: perfectly compact.
          compactness = 1;
        } else if (minRadius < cfRadius) {
          compactness = minRadius / cfRadius;
        } else {
          // Should be impossible: the minimal enclosing radius of the covered
          // points cannot exceed the cluster's own radius.
          System.out.println("Optimal radius bigger than real one (" + (cfRadius - minRadius)
              + "), this is really wrong");
          compactness = 1;
        }
      } else {
        // Zero or one covered point: only a zero-radius cluster is compact.
        double cfRadius = ((SphereCluster) clustering.get(c)).getRadius();
        if (cfRadius == 0)
          compactness = 1;
      }

      // weight by weight of cluster???
      totalCompactness += compactness;
      clustering.get(c).setMeasureValue("Compactness", Double.toString(compactness));
    }
    return (totalCompactness / numFClusters);
  }
}