/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
| |
| package org.apache.samoa.evaluation.measures; |
| |
| import java.util.ArrayList; |
| |
| import org.apache.samoa.instances.Instance; |
| import org.apache.samoa.moa.cluster.Clustering; |
| import org.apache.samoa.moa.cluster.SphereCluster; |
| import org.apache.samoa.moa.core.DataPoint; |
| import org.apache.samoa.moa.evaluation.MeasureCollection; |
| |
| public class General extends MeasureCollection { |
| private int numPoints; |
| private int numFClusters; |
| private int numDims; |
| private double pointInclusionProbThreshold = 0.8; |
| private Clustering clustering; |
| private ArrayList<DataPoint> points; |
| |
| public General() { |
| super(); |
| } |
| |
| @Override |
| protected String[] getNames() { |
| // String[] names = |
| // {"GPrecision","GRecall","Redundancy","Overlap","numCluster","numClasses","Compactness"}; |
| return new String[] { "GPrecision", "GRecall", "Redundancy", "numCluster", "numClasses" }; |
| } |
| |
| // @Override |
| // protected boolean[] getDefaultEnabled() { |
| // boolean [] defaults = {false, false, false, false, false ,false}; |
| // return defaults; |
| // } |
| |
| @Override |
| public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) |
| throws Exception { |
| |
| this.points = points; |
| this.clustering = clustering; |
| numPoints = points.size(); |
| numFClusters = clustering.size(); |
| numDims = points.get(0).numAttributes() - 1; |
| |
| int totalRedundancy = 0; |
| int trueCoverage = 0; |
| int totalCoverage = 0; |
| |
| int numNoise = 0; |
| for (int p = 0; p < numPoints; p++) { |
| int coverage = 0; |
| for (int c = 0; c < numFClusters; c++) { |
| // contained in cluster c? |
| if (clustering.get(c).getInclusionProbability(points.get(p)) >= pointInclusionProbThreshold) { |
| coverage++; |
| } |
| } |
| |
| if (points.get(p).classValue() == -1) { |
| numNoise++; |
| } |
| else { |
| if (coverage > 0) |
| trueCoverage++; |
| } |
| |
| if (coverage > 0) |
| totalCoverage++; // points covered by clustering (incl. noise) |
| if (coverage > 1) |
| totalRedundancy++; // include noise |
| } |
| |
| addValue("numCluster", clustering.size()); |
| addValue("numClasses", trueClustering.size()); |
| addValue("Redundancy", ((double) totalRedundancy / (double) numPoints)); |
| addValue("GPrecision", (totalCoverage == 0 ? 0 : ((double) trueCoverage / (double) (totalCoverage)))); |
| addValue("GRecall", ((double) trueCoverage / (double) (numPoints - numNoise))); |
| // if(isEnabled(3)){ |
| // addValue("Compactness", computeCompactness()); |
| // } |
| // if(isEnabled(3)){ |
| // addValue("Overlap", computeOverlap()); |
| // } |
| } |
| |
| private double computeOverlap() { |
| for (int c = 0; c < numFClusters; c++) { |
| if (!(clustering.get(c) instanceof SphereCluster)) { |
| System.out.println("Overlap only supports Sphere Cluster. Found: " + clustering.get(c).getClass()); |
| return Double.NaN; |
| } |
| } |
| |
| boolean[] overlap = new boolean[numFClusters]; |
| |
| for (int c0 = 0; c0 < numFClusters; c0++) { |
| if (overlap[c0]) |
| continue; |
| SphereCluster s0 = (SphereCluster) clustering.get(c0); |
| for (int c1 = c0; c1 < clustering.size(); c1++) { |
| if (c1 == c0) |
| continue; |
| SphereCluster s1 = (SphereCluster) clustering.get(c1); |
| if (s0.overlapRadiusDegree(s1) > 0) { |
| overlap[c0] = overlap[c1] = true; |
| } |
| } |
| } |
| |
| double totalOverlap = 0; |
| for (int c0 = 0; c0 < numFClusters; c0++) { |
| if (overlap[c0]) |
| totalOverlap++; |
| } |
| |
| // if(totalOverlap/(double)numFClusters > .8) RunVisualizer.pause(); |
| if (numFClusters > 0) |
| totalOverlap /= (double) numFClusters; |
| return totalOverlap; |
| } |
| |
| private double computeCompactness() { |
| if (numFClusters == 0) |
| return 0; |
| for (int c = 0; c < numFClusters; c++) { |
| if (!(clustering.get(c) instanceof SphereCluster)) { |
| System.out.println("Compactness only supports Sphere Cluster. Found: " + clustering.get(c).getClass()); |
| return Double.NaN; |
| } |
| } |
| |
| // TODO weight radius by number of dimensions |
| double totalCompactness = 0; |
| for (int c = 0; c < numFClusters; c++) { |
| ArrayList<Instance> containedPoints = new ArrayList<Instance>(); |
| for (int p = 0; p < numPoints; p++) { |
| // p in c |
| if (clustering.get(c).getInclusionProbability(points.get(p)) >= pointInclusionProbThreshold) { |
| containedPoints.add(points.get(p)); |
| } |
| } |
| double compactness = 0; |
| if (containedPoints.size() > 1) { |
| // cluster not empty |
| SphereCluster minEnclosingCluster = new SphereCluster(containedPoints, numDims); |
| double minRadius = minEnclosingCluster.getRadius(); |
| double cfRadius = ((SphereCluster) clustering.get(c)).getRadius(); |
| if (Math.abs(minRadius - cfRadius) < 0.1e-10) { |
| compactness = 1; |
| } |
| else if (minRadius < cfRadius) |
| compactness = minRadius / cfRadius; |
| else { |
| System.out.println("Optimal radius bigger then real one (" + (cfRadius - minRadius) |
| + "), this is really wrong"); |
| compactness = 1; |
| } |
| } |
| else { |
| double cfRadius = ((SphereCluster) clustering.get(c)).getRadius(); |
| if (cfRadius == 0) |
| compactness = 1; |
| } |
| |
| // weight by weight of cluster??? |
| totalCompactness += compactness; |
| clustering.get(c).setMeasureValue("Compactness", Double.toString(compactness)); |
| } |
| return (totalCompactness / numFClusters); |
| } |
| |
| } |