| package com.yahoo.labs.samoa.evaluation.measures; |
| |
| /* |
| * #%L |
| * SAMOA |
| * %% |
| * Copyright (C) 2010 RWTH Aachen University, Germany |
| * %% |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * #L% |
| */ |
| |
| import java.util.ArrayList; |
| |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| import com.yahoo.labs.samoa.moa.cluster.Clustering; |
| import com.yahoo.labs.samoa.moa.core.DataPoint; |
| import com.yahoo.labs.samoa.moa.evaluation.MeasureCollection; |
| import com.yahoo.labs.samoa.moa.evaluation.MembershipMatrix; |
| |
| public class EntropyCollection extends MeasureCollection { |
| |
| private static final Logger logger = LoggerFactory.getLogger(EntropyCollection.class); |
| |
| @Override |
| protected String[] getNames() { |
| return new String[] { "GT cross entropy", "FC cross entropy", "Homogeneity", "Completeness", "V-Measure", |
| "VarInformation" }; |
| } |
| |
| @Override |
| protected boolean[] getDefaultEnabled() { |
| return new boolean[] { false, false, false, false, false, false }; |
| } |
| |
| @Override |
| public void evaluateClustering(Clustering fclustering, Clustering hClustering, ArrayList<DataPoint> points) |
| throws Exception { |
| |
| MembershipMatrix mm = new MembershipMatrix(fclustering, points); |
| int numClasses = mm.getNumClasses(); |
| int numCluster = fclustering.size() + 1; |
| int n = mm.getTotalEntries(); |
| |
| double FCentropy = 0; |
| if (numCluster > 1) { |
| for (int fc = 0; fc < numCluster; fc++) { |
| double weight = mm.getClusterSum(fc) / (double) n; |
| if (weight > 0) |
| FCentropy += weight * Math.log10(weight); |
| } |
| FCentropy /= (-1 * Math.log10(numCluster)); |
| } |
| |
| logger.debug("FC entropy: {}", FCentropy); |
| |
| double GTentropy = 0; |
| if (numClasses > 1) { |
| for (int hc = 0; hc < numClasses; hc++) { |
| double weight = mm.getClassSum(hc) / (double) n; |
| if (weight > 0) |
| GTentropy += weight * Math.log10(weight); |
| } |
| GTentropy /= (-1 * Math.log10(numClasses)); |
| } |
| |
| logger.debug("GT entropy: {}", GTentropy); |
| |
| // cluster based entropy |
| double FCcrossEntropy = 0; |
| |
| for (int fc = 0; fc < numCluster; fc++) { |
| double e = 0; |
| int clusterWeight = mm.getClusterSum(fc); |
| if (clusterWeight > 0) { |
| for (int hc = 0; hc < numClasses; hc++) { |
| double p = mm.getClusterClassWeight(fc, hc) / (double) clusterWeight; |
| if (p != 0) { |
| e += p * Math.log10(p); |
| } |
| } |
| FCcrossEntropy += ((clusterWeight / (double) n) * e); |
| } |
| } |
| if (numCluster > 1) { |
| FCcrossEntropy /= -1 * Math.log10(numCluster); |
| } |
| |
| addValue("FC cross entropy", 1 - FCcrossEntropy); |
| logger.debug("FC cross entropy: {}", 1 - FCcrossEntropy); |
| |
| // class based entropy |
| double GTcrossEntropy = 0; |
| for (int hc = 0; hc < numClasses; hc++) { |
| double e = 0; |
| int classWeight = mm.getClassSum(hc); |
| if (classWeight > 0) { |
| for (int fc = 0; fc < numCluster; fc++) { |
| double p = mm.getClusterClassWeight(fc, hc) / (double) classWeight; |
| if (p != 0) { |
| e += p * Math.log10(p); |
| } |
| } |
| } |
| GTcrossEntropy += ((classWeight / (double) n) * e); |
| } |
| if (numClasses > 1) |
| GTcrossEntropy /= -1 * Math.log10(numClasses); |
| addValue("GT cross entropy", 1 - GTcrossEntropy); |
| logger.debug("GT cross entropy: {}", 1 - GTcrossEntropy); |
| |
| double homogeneity; |
| if (FCentropy == 0) |
| homogeneity = 1; |
| else |
| homogeneity = 1 - FCcrossEntropy / FCentropy; |
| |
| // TODO set err values for now, needs to be debugged |
| if (homogeneity > 1 || homogeneity < 0) |
| addValue("Homogeneity", -1); |
| else |
| addValue("Homogeneity", homogeneity); |
| |
| double completeness; |
| if (GTentropy == 0) |
| completeness = 1; |
| else |
| completeness = 1 - GTcrossEntropy / GTentropy; |
| addValue("Completeness", completeness); |
| |
| double beta = 1; |
| double vmeasure = (1 + beta) * homogeneity * completeness / (beta * homogeneity + completeness); |
| |
| if (vmeasure > 1 || homogeneity < 0) |
| addValue("V-Measure", -1); |
| else |
| addValue("V-Measure", vmeasure); |
| |
| double mutual = 0; |
| for (int i = 0; i < numCluster; i++) { |
| for (int j = 0; j < numClasses; j++) { |
| if (mm.getClusterClassWeight(i, j) == 0) |
| continue; |
| double m = Math.log10(mm.getClusterClassWeight(i, j) / (double) mm.getClusterSum(i) |
| / (double) mm.getClassSum(j) * (double) n); |
| m *= mm.getClusterClassWeight(i, j) / (double) n; |
| logger.debug("( {} / {}): ", m, m); |
| mutual += m; |
| } |
| } |
| if (numClasses > 1) |
| mutual /= Math.log10(numClasses); |
| |
| double varInfo = 1; |
| if (FCentropy + GTentropy > 0) |
| varInfo = 2 * mutual / (FCentropy + GTentropy); |
| |
| logger.debug("mutual: {} / VI: {}", mutual, varInfo); |
| addValue("VarInformation", varInfo); |
| |
| } |
| |
| } |