blob: a2f0100eb4f40e3755b3ab9e975cea03aa9b9dda [file] [log] [blame]
package org.apache.samoa.evaluation.measures;
/*
* #%L
* SAMOA
* %%
* Copyright (C) 2014 - 2015 Apache Software Foundation
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.util.ArrayList;
import org.apache.samoa.moa.cluster.Clustering;
import org.apache.samoa.moa.core.DataPoint;
import org.apache.samoa.moa.evaluation.MeasureCollection;
import org.apache.samoa.moa.evaluation.MembershipMatrix;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class EntropyCollection extends MeasureCollection {
private static final Logger logger = LoggerFactory.getLogger(EntropyCollection.class);
@Override
protected String[] getNames() {
return new String[] { "GT cross entropy", "FC cross entropy", "Homogeneity", "Completeness", "V-Measure",
"VarInformation" };
}
@Override
protected boolean[] getDefaultEnabled() {
return new boolean[] { false, false, false, false, false, false };
}
@Override
public void evaluateClustering(Clustering fclustering, Clustering hClustering, ArrayList<DataPoint> points)
throws Exception {
MembershipMatrix mm = new MembershipMatrix(fclustering, points);
int numClasses = mm.getNumClasses();
int numCluster = fclustering.size() + 1;
int n = mm.getTotalEntries();
double FCentropy = 0;
if (numCluster > 1) {
for (int fc = 0; fc < numCluster; fc++) {
double weight = mm.getClusterSum(fc) / (double) n;
if (weight > 0)
FCentropy += weight * Math.log10(weight);
}
FCentropy /= (-1 * Math.log10(numCluster));
}
logger.debug("FC entropy: {}", FCentropy);
double GTentropy = 0;
if (numClasses > 1) {
for (int hc = 0; hc < numClasses; hc++) {
double weight = mm.getClassSum(hc) / (double) n;
if (weight > 0)
GTentropy += weight * Math.log10(weight);
}
GTentropy /= (-1 * Math.log10(numClasses));
}
logger.debug("GT entropy: {}", GTentropy);
// cluster based entropy
double FCcrossEntropy = 0;
for (int fc = 0; fc < numCluster; fc++) {
double e = 0;
int clusterWeight = mm.getClusterSum(fc);
if (clusterWeight > 0) {
for (int hc = 0; hc < numClasses; hc++) {
double p = mm.getClusterClassWeight(fc, hc) / (double) clusterWeight;
if (p != 0) {
e += p * Math.log10(p);
}
}
FCcrossEntropy += ((clusterWeight / (double) n) * e);
}
}
if (numCluster > 1) {
FCcrossEntropy /= -1 * Math.log10(numCluster);
}
addValue("FC cross entropy", 1 - FCcrossEntropy);
logger.debug("FC cross entropy: {}", 1 - FCcrossEntropy);
// class based entropy
double GTcrossEntropy = 0;
for (int hc = 0; hc < numClasses; hc++) {
double e = 0;
int classWeight = mm.getClassSum(hc);
if (classWeight > 0) {
for (int fc = 0; fc < numCluster; fc++) {
double p = mm.getClusterClassWeight(fc, hc) / (double) classWeight;
if (p != 0) {
e += p * Math.log10(p);
}
}
}
GTcrossEntropy += ((classWeight / (double) n) * e);
}
if (numClasses > 1)
GTcrossEntropy /= -1 * Math.log10(numClasses);
addValue("GT cross entropy", 1 - GTcrossEntropy);
logger.debug("GT cross entropy: {}", 1 - GTcrossEntropy);
double homogeneity;
if (FCentropy == 0)
homogeneity = 1;
else
homogeneity = 1 - FCcrossEntropy / FCentropy;
// TODO set err values for now, needs to be debugged
if (homogeneity > 1 || homogeneity < 0)
addValue("Homogeneity", -1);
else
addValue("Homogeneity", homogeneity);
double completeness;
if (GTentropy == 0)
completeness = 1;
else
completeness = 1 - GTcrossEntropy / GTentropy;
addValue("Completeness", completeness);
double beta = 1;
double vmeasure = (1 + beta) * homogeneity * completeness / (beta * homogeneity + completeness);
if (vmeasure > 1 || homogeneity < 0)
addValue("V-Measure", -1);
else
addValue("V-Measure", vmeasure);
double mutual = 0;
for (int i = 0; i < numCluster; i++) {
for (int j = 0; j < numClasses; j++) {
if (mm.getClusterClassWeight(i, j) == 0)
continue;
double m = Math.log10(mm.getClusterClassWeight(i, j) / (double) mm.getClusterSum(i)
/ (double) mm.getClassSum(j) * (double) n);
m *= mm.getClusterClassWeight(i, j) / (double) n;
logger.debug("( {} / {}): ", m, m);
mutual += m;
}
}
if (numClasses > 1)
mutual /= Math.log10(numClasses);
double varInfo = 1;
if (FCentropy + GTentropy > 0)
varInfo = 2 * mutual / (FCentropy + GTentropy);
logger.debug("mutual: {} / VI: {}", mutual, varInfo);
addValue("VarInformation", varInfo);
}
}