// blob: 0d311e4417360966dc8451d1110b40f47bcb0e8e [file] [log] [blame]
package com.yahoo.labs.samoa.evaluation.measures;
/*
* #%L
* SAMOA
* %%
* Copyright (C) 2010 RWTH Aachen University, Germany
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.util.ArrayList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.yahoo.labs.samoa.moa.cluster.Clustering;
import com.yahoo.labs.samoa.moa.core.DataPoint;
import com.yahoo.labs.samoa.moa.evaluation.MeasureCollection;
import com.yahoo.labs.samoa.moa.evaluation.MembershipMatrix;
/**
 * Entropy-based quality measures for a found clustering (FC), computed from the
 * cluster/class membership counts of the evaluated points.
 *
 * <p>Six values are reported per evaluation: "GT cross entropy", "FC cross entropy",
 * "Homogeneity", "Completeness", "V-Measure" and "VarInformation". All of them are
 * disabled by default. "GT" refers to the class dimension of the
 * {@link MembershipMatrix} (presumably the ground-truth labels of the points).
 *
 * <p>Homogeneity and V-Measure are reported as {@code -1} when they fall outside
 * {@code [0, 1]}; this is an error marker, not a real score.
 */
public class EntropyCollection extends MeasureCollection{
    private static final Logger logger = LoggerFactory.getLogger(EntropyCollection.class);

    /**
     * @return the names of the measures this collection reports, in reporting order
     */
    @Override
    protected String[] getNames() {
        return new String[]{"GT cross entropy","FC cross entropy","Homogeneity","Completeness","V-Measure","VarInformation"};
    }

    /**
     * @return one flag per entry of {@link #getNames()}; every measure is off by default
     */
    @Override
    protected boolean[] getDefaultEnabled() {
        return new boolean[]{false, false, false, false, false, false};
    }

    /**
     * Computes all entropy measures for {@code fclustering} over {@code points} and
     * records each of them through {@code addValue}.
     *
     * @param fclustering the found clustering to evaluate
     * @param hClustering not read by this implementation
     * @param points      the points used to build the cluster/class membership matrix
     * @throws Exception declared by the overridden method; nothing in this body throws
     *                   a checked exception explicitly
     */
    @Override
    public void evaluateClustering(Clustering fclustering, Clustering hClustering, ArrayList<DataPoint> points) throws Exception {
        MembershipMatrix mm = new MembershipMatrix(fclustering, points);
        int numClasses = mm.getNumClasses();
        // One index more than the number of found clusters.
        // NOTE(review): presumably the extra slot MembershipMatrix keeps for noise /
        // unassigned points - confirm against MembershipMatrix.
        int numCluster = fclustering.size()+1;
        int n = mm.getTotalEntries();

        // Entropy of the cluster size distribution, normalized by log10(numCluster).
        double fcEntropy = 0;
        if (numCluster > 1) {
            for (int fc = 0; fc < numCluster; fc++) {
                double weight = mm.getClusterSum(fc)/(double)n;
                // Also skips NaN (n == 0): NaN > 0 is false.
                if (weight > 0) {
                    fcEntropy += weight * Math.log10(weight);
                }
            }
            fcEntropy /= (-1*Math.log10(numCluster));
        }
        logger.debug("FC entropy: {}", fcEntropy);

        // Entropy of the class size distribution, normalized by log10(numClasses).
        double gtEntropy = 0;
        if (numClasses > 1) {
            for (int hc = 0; hc < numClasses; hc++) {
                double weight = mm.getClassSum(hc)/(double)n;
                if (weight > 0) {
                    gtEntropy += weight * Math.log10(weight);
                }
            }
            gtEntropy /= (-1*Math.log10(numClasses));
        }
        logger.debug("GT entropy: {}", gtEntropy);

        // Cluster based entropy: class distribution inside each cluster, weighted by
        // the cluster's share of all entries.
        double fcCrossEntropy = 0;
        for (int fc = 0; fc < numCluster; fc++) {
            int clusterWeight = mm.getClusterSum(fc);
            if (clusterWeight > 0) {
                double e = 0;
                for (int hc = 0; hc < numClasses; hc++) {
                    double p = mm.getClusterClassWeight(fc, hc)/(double)clusterWeight;
                    if (p != 0) {
                        e += p * Math.log10(p);
                    }
                }
                fcCrossEntropy += ((clusterWeight/(double)n) * e);
            }
        }
        if (numCluster > 1) {
            fcCrossEntropy /= -1*Math.log10(numCluster);
        }
        addValue("FC cross entropy", 1-fcCrossEntropy);
        logger.debug("FC cross entropy: {}", 1 - fcCrossEntropy);

        // Class based entropy: cluster distribution inside each class, weighted by
        // the class's share of all entries.
        double gtCrossEntropy = 0;
        for (int hc = 0; hc < numClasses; hc++) {
            int classWeight = mm.getClassSum(hc);
            if (classWeight > 0) {
                double e = 0;
                for (int fc = 0; fc < numCluster; fc++) {
                    double p = mm.getClusterClassWeight(fc, hc)/(double)classWeight;
                    if (p != 0) {
                        e += p * Math.log10(p);
                    }
                }
                // Accumulate only for non-empty classes, mirroring the cluster loop.
                // An empty class contributes nothing, and doing the multiplication
                // anyway would yield 0/0 * 0 = NaN when there are no entries at all.
                gtCrossEntropy += ((classWeight/(double)n) * e);
            }
        }
        if (numClasses > 1) {
            gtCrossEntropy /= -1*Math.log10(numClasses);
        }
        addValue("GT cross entropy", 1-gtCrossEntropy);
        logger.debug("GT cross entropy: {}", 1 - gtCrossEntropy);

        double homogeneity;
        if (fcEntropy == 0) {
            homogeneity = 1;
        } else {
            homogeneity = 1 - fcCrossEntropy/fcEntropy;
        }
        //TODO set err values for now, needs to be debugged
        boolean homogeneityOutOfRange = homogeneity > 1 || homogeneity < 0;
        if (homogeneityOutOfRange) {
            addValue("Homogeneity",-1);
        } else {
            addValue("Homogeneity",homogeneity);
        }

        double completeness;
        if (gtEntropy == 0) {
            completeness = 1;
        } else {
            completeness = 1 - gtCrossEntropy/gtEntropy;
        }
        addValue("Completeness",completeness);

        double beta = 1;
        double vmeasure = vMeasure(homogeneity, completeness, beta);
        // Report the error marker when the score itself is out of range or not a
        // number, and also when it was derived from a homogeneity that has already
        // been flagged as invalid above.
        if (homogeneityOutOfRange || Double.isNaN(vmeasure) || vmeasure > 1 || vmeasure < 0) {
            addValue("V-Measure",-1);
        } else {
            addValue("V-Measure",vmeasure);
        }

        // Mutual information between cluster and class assignment.
        double mutual = 0;
        for (int i = 0; i < numCluster; i++) {
            for (int j = 0; j < numClasses; j++) {
                double joint = mm.getClusterClassWeight(i, j);
                if (joint == 0) {
                    continue;
                }
                // A non-zero joint count implies non-zero cluster and class sums,
                // assuming the sums are the row/column totals of the same matrix.
                double m = Math.log10(joint/(double)mm.getClusterSum(i)/(double)mm.getClassSum(j)*(double)n);
                m *= joint/(double)n;
                logger.debug("({} / {}): {}", i, j, m);
                mutual += m;
            }
        }
        // NOTE(review): mutual is normalized by log10(numClasses) only, while
        // fcEntropy above is normalized by log10(numCluster) - confirm this is intended.
        if (numClasses > 1) {
            mutual /= Math.log10(numClasses);
        }

        // Reported under the name "VarInformation"; the value computed here is the
        // ratio 2 * mutual / (fcEntropy + gtEntropy), defaulting to 1 when both
        // entropies are zero.
        double varInfo = 1;
        if (fcEntropy + gtEntropy > 0) {
            varInfo = 2*mutual/(fcEntropy + gtEntropy);
        }
        logger.debug("mutual: {} / VI: {}", mutual, varInfo);
        addValue("VarInformation", varInfo);
    }

    /**
     * Weighted harmonic mean of homogeneity and completeness.
     *
     * @return {@code (1 + beta) * h * c / (beta * h + c)}, or {@code 0} when the
     *         denominator is zero (instead of the NaN a plain division would give)
     */
    private static double vMeasure(double homogeneity, double completeness, double beta) {
        double denominator = beta * homogeneity + completeness;
        if (denominator == 0) {
            return 0;
        }
        return (1 + beta) * homogeneity * completeness / denominator;
    }
}