blob: 2da9a59fa1e1854d74a62c8a5f41662b1ef10b8c [file] [log] [blame]
package org.apache.samoa.evaluation.measures;
/*
* #%L
* SAMOA
* %%
* Copyright (C) 2014 - 2015 Apache Software Foundation
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.util.ArrayList;
import org.apache.samoa.moa.cluster.Clustering;
import org.apache.samoa.moa.core.DataPoint;
import org.apache.samoa.moa.evaluation.MeasureCollection;
import org.apache.samoa.moa.evaluation.MembershipMatrix;
/**
 * Measure collection that reports three values for a clustering, all derived
 * from a {@link MembershipMatrix} built over the evaluated points:
 * <ul>
 * <li>{@code F1-P}: for each cluster, the F1 score against the class with the
 * largest weight in that cluster, averaged over the clusters that have such a
 * class;</li>
 * <li>{@code Purity}: the precision of those same clusters against their
 * dominant class, averaged the same way;</li>
 * <li>{@code F1-R}: for each class, the best F1 score achieved by any cluster,
 * averaged over the classes.</li>
 * </ul>
 */
public class F1 extends MeasureCollection {

  /**
   * Returns the names of the measures this collection reports.
   */
  @Override
  protected String[] getNames() {
    return new String[] { "F1-P", "F1-R", "Purity" };
  }

  /**
   * Evaluates {@code clustering} on {@code points} and records one value for
   * each of {@code F1-P}, {@code F1-R} and {@code Purity}. The F1-P score of
   * every cluster that has a dominant class is also stored on the cluster
   * itself under the key {@code "F1-P"}.
   *
   * @param clustering     the clustering to evaluate
   * @param trueClustering not used by this measure
   * @param points         the points used to build the membership matrix
   */
  public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) {
    if (clustering.size() == 0) {
      // Nothing to score: record zero for every measure so that each one
      // receives exactly one value per evaluation.
      addValue("F1-P", 0.0);
      addValue("F1-R", 0.0);
      addValue("Purity", 0.0);
      return;
    }

    MembershipMatrix mm = new MembershipMatrix(clustering, points);

    int numClasses = mm.getNumClasses();
    if (mm.hasNoiseClass()) {
      // NOTE(review): dropping one from the count skips the last class index
      // in the loops below; presumably that is where the noise class lives --
      // confirm against MembershipMatrix.
      numClasses--;
    }

    // F1 as defined in P3C: score each cluster against its dominant class.
    double f1PerCluster = 0.0;
    double purity = 0.0;
    int realClusters = 0;
    for (int i = 0; i < clustering.size(); i++) {
      // Find the class with the largest strictly positive weight in cluster i.
      int maxWeight = 0;
      int maxWeightIndex = -1;
      for (int j = 0; j < numClasses; j++) {
        int weight = mm.getClusterClassWeight(i, j);
        if (weight > maxWeight) {
          maxWeight = weight;
          maxWeightIndex = j;
        }
      }

      // Clusters without any positive class weight are left out of the average.
      if (maxWeightIndex != -1) {
        realClusters++;
        double precision = maxWeight / (double) mm.getClusterSum(i);
        double recall = maxWeight / (double) mm.getClassSum(maxWeightIndex);
        double f1 = f1Score(precision, recall);
        f1PerCluster += f1;
        purity += precision;

        // TODO should we move setMeasure stuff into the Cluster interface?
        clustering.get(i).setMeasureValue("F1-P", Double.toString(f1));
      }
    }
    if (realClusters > 0) {
      f1PerCluster /= realClusters;
      purity /= realClusters;
    }
    addValue("F1-P", f1PerCluster);
    addValue("Purity", purity);

    // Class-oriented F1: for each class take the best F1 over all clusters.
    double f1PerClass = 0.0;
    for (int j = 0; j < numClasses; j++) {
      double bestF1 = 0;
      for (int i = 0; i < clustering.size(); i++) {
        int weight = mm.getClusterClassWeight(i, j);
        double precision = weight / (double) mm.getClusterSum(i);
        double recall = weight / (double) mm.getClassSum(j);
        double f1 = f1Score(precision, recall);
        if (bestF1 < f1) {
          bestF1 = f1;
        }
      }
      f1PerClass += bestF1;
    }
    // Without this guard 0.0 / 0 would record NaN when there are no classes
    // to average over.
    if (numClasses > 0) {
      f1PerClass /= numClasses;
    }
    addValue("F1-R", f1PerClass);
  }

  /**
   * Harmonic mean of precision and recall. Returns 0 when neither value is
   * positive; this also covers NaN inputs (from a zero denominator), because
   * every comparison with NaN is false.
   */
  private static double f1Score(double precision, double recall) {
    if (precision > 0 || recall > 0) {
      return 2 * precision * recall / (precision + recall);
    }
    return 0;
  }
}