blob: 3b304602b920553a4f43e90e4e1e2832b3ac0257 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.samoa.evaluation.measures;
import java.util.ArrayList;
import org.apache.samoa.moa.cluster.Clustering;
import org.apache.samoa.moa.core.DataPoint;
import org.apache.samoa.moa.evaluation.MeasureCollection;
import org.apache.samoa.moa.evaluation.MembershipMatrix;
public class F1 extends MeasureCollection {
@Override
protected String[] getNames() {
return new String[] { "F1-P", "F1-R", "Purity" };
}
public void evaluateClustering(Clustering clustering, Clustering trueClustering, ArrayList<DataPoint> points) {
if (clustering.size() < 0) {
addValue(0, 0);
addValue(1, 0);
return;
}
MembershipMatrix mm = new MembershipMatrix(clustering, points);
// System.out.println(mm.toString());
int numClasses = mm.getNumClasses();
if (mm.hasNoiseClass())
numClasses--;
// F1 as defined in P3C, try using F1 optimization
double F1_P = 0.0;
double purity = 0;
int realClusters = 0;
for (int i = 0; i < clustering.size(); i++) {
int max_weight = 0;
int max_weight_index = -1;
// find max index
for (int j = 0; j < numClasses; j++) {
if (mm.getClusterClassWeight(i, j) > max_weight) {
max_weight = mm.getClusterClassWeight(i, j);
max_weight_index = j;
}
}
if (max_weight_index != -1) {
realClusters++;
double precision = mm.getClusterClassWeight(i, max_weight_index) / (double) mm.getClusterSum(i);
double recall = mm.getClusterClassWeight(i, max_weight_index) / (double) mm.getClassSum(max_weight_index);
double f1 = 0;
if (precision > 0 || recall > 0) {
f1 = 2 * precision * recall / (precision + recall);
}
F1_P += f1;
purity += precision;
// TODO should we move setMeasure stuff into the Cluster interface?
clustering.get(i).setMeasureValue("F1-P", Double.toString(f1));
}
}
if (realClusters > 0) {
F1_P /= realClusters;
purity /= realClusters;
}
addValue("F1-P", F1_P);
addValue("Purity", purity);
// F1 as defined in .... mainly maximizes F1 for each class
double F1_R = 0.0;
for (int j = 0; j < numClasses; j++) {
double max_f1 = 0;
for (int i = 0; i < clustering.size(); i++) {
double precision = mm.getClusterClassWeight(i, j) / (double) mm.getClusterSum(i);
double recall = mm.getClusterClassWeight(i, j) / (double) mm.getClassSum(j);
double f1 = 0;
if (precision > 0 || recall > 0) {
f1 = 2 * precision * recall / (precision + recall);
}
if (max_f1 < f1) {
max_f1 = f1;
}
}
F1_R += max_f1;
}
F1_R /= numClasses;
addValue("F1-R", F1_R);
}
}