| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.samoa.moa.classifiers.core.attributeclassobservers; |
| |
| import org.apache.samoa.moa.classifiers.core.AttributeSplitSuggestion; |
| import org.apache.samoa.moa.classifiers.core.conditionaltests.NumericAttributeBinaryTest; |
| import org.apache.samoa.moa.classifiers.core.splitcriteria.SplitCriterion; |
| import org.apache.samoa.moa.core.AutoExpandVector; |
| import org.apache.samoa.moa.core.DoubleVector; |
| import org.apache.samoa.moa.core.GreenwaldKhannaQuantileSummary; |
| import org.apache.samoa.moa.core.ObjectRepository; |
| import org.apache.samoa.moa.core.Utils; |
| import org.apache.samoa.moa.options.AbstractOptionHandler; |
| import org.apache.samoa.moa.tasks.TaskMonitor; |
| |
| import com.github.javacliparser.IntOption; |
| |
| /** |
| * Class for observing the class data distribution for a numeric attribute using Greenwald and Khanna methodology. This |
| * observer monitors the class distribution of a given attribute. Used in naive Bayes and decision trees to monitor data |
| * statistics on leaves. |
| * |
| * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) |
| * @version $Revision: 7 $ |
| */ |
| public class GreenwaldKhannaNumericAttributeClassObserver extends AbstractOptionHandler implements |
| NumericAttributeClassObserver { |
| |
| private static final long serialVersionUID = 1L; |
| |
| protected AutoExpandVector<GreenwaldKhannaQuantileSummary> attValDistPerClass = new AutoExpandVector<>(); |
| |
| public IntOption numTuplesOption = new IntOption("numTuples", 'n', |
| "The number of tuples.", 10, 1, Integer.MAX_VALUE); |
| |
| @Override |
| public void observeAttributeClass(double attVal, int classVal, double weight) { |
| if (!Utils.isMissingValue(attVal)) { |
| GreenwaldKhannaQuantileSummary valDist = this.attValDistPerClass.get(classVal); |
| if (valDist == null) { |
| valDist = new GreenwaldKhannaQuantileSummary(this.numTuplesOption.getValue()); |
| this.attValDistPerClass.set(classVal, valDist); |
| } |
| // TODO: not taking weight into account |
| valDist.insert(attVal); |
| } |
| } |
| |
| @Override |
| public double probabilityOfAttributeValueGivenClass(double attVal, |
| int classVal) { |
| // TODO: NaiveBayes broken until implemented |
| return 0.0; |
| } |
| |
| @Override |
| public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion( |
| SplitCriterion criterion, double[] preSplitDist, int attIndex, |
| boolean binaryOnly) { |
| AttributeSplitSuggestion bestSuggestion = null; |
| for (GreenwaldKhannaQuantileSummary qs : this.attValDistPerClass) { |
| if (qs != null) { |
| double[] cutpoints = qs.getSuggestedCutpoints(); |
| for (double cutpoint : cutpoints) { |
| double[][] postSplitDists = getClassDistsResultingFromBinarySplit(cutpoint); |
| double merit = criterion.getMeritOfSplit(preSplitDist, |
| postSplitDists); |
| if ((bestSuggestion == null) |
| || (merit > bestSuggestion.merit)) { |
| bestSuggestion = new AttributeSplitSuggestion( |
| new NumericAttributeBinaryTest(attIndex, |
| cutpoint, true), postSplitDists, merit); |
| } |
| } |
| } |
| } |
| return bestSuggestion; |
| } |
| |
| // assume all values equal to splitValue go to lhs |
| public double[][] getClassDistsResultingFromBinarySplit(double splitValue) { |
| DoubleVector lhsDist = new DoubleVector(); |
| DoubleVector rhsDist = new DoubleVector(); |
| for (int i = 0; i < this.attValDistPerClass.size(); i++) { |
| GreenwaldKhannaQuantileSummary estimator = this.attValDistPerClass.get(i); |
| if (estimator != null) { |
| long countBelow = estimator.getCountBelow(splitValue); |
| lhsDist.addToValue(i, countBelow); |
| rhsDist.addToValue(i, estimator.getTotalCount() - countBelow); |
| } |
| } |
| return new double[][] { lhsDist.getArrayRef(), rhsDist.getArrayRef() }; |
| } |
| |
| @Override |
| public void getDescription(StringBuilder sb, int indent) { |
| // TODO Auto-generated method stub |
| } |
| |
| @Override |
| protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) { |
| // TODO Auto-generated method stub |
| } |
| |
| @Override |
| public void observeAttributeTarget(double attVal, double target) { |
| throw new UnsupportedOperationException("Not supported yet."); |
| } |
| } |