package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers;

/*
 * #%L
 * SAMOA
 * %%
 *    Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */

import com.yahoo.labs.samoa.moa.core.ObjectRepository;
import com.yahoo.labs.samoa.moa.tasks.TaskMonitor;
import com.yahoo.labs.samoa.moa.core.Utils;

import java.util.Set;
import java.util.TreeSet;
import com.yahoo.labs.samoa.moa.classifiers.core.AttributeSplitSuggestion;
import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.NumericAttributeBinaryTest;
import com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria.SplitCriterion;

import com.yahoo.labs.samoa.moa.core.AutoExpandVector;
import com.yahoo.labs.samoa.moa.core.DoubleVector;
import com.yahoo.labs.samoa.moa.core.GaussianEstimator;
import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler;
import com.github.javacliparser.IntOption;

/**
 * Class for observing the class data distribution for a numeric attribute using gaussian estimators. This observer
 * monitors the class distribution of a given attribute. Used in naive Bayes and decision trees to monitor data
 * statistics on leaves.
 * 
 * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
 * @version $Revision: 7 $
 */
public class GaussianNumericAttributeClassObserver extends AbstractOptionHandler
    implements NumericAttributeClassObserver {

  private static final long serialVersionUID = 1L;

  protected DoubleVector minValueObservedPerClass = new DoubleVector();

  protected DoubleVector maxValueObservedPerClass = new DoubleVector();

  protected AutoExpandVector<GaussianEstimator> attValDistPerClass = new AutoExpandVector<>();

  /**
   * @param classVal
   * @return The requested Estimator if it exists, or null if not present.
   */
  public GaussianEstimator getEstimator(int classVal) {
    return this.attValDistPerClass.get(classVal);
  }

  public IntOption numBinsOption = new IntOption("numBins", 'n',
      "The number of bins.", 10, 1, Integer.MAX_VALUE);

  @Override
  public void observeAttributeClass(double attVal, int classVal, double weight) {
    if (!Utils.isMissingValue(attVal)) {
      GaussianEstimator valDist = this.attValDistPerClass.get(classVal);
      if (valDist == null) {
        valDist = new GaussianEstimator();
        this.attValDistPerClass.set(classVal, valDist);
        this.minValueObservedPerClass.setValue(classVal, attVal);
        this.maxValueObservedPerClass.setValue(classVal, attVal);
      } else {
        if (attVal < this.minValueObservedPerClass.getValue(classVal)) {
          this.minValueObservedPerClass.setValue(classVal, attVal);
        }
        if (attVal > this.maxValueObservedPerClass.getValue(classVal)) {
          this.maxValueObservedPerClass.setValue(classVal, attVal);
        }
      }
      valDist.addObservation(attVal, weight);
    }
  }

  @Override
  public double probabilityOfAttributeValueGivenClass(double attVal,
      int classVal) {
    GaussianEstimator obs = this.attValDistPerClass.get(classVal);
    return obs != null ? obs.probabilityDensity(attVal) : 0.0;
  }

  @Override
  public AttributeSplitSuggestion getBestEvaluatedSplitSuggestion(
      SplitCriterion criterion, double[] preSplitDist, int attIndex,
      boolean binaryOnly) {
    AttributeSplitSuggestion bestSuggestion = null;
    double[] suggestedSplitValues = getSplitPointSuggestions();
    for (double splitValue : suggestedSplitValues) {
      double[][] postSplitDists = getClassDistsResultingFromBinarySplit(splitValue);
      double merit = criterion.getMeritOfSplit(preSplitDist,
          postSplitDists);
      if ((bestSuggestion == null) || (merit > bestSuggestion.merit)) {
        bestSuggestion = new AttributeSplitSuggestion(
            new NumericAttributeBinaryTest(attIndex, splitValue,
                true), postSplitDists, merit);
      }
    }
    return bestSuggestion;
  }

  public double[] getSplitPointSuggestions() {
    Set<Double> suggestedSplitValues = new TreeSet<>();
    double minValue = Double.POSITIVE_INFINITY;
    double maxValue = Double.NEGATIVE_INFINITY;
    for (int i = 0; i < this.attValDistPerClass.size(); i++) {
      GaussianEstimator estimator = this.attValDistPerClass.get(i);
      if (estimator != null) {
        if (this.minValueObservedPerClass.getValue(i) < minValue) {
          minValue = this.minValueObservedPerClass.getValue(i);
        }
        if (this.maxValueObservedPerClass.getValue(i) > maxValue) {
          maxValue = this.maxValueObservedPerClass.getValue(i);
        }
      }
    }
    if (minValue < Double.POSITIVE_INFINITY) {
      double range = maxValue - minValue;
      for (int i = 0; i < this.numBinsOption.getValue(); i++) {
        double splitValue = range / (this.numBinsOption.getValue() + 1.0) * (i + 1)
            + minValue;
        if ((splitValue > minValue) && (splitValue < maxValue)) {
          suggestedSplitValues.add(splitValue);
        }
      }
    }
    double[] suggestions = new double[suggestedSplitValues.size()];
    int i = 0;
    for (double suggestion : suggestedSplitValues) {
      suggestions[i++] = suggestion;
    }
    return suggestions;
  }

  // assume all values equal to splitValue go to lhs
  public double[][] getClassDistsResultingFromBinarySplit(double splitValue) {
    DoubleVector lhsDist = new DoubleVector();
    DoubleVector rhsDist = new DoubleVector();
    for (int i = 0; i < this.attValDistPerClass.size(); i++) {
      GaussianEstimator estimator = this.attValDistPerClass.get(i);
      if (estimator != null) {
        if (splitValue < this.minValueObservedPerClass.getValue(i)) {
          rhsDist.addToValue(i, estimator.getTotalWeightObserved());
        } else if (splitValue >= this.maxValueObservedPerClass.getValue(i)) {
          lhsDist.addToValue(i, estimator.getTotalWeightObserved());
        } else {
          double[] weightDist = estimator.estimatedWeight_LessThan_EqualTo_GreaterThan_Value(splitValue);
          lhsDist.addToValue(i, weightDist[0] + weightDist[1]);
          rhsDist.addToValue(i, weightDist[2]);
        }
      }
    }
    return new double[][] { lhsDist.getArrayRef(), rhsDist.getArrayRef() };
  }

  @Override
  public void getDescription(StringBuilder sb, int indent) {
  }

  @Override
  protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {
  }

  @Override
  public void observeAttributeTarget(double attVal, double target) {
    throw new UnsupportedOperationException("Not supported yet.");
  }
}
