// blob: 822c2be6dd3ec009aef2ff36871ccc72d75b759f [file] [log] [blame]
package org.apache.samoa.learners.classifiers.rules;
/*
* #%L
* SAMOA
* %%
* Copyright (C) 2014 - 2015 Apache Software Foundation
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.google.common.collect.ImmutableSet;
import java.util.Set;
import org.apache.samoa.core.Processor;
import org.apache.samoa.instances.Instances;
import org.apache.samoa.learners.RegressionLearner;
import org.apache.samoa.learners.classifiers.rules.distributed.AMRDefaultRuleProcessor;
import org.apache.samoa.learners.classifiers.rules.distributed.AMRLearnerProcessor;
import org.apache.samoa.learners.classifiers.rules.distributed.AMRRuleSetProcessor;
import org.apache.samoa.moa.classifiers.rules.core.attributeclassobservers.FIMTDDNumericAttributeClassLimitObserver;
import org.apache.samoa.topology.Stream;
import org.apache.samoa.topology.TopologyBuilder;
import com.github.javacliparser.ClassOption;
import com.github.javacliparser.Configurable;
import com.github.javacliparser.FlagOption;
import com.github.javacliparser.FloatOption;
import com.github.javacliparser.IntOption;
import com.github.javacliparser.MultiChoiceOption;
/**
 * Horizontal AMRules Regressor: a distributed learner for regression rules. It applies both horizontal parallelism
 * (dividing incoming streams) and vertical parallelism to the AMRules algorithm.
 *
 * <p>
 * The topology assembled by {@link #init(TopologyBuilder, Instances, int)} consists of three kinds of processors: the
 * rule-set ("model") processors, a single default-rule ("root") processor and the learner processors. All tunable
 * parameters are exposed as public option fields.
 *
 * @author Anh Thu Vu
 *
 */
public class HorizontalAMRulesRegressor implements RegressionLearner, Configurable {

  private static final long serialVersionUID = 2785944439173586051L;

  // ------------------------------------------------------------------
  // Options (kept public: they are part of this learner's interface)
  // ------------------------------------------------------------------

  /** Hoeffding bound: allowable error in a split decision. */
  public FloatOption splitConfidenceOption = new FloatOption("splitConfidence", 'c',
      "Hoeffding Bound Parameter. The allowable error in split decision, values closer to 0 will take longer to decide.",
      0.0000001, 0.0, 1.0);

  /** Hoeffding bound: threshold below which a split is forced to break ties. */
  public FloatOption tieThresholdOption = new FloatOption("tieThreshold", 't',
      "Hoeffding Bound Parameter. Threshold below which a split will be forced to break ties.",
      0.05, 0.0, 1.0);

  /** Hoeffding bound: number of instances observed between split attempts. */
  public IntOption gracePeriodOption = new IntOption("gracePeriod", 'g',
      "Hoeffding Bound Parameter. The number of instances a leaf should observe between split attempts.",
      200, 1, Integer.MAX_VALUE);

  /**
   * Flag registered under the name "DoNotDetectChanges"; its state is handed unchanged to the default-rule builder's
   * {@code changeDetection(...)} in {@link #init(TopologyBuilder, Instances, int)}.
   */
  public FlagOption DriftDetectionOption = new FlagOption("DoNotDetectChanges", 'H',
      "Drift Detection. Page-Hinkley.");

  /** Alpha value for the Page Hinckley change detection tests. */
  public FloatOption pageHinckleyAlphaOption = new FloatOption("pageHinckleyAlpha", 'a',
      "The alpha value to use in the Page Hinckley change detection tests.",
      0.005, 0.0, 1.0);

  /** Threshold (lambda) for the Page Hinckley change detection tests. */
  public IntOption pageHinckleyThresholdOption = new IntOption("pageHinckleyThreshold", 'l',
      "The threshold value (Lambda) to be used in the Page Hinckley change detection tests.",
      35, 0, Integer.MAX_VALUE);

  /** When set, anomaly detection is disabled. */
  public FlagOption noAnomalyDetectionOption = new FlagOption("noAnomalyDetection", 'A',
      "Disable anomaly Detection.");

  /** Multivariate anomaly probability threshold. */
  public FloatOption multivariateAnomalyProbabilityThresholdOption = new FloatOption(
      "multivariateAnomalyProbabilityThresholdd", 'm',
      "Multivariate anomaly threshold value.",
      0.99, 0.0, 1.0);

  /** Univariate anomaly probability threshold. */
  public FloatOption univariateAnomalyProbabilityThresholdOption = new FloatOption(
      "univariateAnomalyprobabilityThreshold", 'u',
      "Univariate anomaly threshold value.",
      0.10, 0.0, 1.0);

  /**
   * Anomaly threshold used by the anomaly detection; per the original author's note, the minimum number of instances
   * needed to detect anomalies.
   */
  public IntOption anomalyNumInstThresholdOption = new IntOption("anomalyThreshold", 'n',
      "The threshold value of anomalies to be used in the anomaly detection.",
      30, 0, Integer.MAX_VALUE);

  /** When set, the rule set is built with unordered rules. */
  public FlagOption unorderedRulesOption = new FlagOption("setUnorderedRulesOn", 'U',
      "unorderedRules.");

  /** Numeric attribute observer handed to the default-rule processor. */
  public ClassOption numericObserverOption = new ClassOption("numericObserver", 'z',
      "Numeric observer.",
      FIMTDDNumericAttributeClassLimitObserver.class,
      "FIMTDDNumericAttributeClassLimitObserver");

  /** Prediction function selector; the chosen index is passed to the default-rule processor. */
  public MultiChoiceOption predictionFunctionOption = new MultiChoiceOption(
      "predictionFunctionOption", 'P', "The prediction function to use.",
      new String[] { "Adaptative", "Perceptron", "Target Mean" },
      new String[] { "Adaptative", "Perceptron", "Target Mean" },
      0);

  /** When set, the perceptron learning ratio decay is constant. */
  public FlagOption constantLearningRatioDecayOption = new FlagOption(
      "learningRatio_Decay_set_constant", 'd',
      "Learning Ratio Decay in Perceptron set to be constant. (The next parameter).");

  /** Learning ratio used when training the perceptrons. */
  public FloatOption learningRatioOption = new FloatOption("learningRatio", 's',
      "Constante Learning Ratio to use for training the Perceptrons in the leaves.", 0.025);

  /** Voting type selector; the chosen index is passed to the rule-set processor. */
  public MultiChoiceOption votingTypeOption = new MultiChoiceOption(
      "votingType", 'V', "Voting Type.",
      new String[] { "InverseErrorWeightedVote", "UniformWeightedVote" },
      new String[] { "InverseErrorWeightedVote", "UniformWeightedVote" },
      0);

  /** Parallelism used when the learner processor is added to the topology. */
  public IntOption learnerParallelismOption = new IntOption("leanerParallelism", 'p',
      "The number of local statistics PI to do distributed computation",
      1, 1, Integer.MAX_VALUE);

  /** Parallelism used when the rule-set (model) processor is added to the topology. */
  public IntOption ruleSetParallelismOption = new IntOption("modelParallelism", 'r',
      "The number of replicated model (rule set) PIs",
      1, 1, Integer.MAX_VALUE);

  // ------------------------------------------------------------------
  // State populated by init(...)
  // ------------------------------------------------------------------

  /** Rule-set processor; also the input processor of this learner. */
  private AMRRuleSetProcessor model;

  /** Result stream created on the rule-set processor. */
  private Stream modelResultStream;

  /** Result stream created on the default-rule processor. */
  private Stream rootResultStream;

  /**
   * Creates the rule-set, default-rule and learner processors, registers them with the topology and connects their
   * streams.
   *
   * @param topologyBuilder
   *          builder that receives the processors, streams and connections
   * @param dataset
   *          dataset description handed to every processor builder
   * @param parallelism
   *          not read by this implementation; the parallelism hints come from {@link #ruleSetParallelismOption} and
   *          {@link #learnerParallelismOption}
   */
  @Override
  public void init(TopologyBuilder topologyBuilder, Instances dataset, int parallelism) {
    // --- MODEL PIs and their three outgoing streams ---
    model = newRuleSetProcessor(dataset);
    topologyBuilder.addProcessor(model, ruleSetParallelismOption.getValue());

    Stream modelToRoot = topologyBuilder.createStream(model);
    Stream modelToLearner = topologyBuilder.createStream(model);
    modelResultStream = topologyBuilder.createStream(model);
    model.setDefaultRuleStream(modelToRoot);
    model.setStatisticsStream(modelToLearner);
    model.setResultStream(modelResultStream);

    // --- Default-rule PI and its two outgoing streams ---
    AMRDefaultRuleProcessor defaultRule = newDefaultRuleProcessor(dataset);
    topologyBuilder.addProcessor(defaultRule);

    Stream createdRules = topologyBuilder.createStream(defaultRule);
    rootResultStream = topologyBuilder.createStream(defaultRule);
    defaultRule.setRuleStream(createdRules);
    defaultRule.setResultStream(rootResultStream);

    // --- Learner PIs and their single outgoing stream ---
    AMRLearnerProcessor statistics = newLearnerProcessor(dataset);
    topologyBuilder.addProcessor(statistics, learnerParallelismOption.getValue());

    Stream learnerOutput = topologyBuilder.createStream(statistics);
    statistics.setOutputStream(learnerOutput);

    // --- Wiring ---
    // inputs of the MODEL
    topologyBuilder.connectInputAllStream(createdRules, model);
    topologyBuilder.connectInputAllStream(learnerOutput, model);
    // input of the ROOT (default rule)
    topologyBuilder.connectInputShuffleStream(modelToRoot, defaultRule);
    // inputs of the LEARNER
    topologyBuilder.connectInputKeyStream(modelToLearner, statistics);
    topologyBuilder.connectInputAllStream(createdRules, statistics);
  }

  /**
   * @return the rule-set processor created by {@link #init(TopologyBuilder, Instances, int)}
   */
  @Override
  public Processor getInputProcessor() {
    return model;
  }

  /**
   * @return an immutable set holding the result stream of the rule-set processor and the result stream of the
   *         default-rule processor
   */
  @Override
  public Set<Stream> getResultStreams() {
    return ImmutableSet.of(modelResultStream, rootResultStream);
  }

  /** Builds the rule-set (model) processor from the anomaly, rule-ordering and voting options. */
  private AMRRuleSetProcessor newRuleSetProcessor(Instances dataset) {
    AMRRuleSetProcessor.Builder builder = new AMRRuleSetProcessor.Builder(dataset);
    return builder
        .noAnomalyDetection(noAnomalyDetectionOption.isSet())
        .multivariateAnomalyProbabilityThreshold(multivariateAnomalyProbabilityThresholdOption.getValue())
        .univariateAnomalyProbabilityThreshold(univariateAnomalyProbabilityThresholdOption.getValue())
        .anomalyNumberOfInstancesThreshold(anomalyNumInstThresholdOption.getValue())
        .unorderedRules(unorderedRulesOption.isSet())
        .voteType(votingTypeOption.getChosenIndex())
        .build();
  }

  /** Builds the default-rule (root) processor from the change-detection, prediction and Hoeffding options. */
  private AMRDefaultRuleProcessor newDefaultRuleProcessor(Instances dataset) {
    AMRDefaultRuleProcessor.Builder builder = new AMRDefaultRuleProcessor.Builder(dataset);
    return builder
        .threshold(pageHinckleyThresholdOption.getValue())
        .alpha(pageHinckleyAlphaOption.getValue())
        .changeDetection(DriftDetectionOption.isSet())
        .predictionFunction(predictionFunctionOption.getChosenIndex())
        .constantLearningRatioDecay(constantLearningRatioDecayOption.isSet())
        .learningRatio(learningRatioOption.getValue())
        .splitConfidence(splitConfidenceOption.getValue())
        .tieThreshold(tieThresholdOption.getValue())
        .gracePeriod(gracePeriodOption.getValue())
        .numericObserver((FIMTDDNumericAttributeClassLimitObserver) numericObserverOption.getValue())
        .build();
  }

  /** Builds the learner processor from the Hoeffding and anomaly options. */
  private AMRLearnerProcessor newLearnerProcessor(Instances dataset) {
    AMRLearnerProcessor.Builder builder = new AMRLearnerProcessor.Builder(dataset);
    return builder
        .splitConfidence(splitConfidenceOption.getValue())
        .tieThreshold(tieThresholdOption.getValue())
        .gracePeriod(gracePeriodOption.getValue())
        .noAnomalyDetection(noAnomalyDetectionOption.isSet())
        .multivariateAnomalyProbabilityThreshold(multivariateAnomalyProbabilityThresholdOption.getValue())
        .univariateAnomalyProbabilityThreshold(univariateAnomalyProbabilityThresholdOption.getValue())
        .anomalyNumberOfInstancesThreshold(anomalyNumInstThresholdOption.getValue())
        .build();
  }
}