blob: ba935a5749641c61c764dc17e1a5e82761f92e83 [file] [log] [blame]
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.samoa.learners.classifiers.trees;
import java.util.HashMap;
import java.util.Map;
import org.apache.samoa.instances.Instance;
import org.apache.samoa.moa.classifiers.core.AttributeSplitSuggestion;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A {@link LearningNode} that accumulates class-distribution statistics from
 * incoming instances, builds per-attribute content events for them, and
 * keeps track of the best and second-best split suggestions reported back
 * while a split request is in progress.
 */
public class ActiveLearningNode extends LearningNode {
private static final long serialVersionUID = -2892102872646338908L;
private static final Logger logger = LoggerFactory.getLogger(ActiveLearningNode.class);
// Weight recorded at the last split evaluation; initialised in the
// constructor from getWeightSeen() and updated through its setter.
protected double weightSeenAtLastSplitEvaluation;
// Cache of the keys produced by generateKey(int), indexed by attribute index.
protected Map<Integer, String> attributeContentEventKeys;
// Best and second-best suggestions merged so far by
// addDistributedSuggestions(...); both are cleared by endSplitting().
protected AttributeSplitSuggestion bestSuggestion;
protected AttributeSplitSuggestion secondBestSuggestion;
// Identifier returned by getId().
// NOTE(review): the assignment to this field is not visible in this view.
protected long id;
// Value compared against suggestionCtr in isAllSuggestionsCollected().
protected int parallelismHint;
// Reset to 0 by requestDistributedSuggestions(...).
// NOTE(review): no increment of this counter is visible in this view.
protected int suggestionCtr;
// Counter reported as "wasted instance" by endSplitting() and reset there
// and in requestDistributedSuggestions(...).
// NOTE(review): no increment of this counter is visible in this view.
protected int thrownAwayInstance;
// True between requestDistributedSuggestions(...) and endSplitting().
protected boolean isSplitting;
/**
 * Creates a node that is not splitting and has an empty attribute-key cache.
 *
 * @param classObservation not referenced by any line visible here;
 *        presumably handed to the superclass constructor - TODO confirm
 * @param parallelismHint value later compared with the suggestion counter
 *        in {@link #isAllSuggestionsCollected()}
 */
ActiveLearningNode(double[] classObservation, int parallelismHint) {
// NOTE(review): the next line looks like two statements fused together with
// the target of the second assignment missing (extraction damage) - restore
// it from the upstream file before compiling.
this.weightSeenAtLastSplitEvaluation = this.getWeightSeen(); = VerticalHoeffdingTree.LearningNodeIdGenerator.generate();
this.attributeContentEventKeys = new HashMap<>();
this.isSplitting = false;
this.parallelismHint = parallelismHint;
/**
 * Returns the identifier stored in the {@code id} field.
 *
 * @return this node's id
 */
protected long getId() {
return id;
// One batch event slot per attribute index; allocated lazily in
// learnFromInstance(...) with inst.numAttributes() - 1 entries, or replaced
// wholesale through the setter.
protected AttributeBatchContentEvent[] attributeBatchContentEvent;
/**
 * Returns the internal batch-event array itself (no copy is made).
 *
 * @return the current array, or {@code null} if none has been allocated or set
 */
public AttributeBatchContentEvent[] getAttributeBatchContentEvent() {
return this.attributeBatchContentEvent;
/**
 * Replaces the batch-event array with the given reference (no copy is made).
 *
 * @param attributeBatchContentEvent the array to store; passing {@code null}
 *        makes the next {@code learnFromInstance} call allocate a new one
 */
public void setAttributeBatchContentEvent(AttributeBatchContentEvent[] attributeBatchContentEvent) {
this.attributeBatchContentEvent = attributeBatchContentEvent;
/**
 * Updates the observed class distribution with the given instance and, for
 * every attribute index in {@code [0, inst.numAttributes() - 1)}, builds an
 * attribute content event and makes sure a batch event exists for that index.
 *
 * NOTE(review): several statements in this method are visibly truncated in
 * this view (unfinished argument lists and builder chains, missing closing
 * braces); the comments below describe only what is still readable.
 *
 * @param inst the instance to learn from
 * @param proc not used by any active line visible here; the only reference
 *        is the commented-out {@code sendToAttributeStream} call
 */
public void learnFromInstance(Instance inst, ModelAggregatorProcessor proc) {
// TODO: what statistics should we keep for unused instance?
// NOTE(review): the body and closing brace of this branch are not visible in this view.
if (isSplitting) { // currently all instances are thrown away while splitting
// NOTE(review): the second argument of this call is missing in this view.
this.observedClassDistribution.addToValue((int) inst.classValue(),
// done: parallelize by sending attributes one by one
// TODO: meanwhile, we can try to use the ThreadPool to execute it
// separately
// TODO: parallelize by sending in batch, i.e. split the attributes into
// chunk instead of send the attribute one by one
for (int i = 0; i < inst.numAttributes() - 1; i++) {
// Position of model attribute i inside the instance (steps over the class index).
int instAttIndex = modelAttIndexToInstanceAttIndex(i, inst);
Integer obsIndex = i;
// Look up the cached key for this attribute, generating and caching it on first use.
String key = attributeContentEventKeys.get(obsIndex);
if (key == null) {
key = this.generateKey(i);
attributeContentEventKeys.put(obsIndex, key);
// NOTE(review): the first Builder argument is missing and the builder chain
// is cut off after classValue(...) in this view.
AttributeContentEvent ace = new AttributeContentEvent.Builder(, i, key)
.classValue((int) inst.classValue())
// Lazily allocate one batch slot per attribute.
if (this.attributeBatchContentEvent == null) {
this.attributeBatchContentEvent = new AttributeBatchContentEvent[inst.numAttributes() - 1];
if (this.attributeBatchContentEvent[i] == null) {
// NOTE(review): the first Builder argument and the rest of the chain are
// missing in this view.
this.attributeBatchContentEvent[i] = new AttributeBatchContentEvent.Builder(, i, key)
// .attrValue(inst.value(instAttIndex))
// .classValue((int) inst.classValue())
// .weight(inst.weight()]
// proc.sendToAttributeStream(ace);
/**
 * Returns a copy of the observed class distribution as the class votes.
 *
 * @param inst not used by this implementation
 * @param map not used by this implementation
 * @return the array produced by {@code observedClassDistribution.getArrayCopy()}
 */
public double[] getClassVotes(Instance inst, ModelAggregatorProcessor map) {
return this.observedClassDistribution.getArrayCopy();
/**
 * Returns the sum of all values in the observed class distribution.
 *
 * @return {@code observedClassDistribution.sumOfValues()}
 */
public double getWeightSeen() {
return this.observedClassDistribution.sumOfValues();
/**
 * Stores the weight to be reported by
 * {@link #getWeightSeenAtLastSplitEvaluation()}.
 *
 * @param weight the value to record
 */
public void setWeightSeenAtLastSplitEvaluation(double weight) {
this.weightSeenAtLastSplitEvaluation = weight;
/**
 * Returns the weight most recently stored by the constructor or by
 * {@link #setWeightSeenAtLastSplitEvaluation(double)}.
 *
 * @return the recorded weight
 */
public double getWeightSeenAtLastSplitEvaluation() {
return this.weightSeenAtLastSplitEvaluation;
/**
 * Marks this node as splitting, resets the suggestion and thrown-away
 * counters, and starts constructing a {@code ComputeContentEvent} for the
 * given split id.
 *
 * @param splitId identifier passed as the first argument of the compute event
 * @param modelAggrProc not referenced by any line visible here; presumably
 *        used to dispatch the event - TODO confirm against the upstream file
 */
public void requestDistributedSuggestions(long splitId, ModelAggregatorProcessor modelAggrProc) {
this.isSplitting = true;
this.suggestionCtr = 0;
this.thrownAwayInstance = 0;
// NOTE(review): the remaining constructor arguments and whatever follows
// this statement are missing in this view.
ComputeContentEvent cce = new ComputeContentEvent(splitId,,
/**
 * Merges a pair of reported suggestions into the best and second-best
 * suggestions held by this node, ranking candidates with {@code compareTo}.
 *
 * NOTE(review): closing braces are missing in this view, so the nesting
 * described in the comments below is inferred from the order of the
 * statements and the position of the {@code else}.
 *
 * @param bestSuggestion the reported best suggestion; nothing is compared
 *        when it is {@code null}
 * @param secondBestSuggestion the reported runner-up; may be {@code null}
 */
public void addDistributedSuggestions(AttributeSplitSuggestion bestSuggestion, AttributeSplitSuggestion secondBestSuggestion) {
// starts comparing from the best suggestion
if (bestSuggestion != null) {
// The incoming best replaces the held best when none is held or it ranks
// higher; the previously held best is demoted to second best.
if ((this.bestSuggestion == null) || (bestSuggestion.compareTo(this.bestSuggestion) > 0)) {
this.secondBestSuggestion = this.bestSuggestion;
this.bestSuggestion = bestSuggestion;
// The incoming runner-up may still outrank the demoted suggestion.
if (secondBestSuggestion != null) {
if ((this.secondBestSuggestion == null) || (secondBestSuggestion.compareTo(this.secondBestSuggestion) > 0)) {
this.secondBestSuggestion = secondBestSuggestion;
} else {
// Otherwise the incoming best is only a candidate for second place.
if ((this.secondBestSuggestion == null) || (bestSuggestion.compareTo(this.secondBestSuggestion) > 0)) {
this.secondBestSuggestion = bestSuggestion;
// TODO: optimize the code to use less memory
/**
 * Reports whether a split request is currently in progress.
 *
 * @return the value of the {@code isSplitting} flag
 */
public boolean isSplitting() {
return this.isSplitting;
/**
 * Ends the current split attempt: clears the splitting flag, logs the
 * thrown-away counter at trace level, then resets that counter and discards
 * both stored suggestions.
 */
void endSplitting() {
this.isSplitting = false;
logger.trace("wasted instance: {}", this.thrownAwayInstance);
this.thrownAwayInstance = 0;
this.bestSuggestion = null;
this.secondBestSuggestion = null;
/**
 * Returns the best suggestion merged so far.
 *
 * @return the stored best suggestion, or {@code null} if none is held
 */
public AttributeSplitSuggestion getDistributedBestSuggestion() {
return this.bestSuggestion;
/**
 * Returns the second-best suggestion merged so far.
 *
 * @return the stored second-best suggestion, or {@code null} if none is held
 */
public AttributeSplitSuggestion getDistributedSecondBestSuggestion() {
return this.secondBestSuggestion;
/**
 * Reports whether the suggestion counter has reached the parallelism hint.
 *
 * @return {@code true} when {@code suggestionCtr} equals
 *         {@code parallelismHint} exactly
 */
public boolean isAllSuggestionsCollected() {
return (this.suggestionCtr == this.parallelismHint);
/**
 * Maps a model attribute index to an index within the instance by stepping
 * over the position of the class attribute.
 *
 * @param index the model attribute index
 * @param inst the instance whose {@code classIndex()} is consulted
 * @return {@code index} when it is below the class index, otherwise
 *         {@code index + 1}
 */
private static int modelAttIndexToInstanceAttIndex(int index, Instance inst) {
return inst.classIndex() > index ? index : index + 1;
/**
 * Builds the string key for an attribute index by folding values into an
 * {@code int} with the multiplier 31 and rendering the result in decimal.
 *
 * @param obsIndex the attribute index mixed into the key
 * @return the decimal string form of the combined value
 */
protected String generateKey(int obsIndex) {
final int prime = 31;
int result = 1;
// NOTE(review): the operands of ^ and >>> are missing in this view. The
// shape matches folding a long into an int, presumably the node id - restore
// from the upstream file and confirm.
result = prime * result + (int) ( ^ ( >>> 32));
result = prime * result + obsIndex;
return Integer.toString(result);