blob: 6af0df278229fc0ec4be5a30f2a1aad0f8dc8e9e [file] [log] [blame]
package org.apache.samoa.streams;
/*
* #%L
* SAMOA
* %%
* Copyright (C) 2014 - 2015 Apache Software Foundation
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import com.github.javacliparser.IntOption;
import org.apache.samoa.instances.*;
import org.apache.samoa.moa.core.InstanceExample;
import org.apache.samoa.moa.core.ObjectRepository;
import org.apache.samoa.moa.options.AbstractOptionHandler;
import org.apache.samoa.streams.InstanceStream;
import org.apache.samoa.moa.tasks.TaskMonitor;
import java.util.ArrayList;
import java.util.Random;
/**
 * Text generator that simulates sentiment analysis on tweets.
 *
 * <p>A table of {@code numAtts} words is built in {@link #restart()}. Each word has a
 * frequency proportional to {@code 1 / rank^zipfExponent} and a polarity (0 = neutral,
 * 1 = positive, 2 = negative). A tweet is produced by sampling words from that table; its
 * class is decided by a majority vote between the positive and negative words it contains.
 */
public class TextGenerator extends AbstractOptionHandler implements InstanceStream {

  private static final long serialVersionUID = 3028905554604259131L;

  public IntOption numAttsOption = new IntOption("numAtts", 'a',
      "The number of attributes to generate.", 1000, 0, Integer.MAX_VALUE);

  public IntOption instanceRandomSeedOption = new IntOption(
      "instanceRandomSeed", 'i',
      "Seed for random generation of instances.", 1);

  /** Header describing the word attributes followed by the class attribute. */
  protected InstancesHeader streamHeader;

  /** Random source, re-seeded from {@link #instanceRandomSeedOption} on every restart. */
  protected Random instanceRandom;

  /** For each table slot, the index of the attribute that the slot's word switches on. */
  protected int[] wordTwitterGenerator;

  /** Normalized frequency of each table slot. */
  protected double[] freqTwitterGenerator;

  /** Cumulative normalized frequency of each table slot (last entry is 1.0). */
  protected double[] sumFreqTwitterGenerator;

  /** Polarity of each table slot: 0 = neutral, 1 = positive, 2 = negative. */
  protected int[] classTwitterGenerator;

  /** Number of slots in the word table. */
  protected int sizeTable;

  protected double probPositive = 0.1;

  protected double probNegative = 0.1;

  protected double zipfExponent = 1.5;

  protected double lengthTweet = 15;

  /** Number of instances generated since the last restart. */
  protected int countTweets = 0;

  @Override
  public InstancesHeader getHeader() {
    return this.streamHeader;
  }

  /** @return -1, as the stream never runs out of instances. */
  @Override
  public long estimatedRemainingInstances() {
    return -1;
  }

  @Override
  public boolean hasMoreInstances() {
    return true;
  }

  /**
   * Generates the next tweet.
   *
   * <p>Words are drawn until the tweet length is reached; a tweet whose positive and negative
   * votes tie is discarded and a new one is drawn. Note that this never terminates if the
   * word table contains no positive or negative word at all.
   *
   * @return the generated instance, with class value 0 when positive words outnumber negative
   *         ones and 1 otherwise
   */
  @Override
  public InstanceExample nextInstance() {
    int[] votes;
    double[] attVals;
    do {
      // Start every attempt from a clean vector so that words of a rejected (tied) tweet do
      // not leak into the instance that is finally emitted.
      // One slot per word attribute plus a trailing slot for the class attribute.
      attVals = new double[this.numAttsOption.getValue() + 1];
      int length = (int) (lengthTweet * (1.0 + this.instanceRandom.nextGaussian()));
      if (length < 1) {
        length = 1;
      }
      votes = new int[3];
      for (int j = 0; j < length; j++) {
        int slot = sampleTableIndex(this.instanceRandom.nextDouble());
        attVals[this.wordTwitterGenerator[slot]] = 1;
        votes[this.classTwitterGenerator[slot]]++;
      }
    } while (votes[1] == votes[2]);

    Instance inst = new DenseInstance(1.0, attVals);
    inst.setDataset(getHeader());
    inst.setClassValue((votes[1] > votes[2]) ? 0 : 1);
    this.countTweets++;
    return new InstanceExample(inst);
  }

  /**
   * Maps a uniform draw to a table slot by inverting the cumulative frequency table.
   *
   * @param rand a value in [0, 1)
   * @return the smallest slot whose cumulative frequency is at least {@code rand}, clamped to
   *         the last slot
   */
  private int sampleTableIndex(double rand) {
    int lo = 0;
    int hi = this.sizeTable - 1;
    while (lo < hi) {
      // Unsigned shift keeps the midpoint correct even if lo + hi overflows.
      int mid = (lo + hi) >>> 1;
      if (this.sumFreqTwitterGenerator[mid] < rand) {
        lo = mid + 1;
      } else {
        hi = mid;
      }
    }
    return lo;
  }

  @Override
  public boolean isRestartable() {
    return true;
  }

  /**
   * Rebuilds the word table from the current option values and resets the random generator
   * and the tweet counter.
   */
  @Override
  public void restart() {
    this.sizeTable = this.numAttsOption.getValue();
    // Prepare table of words to generate tweets
    this.wordTwitterGenerator = new int[sizeTable];
    this.freqTwitterGenerator = new double[sizeTable];
    this.sumFreqTwitterGenerator = new double[sizeTable];
    this.classTwitterGenerator = new int[sizeTable];
    this.countTweets = 0;
    double sum = 0;
    this.instanceRandom = new Random(this.instanceRandomSeedOption.getValue());
    for (int i = 0; i < this.sizeTable; i++) {
      // Attribute indices are 0-based; the slot after the last word belongs to the class.
      this.wordTwitterGenerator[i] = i;
      this.freqTwitterGenerator[i] = 1.0 / Math.pow(i + 1, zipfExponent);
      sum += this.freqTwitterGenerator[i];
      this.sumFreqTwitterGenerator[i] = sum;
      double rand = this.instanceRandom.nextDouble();
      this.classTwitterGenerator[i] =
          (rand < probPositive ? 1 : (rand < probNegative + probPositive ? 2 : 0));
    }
    // Normalize so that the cumulative table ends at 1.0.
    for (int i = 0; i < this.sizeTable; i++) {
      this.freqTwitterGenerator[i] /= sum;
      this.sumFreqTwitterGenerator[i] /= sum;
    }
  }

  @Override
  protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {
    generateHeader();
    restart();
  }

  @Override
  public void getDescription(StringBuilder sb, int indent) {
    // Intentionally left empty: no description is produced.
  }

  /** Builds the header: {@code numAtts} attributes named att1..attN plus a two-valued class. */
  private void generateHeader() {
    int numAtts = this.numAttsOption.getValue();
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    for (int i = 0; i < numAtts; i++) {
      attributes.add(new Attribute("att" + (i + 1)));
    }
    ArrayList<String> classLabels = new ArrayList<String>();
    for (int i = 0; i < 2; i++) {
      classLabels.add("class" + (i + 1));
    }
    attributes.add(new Attribute("class", classLabels));
    this.streamHeader = new InstancesHeader(new Instances(
        getCLICreationString(InstanceStream.class), attributes, 0));
    this.streamHeader.setClassIndex(this.streamHeader.numAttributes() - 1);
  }

  /**
   * Flips the polarity (positive to negative and vice versa) of randomly chosen words.
   * Neutral words are skipped and do not count towards {@code numberWords}; the same word may
   * be picked, and therefore flipped, more than once.
   *
   * @param numberWords number of flips to perform; values {@code <= 0} do nothing
   * @throws IllegalStateException if the table contains no positive or negative word, in which
   *         case no flip could ever succeed
   */
  public void changePolarity(int numberWords) {
    if (numberWords <= 0) {
      return;
    }
    if (!hasPolarWord()) {
      throw new IllegalStateException(
          "Cannot change polarity: the word table contains no positive or negative word");
    }
    for (int flipped = 0; flipped < numberWords; ) {
      int randWord = this.instanceRandom.nextInt(this.sizeTable);
      int polarity = this.classTwitterGenerator[randWord];
      // Write back to the word that was actually sampled.
      if (polarity == 1) {
        this.classTwitterGenerator[randWord] = 2;
        flipped++;
      } else if (polarity == 2) {
        this.classTwitterGenerator[randWord] = 1;
        flipped++;
      }
    }
  }

  /** Returns whether at least one table slot is positive or negative. */
  private boolean hasPolarWord() {
    for (int i = 0; i < this.sizeTable; i++) {
      if (this.classTwitterGenerator[i] != 0) {
        return true;
      }
    }
    return false;
  }

  /**
   * Swaps the attribute assigned to randomly chosen pairs of table slots, so that the words
   * involved exchange their frequencies and polarities.
   *
   * @param numberWords number of swaps to perform
   */
  public void changeFreqWords(int numberWords) {
    for (int i = 0; i < numberWords; i++) {
      int randWordTo = this.instanceRandom.nextInt(this.sizeTable);
      int randWordFrom = this.instanceRandom.nextInt(this.sizeTable);
      // Swap the stored attribute indices (not the slot numbers) to keep a valid permutation.
      int swapped = this.wordTwitterGenerator[randWordTo];
      this.wordTwitterGenerator[randWordTo] = this.wordTwitterGenerator[randWordFrom];
      this.wordTwitterGenerator[randWordFrom] = swapped;
    }
  }
}