SAMOA-43: Add TextReader
diff --git a/samoa-api/src/main/java/org/apache/samoa/streams/TextGenerator.java b/samoa-api/src/main/java/org/apache/samoa/streams/TextGenerator.java
new file mode 100644
index 0000000..c165f33
--- /dev/null
+++ b/samoa-api/src/main/java/org/apache/samoa/streams/TextGenerator.java
@@ -0,0 +1,306 @@
+package org.apache.samoa.streams;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import com.github.javacliparser.IntOption;
+import org.apache.samoa.instances.*;
+import org.apache.samoa.moa.core.InstanceExample;
+import org.apache.samoa.moa.core.ObjectRepository;
+import org.apache.samoa.moa.options.AbstractOptionHandler;
+import org.apache.samoa.moa.streams.InstanceStream;
+import org.apache.samoa.moa.tasks.TaskMonitor;
+
+import java.util.ArrayList;
+import java.util.Random;
+
+/**
+ * Text generator that simulates sentiment analysis on tweets.
+ * <p>
+ * Each attribute stands for one word of a vocabulary of {@code numAtts} words. Word
+ * frequencies follow a Zipf law and every word is neutral, positive or negative. A tweet is
+ * a bag of sampled words, labelled by whichever of the two polarities has more words in it.
+ */
+public class TextGenerator extends AbstractOptionHandler implements InstanceStream {
+
+    private static final long serialVersionUID = 3028905554604259131L;
+
+    /** Polarity code of a word that votes for neither class. */
+    private static final int NEUTRAL = 0;
+
+    /** Polarity code of a word that votes for the first class label. */
+    private static final int POSITIVE = 1;
+
+    /** Polarity code of a word that votes for the second class label. */
+    private static final int NEGATIVE = 2;
+
+    public IntOption numAttsOption = new IntOption("numAtts", 'a',
+            "The number of attributes to generate.", 1000, 0, Integer.MAX_VALUE);
+
+    public IntOption instanceRandomSeedOption = new IntOption(
+            "instanceRandomSeed", 'i',
+            "Seed for random generation of instances.", 1);
+
+    /** Stream header: one attribute per word followed by the class attribute. */
+    protected InstancesHeader streamHeader;
+
+    /** Random source shared by the word table, the tweets and the drift operations. */
+    protected Random instanceRandom;
+
+    /** Attribute index (0-based) of the word that currently holds each frequency rank. */
+    protected int[] wordTwitterGenerator;
+
+    /** Normalised Zipf probability of each frequency rank. */
+    protected double[] freqTwitterGenerator;
+
+    /** Cumulative sum of {@link #freqTwitterGenerator}, used to sample ranks. */
+    protected double[] sumFreqTwitterGenerator;
+
+    /** Polarity code of each frequency rank. */
+    protected int[] classTwitterGenerator;
+
+    /** Number of words in the table; taken from {@code numAttsOption} on restart. */
+    protected int sizeTable;
+
+    /** Probability that a word is created with positive polarity. */
+    protected double probPositive = 0.1;
+
+    /** Probability that a word is created with negative polarity. */
+    protected double probNegative = 0.1;
+
+    /** Exponent of the Zipf law that shapes the word frequencies. */
+    protected double zipfExponent = 1.5;
+
+    /** Scale of the tweet length; see {@link #nextInstance()}. */
+    protected double lengthTweet = 15;
+
+    /** Number of tweets produced since the last restart. */
+    protected int countTweets = 0;
+
+    @Override
+    public InstancesHeader getHeader() {
+        return this.streamHeader;
+    }
+
+    /** @return always -1, since this generator never runs out of instances */
+    @Override
+    public long estimatedRemainingInstances() {
+        return -1;
+    }
+
+    /** @return always {@code true} */
+    @Override
+    public boolean hasMoreInstances() {
+        return true;
+    }
+
+    /**
+     * Generates the next tweet.
+     * <p>
+     * Draws {@code (int) (lengthTweet * (1 + gaussian))} words (at least one), sets the
+     * attribute of every drawn word to 1 and counts the positive and negative words. A
+     * tweet with as many positive as negative words is discarded and drawn again, so this
+     * method only terminates if the table contains at least one non-neutral word.
+     *
+     * @return the tweet; its class value is 0 when positive words prevail and 1 otherwise
+     */
+    @Override
+    public InstanceExample nextInstance() {
+        int numAtts = this.numAttsOption.getValue();
+        double[] attVals;
+        int[] votes;
+
+        do {
+            // Start every attempt from scratch so that words of a rejected (tied) tweet
+            // do not leak into the instance that is finally returned.
+            attVals = new double[numAtts + 1];
+            votes = new int[3];
+
+            int length = (int) (lengthTweet * (1.0 + this.instanceRandom.nextGaussian()));
+            if (length < 1) {
+                length = 1;
+            }
+            for (int j = 0; j < length; j++) {
+                int rank = sampleRank(this.instanceRandom.nextDouble());
+                attVals[this.wordTwitterGenerator[rank]] = 1;
+                votes[this.classTwitterGenerator[rank]]++;
+            }
+        } while (votes[POSITIVE] == votes[NEGATIVE]);
+
+        Instance inst = new DenseInstance(1.0, attVals);
+        inst.setDataset(getHeader());
+        inst.setClassValue((votes[POSITIVE] > votes[NEGATIVE]) ? 0 : 1);
+        this.countTweets++;
+        return new InstanceExample(inst);
+    }
+
+    /**
+     * Maps a uniform random number to a frequency rank by inverting the cumulative
+     * distribution.
+     *
+     * @param rand uniform random number in [0, 1)
+     * @return the lowest rank whose cumulative frequency is not below {@code rand}
+     */
+    private int sampleRank(double rand) {
+        int low = 0;
+        int high = this.sizeTable - 1;
+        // Lower-bound search: the answer always stays inside [low, high].
+        while (low < high) {
+            int mid = (low + high) >>> 1;
+            if (rand > this.sumFreqTwitterGenerator[mid]) {
+                low = mid + 1;
+            } else {
+                high = mid;
+            }
+        }
+        return low;
+    }
+
+    @Override
+    public boolean isRestartable() {
+        return true;
+    }
+
+    /**
+     * Rebuilds the word table from the current option values, resets the tweet counter
+     * and reseeds the random source from {@code instanceRandomSeedOption}.
+     */
+    @Override
+    public void restart() {
+        this.sizeTable = this.numAttsOption.getValue();
+
+        // Prepare table of words to generate tweets
+        this.wordTwitterGenerator = new int[sizeTable];
+        this.freqTwitterGenerator = new double[sizeTable];
+        this.sumFreqTwitterGenerator = new double[sizeTable];
+        this.classTwitterGenerator = new int[sizeTable];
+
+        this.countTweets = 0;
+        this.instanceRandom = new Random(this.instanceRandomSeedOption.getValue());
+
+        double sum = 0;
+        for (int i = 0; i < this.sizeTable; i++) {
+            // Word ids are 0-based attribute indices, so they never touch the class slot
+            // that sits at index numAtts of an instance.
+            this.wordTwitterGenerator[i] = i;
+            this.freqTwitterGenerator[i] = 1.0 / Math.pow(i + 1, zipfExponent);
+            sum += this.freqTwitterGenerator[i];
+            this.sumFreqTwitterGenerator[i] = sum;
+
+            double rand = this.instanceRandom.nextDouble();
+            if (rand < probPositive) {
+                this.classTwitterGenerator[i] = POSITIVE;
+            } else if (rand < probPositive + probNegative) {
+                this.classTwitterGenerator[i] = NEGATIVE;
+            } else {
+                this.classTwitterGenerator[i] = NEUTRAL;
+            }
+        }
+        // Turn the raw Zipf weights into probabilities.
+        for (int i = 0; i < this.sizeTable; i++) {
+            this.freqTwitterGenerator[i] /= sum;
+            this.sumFreqTwitterGenerator[i] /= sum;
+        }
+    }
+
+    @Override
+    protected void prepareForUseImpl(TaskMonitor monitor, ObjectRepository repository) {
+        generateHeader();
+        restart();
+    }
+
+    @Override
+    public void getDescription(StringBuilder sb, int indent) {
+        // No description is produced.
+    }
+
+    /** Builds the header: attributes att1..attN plus a class attribute (class1, class2). */
+    private void generateHeader() {
+        ArrayList<Attribute> attributes = new ArrayList<Attribute>();
+        for (int i = 0; i < this.numAttsOption.getValue(); i++) {
+            attributes.add(new Attribute("att" + (i + 1)));
+        }
+        ArrayList<String> classLabels = new ArrayList<String>();
+        for (int i = 0; i < 2; i++) {
+            classLabels.add("class" + (i + 1));
+        }
+        attributes.add(new Attribute("class", classLabels));
+        this.streamHeader = new InstancesHeader(new Instances(
+                getCLICreationString(InstanceStream.class), attributes, 0));
+        this.streamHeader.setClassIndex(this.streamHeader.numAttributes() - 1);
+    }
+
+    /**
+     * Simulates concept drift by flipping the polarity of randomly chosen words.
+     * <p>
+     * Ranks are drawn with replacement until {@code numberWords} flips between positive
+     * and negative have been made; neutral words are skipped. Nothing happens when
+     * {@code numberWords} is not positive or the table has no non-neutral word.
+     *
+     * @param numberWords number of polarity flips to perform
+     */
+    public void changePolarity(int numberWords) {
+        if (numberWords <= 0 || !hasPolarWord()) {
+            return;
+        }
+        int flipped = 0;
+        while (flipped < numberWords) {
+            int randWord = this.instanceRandom.nextInt(this.sizeTable);
+            int polarity = this.classTwitterGenerator[randWord];
+            if (polarity == POSITIVE) {
+                this.classTwitterGenerator[randWord] = NEGATIVE;
+                flipped++;
+            } else if (polarity == NEGATIVE) {
+                this.classTwitterGenerator[randWord] = POSITIVE;
+                flipped++;
+            }
+        }
+    }
+
+    /** @return whether at least one word of the table is positive or negative */
+    private boolean hasPolarWord() {
+        for (int i = 0; i < this.sizeTable; i++) {
+            if (this.classTwitterGenerator[i] != NEUTRAL) {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Simulates concept drift by swapping the words held by randomly chosen pairs of
+     * frequency ranks. Polarities are stored per rank and are left untouched, so a
+     * moved word takes on the polarity of its new rank.
+     *
+     * @param numberWords number of swaps to perform
+     */
+    public void changeFreqWords(int numberWords) {
+        for (int i = 0; i < numberWords; i++) {
+            int randWordTo = this.instanceRandom.nextInt(this.sizeTable);
+            int randWordFrom = this.instanceRandom.nextInt(this.sizeTable);
+            // Exchange the stored word ids (not the rank indices) so that the table
+            // remains a permutation of the vocabulary.
+            int moved = this.wordTwitterGenerator[randWordTo];
+            this.wordTwitterGenerator[randWordTo] = this.wordTwitterGenerator[randWordFrom];
+            this.wordTwitterGenerator[randWordFrom] = moved;
+        }
+    }
+
+}