SAMOA-67 : Integration of MOA instances in SAMOA
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/ArffLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/ArffLoader.java
index a25dc62..dd82eda 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/ArffLoader.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/ArffLoader.java
@@ -29,39 +29,95 @@
import java.util.logging.Level;
import java.util.logging.Logger;
-/**
- * @author abifet
- */
public class ArffLoader implements Loader {
+ /**
+ * The instance information.
+ */
protected InstanceInformation instanceInformation;
- transient protected StreamTokenizer streamTokenizer;
+ protected InstancesHeader streamHeader;
- protected Reader reader;
+ /**
+ * The stream tokenizer.
+ */
+ protected transient StreamTokenizer streamTokenizer;
- protected int size;
-
- protected int classAttribute;
-
- public ArffLoader() {
- }
-
+ /**
+ * Instantiates a new arff loader.
+ *
+ * @param reader the reader
+ * @param size not used
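+ * @param classAttribute the 1-based index of the class attribute; a negative value selects the last attribute, and 0 leaves the class index unset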
+ */
public ArffLoader(Reader reader, int size, int classAttribute) {
- this.reader = reader;
- this.size = size;
- this.classAttribute = classAttribute;
- initStreamTokenizer(reader);
+ // size is not used
+ this(reader);
+ if (classAttribute < 0) {
+ this.instanceInformation.setClassIndex(this.instanceInformation.numAttributes() - 1);
+ //System.out.print(this.instanceInformation.classIndex());
+ } else if (classAttribute > 0) {
+ this.instanceInformation.setClassIndex(classAttribute - 1);
+ }
}
+ protected Range range;
+
+ /**
+ * Instantiates a new arff loader.
+ *
+ * @param reader the reader
+ */
+ public ArffLoader(Reader reader) {
+ this(reader, null);
+ }
+
+ /**
+ * Instantiates a new arff loader.
+ *
+ * @param reader the reader
+ * @param range the range set as the output indices of the instance information (multi-label data); may be null
+ */
+ public ArffLoader(Reader reader, Range range) {
+ this.range = range;
+ BufferedReader br = new BufferedReader(reader);
+
+ //Init streamTokenizer
+ streamTokenizer = new StreamTokenizer(br);
+ streamTokenizer.resetSyntax();
+ streamTokenizer.whitespaceChars(0, ' ');
+ streamTokenizer.wordChars(' ' + 1, '\u00FF');
+ streamTokenizer.whitespaceChars(',', ',');
+ streamTokenizer.commentChar('%');
+ streamTokenizer.quoteChar('"');
+ streamTokenizer.quoteChar('\'');
+ streamTokenizer.ordinaryChar('{');
+ streamTokenizer.ordinaryChar('}');
+ streamTokenizer.eolIsSignificant(true);
+
+ this.instanceInformation = this.getHeader();
+
+ if (range != null) { //is MultiLabel
+ this.instanceInformation.setRangeOutputIndices(range);
+ }
+
+ }
+
+ /**
+ * Gets the structure.
+ *
+ * @return the structure
+ */
public InstanceInformation getStructure() {
return this.instanceInformation;
}
- public Instance readInstance(Reader reader) {
- if (streamTokenizer == null) {
- initStreamTokenizer(reader);
- }
+ /**
+ * Reads instance. It detects if it is dense or sparse.
+ *
+ * @return the instance
+ */
+ public Instance readInstance() {
while (streamTokenizer.ttype == StreamTokenizer.TT_EOL) {
try {
streamTokenizer.nextToken();
@@ -78,94 +134,121 @@
}
+ /**
+ * Reads the next instance from the loader's own tokenizer (the reader argument is not used). It detects if it is dense or sparse.
+ *
+ * @return the instance
+ */
+ public Instance readInstance(Reader reader) {
+ while (streamTokenizer.ttype == StreamTokenizer.TT_EOL) {
+ try {
+ streamTokenizer.nextToken();
+ } catch (IOException ex) {
+ Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex);
+ }
+ }
+ if (streamTokenizer.ttype == '{') {
+ return readInstanceSparse();
+ // return readDenseInstanceSparse();
+ } else {
+ return readInstanceDense();
+ }
+
+ }
+
+ /**
+ * Reads a dense instance from the file.
+ *
+ * @return the instance
+ */
public Instance readInstanceDense() {
- Instance instance = new DenseInstance(this.instanceInformation.numAttributes() + 1);
- // System.out.println(this.instanceInformation.numAttributes());
+ Instance instance = newDenseInstance(this.instanceInformation.numAttributes());
+ //System.out.println(this.instanceInformation.numAttributes());
int numAttribute = 0;
try {
while (numAttribute == 0 && streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
- // For each line
+ //For each line
while (streamTokenizer.ttype != StreamTokenizer.TT_EOL
&& streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
- // For each item
+ //For each item
if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) {
- // System.out.println(streamTokenizer.nval + "Num ");
- this.setValue(instance, numAttribute, streamTokenizer.nval, true);
- //numAttribute++;
+ //System.out.println(streamTokenizer.nval + "Num ");
+ instance.setValue(numAttribute, streamTokenizer.nval);//this.setValue(instance, numAttribute, streamTokenizer.nval, true);
+ ++numAttribute;
- } else if (streamTokenizer.sval != null && (
- streamTokenizer.ttype == StreamTokenizer.TT_WORD
- || streamTokenizer.ttype == 34 || streamTokenizer.ttype == 39)) {
- // System.out.println(streamTokenizer.sval + "Str");
- boolean isNumeric = attributes.get(numAttribute).isNumeric();
+ } else if (streamTokenizer.sval != null && (streamTokenizer.ttype == StreamTokenizer.TT_WORD
+ || streamTokenizer.ttype == 34 || streamTokenizer.ttype == 39)) {
+ //System.out.println(streamTokenizer.sval + "Str");
+ boolean isNumeric = this.auxAttributes.get(numAttribute).isNumeric();
double value;
if ("?".equals(streamTokenizer.sval)) {
- value = Double.NaN; // Utils.missingValue();
+ value = Double.NaN; //Utils.missingValue();
} else if (isNumeric == true) {
value = Double.valueOf(streamTokenizer.sval).doubleValue();
} else {
- value = this.instanceInformation.attribute(numAttribute).indexOfValue(
- streamTokenizer.sval);
+ value = this.auxAttributes.get(numAttribute).indexOfValue(streamTokenizer.sval);
}
- this.setValue(instance, numAttribute, value, isNumeric);
- //numAttribute++;
+ instance.setValue(numAttribute,value);//this.setValue(instance, numAttribute, value, isNumeric);
+ ++numAttribute;
}
- numAttribute++;
streamTokenizer.nextToken();
}
streamTokenizer.nextToken();
- // System.out.println("EOL");
+ //System.out.println("EOL");
}
} catch (IOException ex) {
Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex);
}
- //System.out.println(instance);
return (numAttribute > 0) ? instance : null;
}
- private void setValue(Instance instance, int numAttribute, double value, boolean isNumber) {
+ protected void setValue(Instance instance, int numAttribute, double value, boolean isNumber) {
double valueAttribute;
- if (this.instanceInformation.attribute(numAttribute).isNominal) {
- valueAttribute = value;
- //this.instanceInformation.attribute(numAttribute).indexOfValue(Double.toString(value));
- // System.out.println(value +"/"+valueAttribute+" ");
+
+ if (isNumber && this.auxAttributes.get(numAttribute).isNominal) {
+ valueAttribute = value;//this.auxAttributes.get(numAttribute).indexOfValue(Double.toString(value));
+ //System.out.println(value +"/"+valueAttribute+" ");
} else {
valueAttribute = value;
- // System.out.println(value +"/"+valueAttribute+" ");
+ //System.out.println(value +"/"+valueAttribute+" ");
}
if (this.instanceInformation.classIndex() == numAttribute) {
- instance.setClassValue(valueAttribute);
- // System.out.println(value
- // +"<"+this.instanceInformation.classIndex()+">");
+ setClassValue(instance, valueAttribute);
+ //System.out.println(value +"<"+this.instanceInformation.classIndex()+">");
} else {
+ //if(numAttribute>this.instanceInformation.classIndex())
+ // numAttribute--;
instance.setValue(numAttribute, valueAttribute);
}
}
+ /**
+ * Reads a sparse instance.
+ *
+ * @return the instance
+ */
private Instance readInstanceSparse() {
- // Return a Sparse Instance
- Instance instance = new SparseInstance(1.0, null); // (this.instanceInformation.numAttributes()
- // + 1);
- // System.out.println(this.instanceInformation.numAttributes());
+ //Return a Sparse Instance
+ Instance instance = newSparseInstance(1.0); //, null); //(this.instanceInformation.numAttributes() + 1);
+ //System.out.println(this.instanceInformation.numAttributes());
int numAttribute;
ArrayList<Double> attributeValues = new ArrayList<Double>();
List<Integer> indexValues = new ArrayList<Integer>();
try {
- // while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
+ //while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
streamTokenizer.nextToken(); // Remove the '{' char
- // For each line
+ //For each line
while (streamTokenizer.ttype != StreamTokenizer.TT_EOL
&& streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
while (streamTokenizer.ttype != '}') {
- // For each item
- // streamTokenizer.nextToken();
- // while (streamTokenizer.ttype != '}'){
- // System.out.println(streamTokenizer.nval +"-"+
- // streamTokenizer.sval);
- // numAttribute = (int) streamTokenizer.nval;
+ //For each item
+ //streamTokenizer.nextToken();
+ //while (streamTokenizer.ttype != '}'){
+ //System.out.println(streamTokenizer.nval +"-"+ streamTokenizer.sval);
+ //numAttribute = (int) streamTokenizer.nval;
if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) {
numAttribute = (int) streamTokenizer.nval;
} else {
@@ -174,32 +257,26 @@
streamTokenizer.nextToken();
if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) {
- // System.out.print(streamTokenizer.nval + " ");
- this.setSparseValue(instance, indexValues, attributeValues, numAttribute,
- streamTokenizer.nval, true);
- // numAttribute++;
+ //System.out.print(streamTokenizer.nval + " ");
+ this.setSparseValue(instance, indexValues, attributeValues, numAttribute, streamTokenizer.nval, true);
+ //numAttribute++;
- } else if (streamTokenizer.sval != null && (
- streamTokenizer.ttype == StreamTokenizer.TT_WORD
- || streamTokenizer.ttype == 34)) {
- // System.out.print(streamTokenizer.sval + "-");
- if (attributes.get(numAttribute).isNumeric()) {
- this.setSparseValue(instance, indexValues, attributeValues, numAttribute,
- Double.valueOf(streamTokenizer.sval).doubleValue(), true);
+ } else if (streamTokenizer.sval != null && (streamTokenizer.ttype == StreamTokenizer.TT_WORD
+ || streamTokenizer.ttype == 34 || streamTokenizer.ttype == 39)) {
+ //System.out.print(streamTokenizer.sval + "-");
+ if (this.auxAttributes.get(numAttribute).isNumeric()) {
+ this.setSparseValue(instance, indexValues, attributeValues, numAttribute, Double.valueOf(streamTokenizer.sval).doubleValue(), true);
} else {
- this.setSparseValue(instance, indexValues, attributeValues, numAttribute,
- this.instanceInformation
- .attribute(numAttribute).indexOfValue(streamTokenizer.sval),
- false);
+ this.setSparseValue(instance, indexValues, attributeValues, numAttribute, this.auxAttributes.get(numAttribute).indexOfValue(streamTokenizer.sval), false);
}
}
streamTokenizer.nextToken();
}
- streamTokenizer.nextToken(); // Remove the '}' char
+ streamTokenizer.nextToken(); //Remove the '}' char
}
streamTokenizer.nextToken();
- // System.out.println("EOL");
- // }
+ //System.out.println("EOL");
+ //}
} catch (IOException ex) {
Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex);
@@ -210,78 +287,74 @@
arrayIndexValues[i] = indexValues.get(i).intValue();
arrayAttributeValues[i] = attributeValues.get(i).doubleValue();
}
- instance.addSparseValues(arrayIndexValues, arrayAttributeValues,
- this.instanceInformation.numAttributes());
+ instance.addSparseValues(arrayIndexValues, arrayAttributeValues, this.instanceInformation.numAttributes());
return instance;
}
- private void setSparseValue(Instance instance, List<Integer> indexValues,
- List<Double> attributeValues,
- int numAttribute, double value, boolean isNumber) {
+ private void setSparseValue(Instance instance, List<Integer> indexValues, List<Double> attributeValues, int numAttribute, double value, boolean isNumber) {
double valueAttribute;
- if (isNumber && this.instanceInformation.attribute(numAttribute).isNominal) {
- valueAttribute =
- this.instanceInformation.attribute(numAttribute).indexOfValue(Double.toString(value));
+ if (isNumber && this.auxAttributes.get(numAttribute).isNominal) {
+ valueAttribute = this.auxAttributes.get(numAttribute).indexOfValue(Double.toString(value));
} else {
valueAttribute = value;
}
- if (this.instanceInformation.classIndex() == numAttribute) {
- instance.setClassValue(valueAttribute);
- } else {
- // instance.setValue(numAttribute, valueAttribute);
- indexValues.add(numAttribute);
- attributeValues.add(valueAttribute);
- }
- // System.out.println(numAttribute+":"+valueAttribute+","+this.instanceInformation.classIndex()+","+value);
+ //if (this.instanceInformation.classIndex() == numAttribute) {
+ // setClassValue(instance, valueAttribute);
+ //} else {
+ //instance.setValue(numAttribute, valueAttribute);
+ indexValues.add(numAttribute);
+ attributeValues.add(valueAttribute);
+ //}
+ //System.out.println(numAttribute+":"+valueAttribute+","+this.instanceInformation.classIndex()+","+value);
}
+ /**
+ * Reads a sparse instance and returns it as a dense one.
+ *
+ * @return the instance
+ */
private Instance readDenseInstanceSparse() {
- // Returns a dense instance
- Instance instance = new DenseInstance(this.instanceInformation.numAttributes() + 1);
- // System.out.println(this.instanceInformation.numAttributes());
+ //Returns a dense instance
+ Instance instance = newDenseInstance(this.instanceInformation.numAttributes());
+ //System.out.println(this.instanceInformation.numAttributes());
int numAttribute;
try {
- // while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
+ //while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
streamTokenizer.nextToken(); // Remove the '{' char
- // For each line
+ //For each line
while (streamTokenizer.ttype != StreamTokenizer.TT_EOL
&& streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
while (streamTokenizer.ttype != '}') {
- // For each item
- // streamTokenizer.nextToken();
- // while (streamTokenizer.ttype != '}'){
- // System.out.print(streamTokenizer.nval+":");
+ //For each item
+ //streamTokenizer.nextToken();
+ //while (streamTokenizer.ttype != '}'){
+ //System.out.print(streamTokenizer.nval+":");
numAttribute = (int) streamTokenizer.nval;
streamTokenizer.nextToken();
if (streamTokenizer.ttype == StreamTokenizer.TT_NUMBER) {
- // System.out.print(streamTokenizer.nval + " ");
- this.setValue(instance, numAttribute, streamTokenizer.nval, true);
- // numAttribute++;
+ //System.out.print(streamTokenizer.nval + " ");
+ instance.setValue(numAttribute, streamTokenizer.nval);//this.setValue(instance, numAttribute, streamTokenizer.nval, true);
+ //numAttribute++;
- } else if (streamTokenizer.sval != null && (
- streamTokenizer.ttype == StreamTokenizer.TT_WORD
+ } else if (streamTokenizer.sval != null && (streamTokenizer.ttype == StreamTokenizer.TT_WORD
|| streamTokenizer.ttype == 34)) {
- // System.out.print(streamTokenizer.sval +
- // "/"+this.instanceInformation.attribute(numAttribute).indexOfValue(streamTokenizer.sval)+" ");
- if (attributes.get(numAttribute).isNumeric()) {
- this.setValue(instance, numAttribute,
- Double.valueOf(streamTokenizer.sval).doubleValue(), true);
+ //System.out.print(streamTokenizer.sval + "/"+this.auxAttributes.get(numAttribute).indexOfValue(streamTokenizer.sval)+" ");
+ if (this.auxAttributes.get(numAttribute).isNumeric()) {
+ instance.setValue(numAttribute, Double.valueOf(streamTokenizer.sval).doubleValue());//this.setValue(instance, numAttribute, Double.valueOf(streamTokenizer.sval).doubleValue(), true);
} else {
- this.setValue(instance, numAttribute,
- this.instanceInformation.attribute(numAttribute)
- .indexOfValue(streamTokenizer.sval), false);
- // numAttribute++;
+ instance.setValue(numAttribute, this.auxAttributes.get(numAttribute).indexOfValue(streamTokenizer.sval));//this.setValue(instance, numAttribute, this.auxAttributes.get(numAttribute).indexOfValue(streamTokenizer.sval), false);
+ //numAttribute++;
}
}
streamTokenizer.nextToken();
}
- streamTokenizer.nextToken(); // Remove the '}' char
+ streamTokenizer.nextToken(); //Remove the '}' char
}
streamTokenizer.nextToken();
- // System.out.println("EOL");
- // }
+ //System.out.println("EOL");
+ //}
} catch (IOException ex) {
Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex);
@@ -289,30 +362,37 @@
return instance;
}
- protected List<Attribute> attributes;
+ //protected List<Attribute> inputAttributes;
+ // protected List<Attribute> outputAttributes;
+
+ protected List<Attribute> auxAttributes;
private InstanceInformation getHeader() {
-
+ //commented JD
+ //this.range.setUpper(10000); //TO DO: Create a new range object with isInRange that does not need the upper limit
String relation = "file stream";
- // System.out.println("RELATION " + relation);
- attributes = new ArrayList<Attribute>();
+ //System.out.println("RELATION " + relation);
+ //inputAttributes = new ArrayList<Attribute>();
+ //outputAttributes = new ArrayList<Attribute>();
+ //ArrayList<Attribute>
+ auxAttributes = new ArrayList<Attribute>();//JD
+ int numAttributes = 0;
try {
streamTokenizer.nextToken();
while (streamTokenizer.ttype != StreamTokenizer.TT_EOF) {
- // For each line
- // if (streamTokenizer.ttype == '@') {
- if (streamTokenizer.ttype == StreamTokenizer.TT_WORD
- && streamTokenizer.sval.startsWith("@") == true) {
- // streamTokenizer.nextToken();
+ //For each line
+ //if (streamTokenizer.ttype == '@') {
+ if (streamTokenizer.ttype == StreamTokenizer.TT_WORD && streamTokenizer.sval.startsWith("@") == true) {
+ //streamTokenizer.nextToken();
String token = streamTokenizer.sval.toUpperCase();
if (token.startsWith("@RELATION")) {
streamTokenizer.nextToken();
relation = streamTokenizer.sval;
- // System.out.println("RELATION " + relation);
+ // System.out.println("RELATION " + relation);
} else if (token.startsWith("@ATTRIBUTE")) {
streamTokenizer.nextToken();
String name = streamTokenizer.sval;
- // System.out.println("* " + name);
+ //System.out.println("* " + name);
if (name == null) {
name = Double.toString(streamTokenizer.nval);
}
@@ -320,81 +400,95 @@
String type = streamTokenizer.sval;
// System.out.println("* " + name + ":" + type + " ");
if (streamTokenizer.ttype == '{') {
- parseDoubleBrackests(name);
- } else if (streamTokenizer.ttype == 10) {//for the buggy non-formal input arff file
streamTokenizer.nextToken();
- if (streamTokenizer.ttype == '{') {
- parseDoubleBrackests(name);
+ List<String> attributeLabels = new ArrayList<String>();
+ while (streamTokenizer.ttype != '}') {
+
+ if (streamTokenizer.sval != null) {
+ attributeLabels.add(streamTokenizer.sval);
+ // System.out.print(streamTokenizer.sval + ",");
+ } else {
+ attributeLabels.add(Double.toString(streamTokenizer.nval));
+ //System.out.print(streamTokenizer.nval + ",");
+ }
+
+ streamTokenizer.nextToken();
}
+ // System.out.println();
+ //attributes.add(new Attribute(name, attributeLabels));
+ //commented JD
+ /* if (this.range.isInRange(numAttribute)) {
+ outputAttributes.add(new Attribute(name, attributeLabels));
+ } else {
+ inputAttributes.add(new Attribute(name, attributeLabels));
+ }*/
+ auxAttributes.add(new Attribute(name, attributeLabels));
+ ++numAttributes;
} else {
// Add attribute
- attributes.add(new Attribute(name));
+ //commented JD
+ /*if (this.range.isInRange(numAttribute)) {
+ outputAttributes.add(new Attribute(name));
+ } else {
+ inputAttributes.add(new Attribute(name));
+ }*/
+ auxAttributes.add(new Attribute(name));
+ ++numAttributes;
}
} else if (token.startsWith("@DATA")) {
- // System.out.print("END");
+ //System.out.print("END");
streamTokenizer.nextToken();
break;
}
}
streamTokenizer.nextToken();
}
+ if (range != null) {
+ this.range.setUpper(numAttributes);
+ }
+ /*if (range==null) //is single-target. All instances should go to inputAtrributes (see setClassIndex(int) from InstanceInformation )
+ inputAttributes=auxAttributes;
+ else//is multi-target
+ {
+ this.range.setUpper(numAttribute);
+ for (int i=0; i<auxAttributes.size();i++)
+ {
+ //if (this.range.isInRange(i))
+ // outputAttributes.add(auxAttributes.get(i));
+ //else
+ inputAttributes.add(auxAttributes.get(i));
+
+ }
+ }*/
} catch (IOException ex) {
Logger.getLogger(ArffLoader.class.getName()).log(Level.SEVERE, null, ex);
}
- return new InstanceInformation(relation, attributes);
+ // this.range.setUpper(inputAttributes.size()+outputAttributes.size());
+ return new InstanceInformation(relation, auxAttributes);
}
- private void parseDoubleBrackests(String name) throws IOException {
-
- streamTokenizer.nextToken();
- List<String> attributeLabels = new ArrayList<String>();
- while (streamTokenizer.ttype != '}') {
-
- if (streamTokenizer.sval != null) {
- attributeLabels.add(streamTokenizer.sval);
- // System.out.print(streamTokenizer.sval + ",");
- } else {
- attributeLabels.add(Double.toString(streamTokenizer.nval));
- // System.out.print(streamTokenizer.nval + ",");
- }
-
- streamTokenizer.nextToken();
- }
- // System.out.println();
- attributes.add(new Attribute(name, attributeLabels));
-
+ protected Instance newSparseInstance(double d, double[] res) {
+ Instance inst = new SparseInstance(d, res); //is it dense?
+ //inst.setInstanceInformation(this.instanceInformation);
+ return inst;
}
- private void initStreamTokenizer(Reader reader) {
- BufferedReader br = new BufferedReader(reader);
-
- // Init streamTokenizer
- streamTokenizer = new StreamTokenizer(br);
-
- streamTokenizer.resetSyntax();
- streamTokenizer.whitespaceChars(0, ' ');
- streamTokenizer.wordChars(' ' + 1, '\u00FF');
- streamTokenizer.whitespaceChars(',', ',');
- streamTokenizer.commentChar('%');
- streamTokenizer.quoteChar('"');
- streamTokenizer.quoteChar('\'');
- streamTokenizer.ordinaryChar('{');
- streamTokenizer.ordinaryChar('}');
- streamTokenizer.eolIsSignificant(true);
-
- this.instanceInformation = this.getHeader();
- if (classAttribute < 0) {
- this.instanceInformation.setClassIndex(this.instanceInformation.numAttributes() - 1);
- // System.out.print(this.instanceInformation.classIndex());
- } else if (classAttribute > 0) {
- this.instanceInformation.setClassIndex(classAttribute - 1);
- }
+ protected Instance newSparseInstance(double d) {
+ Instance inst = new SparseInstance(d);
+ //inst.setInstanceInformation(this.instanceInformation);
+ return inst;
}
- @Override
- public Instance readInstance() {
- return readInstance(this.reader);
+ protected Instance newDenseInstance(int numberAttributes) {
+ Instance inst = new DenseInstance(numberAttributes);
+ //inst.setInstanceInformation(this.instanceInformation);
+ return inst;
}
+
+ private void setClassValue(Instance instance, double valueAttribute) {
+ instance.setValue(this.instanceInformation.classIndex(), valueAttribute);
+ }
+
}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java
index f14e6c3..128ace7 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Attribute.java
@@ -1,7 +1,3 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
package org.apache.samoa.instances;
/*
@@ -28,53 +24,79 @@
import java.text.SimpleDateFormat;
import java.util.*;
-/**
- * @author abifet
- */
public class Attribute implements Serializable {
- public static final String ARFF_ATTRIBUTE = "@attribute";
- public static final String ARFF_ATTRIBUTE_NUMERIC = "numeric";
- public static final String ARFF_ATTRIBUTE_NOMINAL = "nominal";
- public static final String ARFF_ATTRIBUTE_DATE = "date";
+ /** The keyword used to denote the start of an arff attribute declaration */
+ public final static String ARFF_ATTRIBUTE = "@attribute";
+
+ /** The "integer" keyword, used to denote a numeric attribute */
+ public final static String ARFF_ATTRIBUTE_INTEGER = "integer";
+
+ /** The "real" keyword, used to denote a numeric attribute */
+ public final static String ARFF_ATTRIBUTE_REAL = "real";
+
+ /** A keyword used to denote a numeric attribute */
+ public final static String ARFF_ATTRIBUTE_NUMERIC = "numeric";
+
+ /** The keyword used to denote a string attribute */
+ public final static String ARFF_ATTRIBUTE_STRING = "string";
+
+ /** The keyword used to denote a date attribute */
+ public final static String ARFF_ATTRIBUTE_DATE = "date";
+
+ /** The keyword used to denote a relation-valued attribute */
+ public final static String ARFF_ATTRIBUTE_RELATIONAL = "relational";
+
+ /** The keyword used to denote the end of the declaration of a subrelation */
+ public final static String ARFF_END_SUBRELATION = "@end";
+
+ /** Strings longer than this will be stored compressed. */
+ private static final int STRING_COMPRESS_THRESHOLD = 200;
/**
- *
+ * Whether this attribute is nominal.
*/
protected boolean isNominal;
+
/**
- *
+ * Whether this attribute is numeric.
*/
protected boolean isNumeric;
+
/**
- *
+ * Whether this attribute is a date.
*/
protected boolean isDate;
+
+ /**
+ * Date format specification for date attributes
+ */
+ protected SimpleDateFormat m_DateFormat;
+
/**
- *
+ * The name.
*/
protected String name;
+
/**
- *
+ * The attribute values.
*/
protected List<String> attributeValues;
/**
+ * Gets the attribute values.
*
- * @return
+ * @return the attribute values
*/
public List<String> getAttributeValues() {
return attributeValues;
}
- /**
- *
- */
- protected int index;
/**
+ * Instantiates a new attribute.
*
- * @param string
+ * @param string the string
*/
public Attribute(String string) {
this.name = string;
@@ -82,9 +104,10 @@
}
/**
+ * Instantiates a new attribute.
*
- * @param attributeName
- * @param attributeValues
+ * @param attributeName the attribute name
+ * @param attributeValues the attribute values
*/
public Attribute(String attributeName, List<String> attributeValues) {
this.name = attributeName;
@@ -93,48 +116,71 @@
}
/**
+ * Instantiates a new attribute.
*
+ * @param attributeName the attribute name
+ * @param dateFormat the format of the date used
+ */
+ public Attribute(String attributeName, String dateFormat) {
+ this.name = attributeName;
+ this.valuesStringAttribute = null;
+ this.isDate = true;
+
+ if (dateFormat != null) {
+ m_DateFormat = new SimpleDateFormat(dateFormat);
+ } else {
+ m_DateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+ }
+ }
+
+ /**
+ * Instantiates a new attribute.
*/
public Attribute() {
this("");
}
/**
+ * Checks if is nominal.
*
- * @return
+ * @return true, if is nominal
*/
public boolean isNominal() {
return this.isNominal;
}
/**
+ * Returns the name of this attribute.
*
- * @return
+ * @return the attribute name
*/
public String name() {
return this.name;
}
/**
+ * Returns the entry of the attribute values list at the given position.
*
- * @param value
- * @return
+ * @param value the value
+ * @return the string
*/
public String value(int value) {
return attributeValues.get(value);
}
/**
+ * Checks if is numeric.
*
- * @return
+ * @return true, if is numeric
*/
public boolean isNumeric() {
return isNumeric;
}
/**
+ * Num values.
*
- * @return
+ * @return the int
*/
public int numValues() {
if (isNumeric()) {
@@ -145,28 +191,43 @@
}
/**
+ * Index.
*
- * @return
+ * @return the int
*/
- public int index() { // RuleClassifier
- return this.index;
- }
+ // public int index() { //RuleClassifier
+ // return this.index;
+ // }
+ /**
+ * Formats the given value, converted to a long and wrapped in a Date, using this attribute's date format.
+ *
+ * @param value the value
+ * @return the string
+ */
String formatDate(double value) {
- SimpleDateFormat sdf = new SimpleDateFormat();
- return sdf.format(new Date((long) value));
+ return this.m_DateFormat.format(new Date((long) value));
}
+ /**
+ * Checks if is date.
+ *
+ * @return true, if is date
+ */
boolean isDate() {
return isDate;
}
+ /**
+ * The values string attribute.
+ */
private Map<String, Integer> valuesStringAttribute;
/**
+ * Index of value.
*
- * @param value
- * @return
+ * @param value the value
+ * @return the int
*/
public final int indexOfValue(String value) {
@@ -189,13 +250,20 @@
}
}
- @Override
- public String toString() {
+ /**
+ * Returns a description of this attribute in ARFF format. Quotes
+ * strings if they contain whitespace characters, or if they
+ * are a question mark.
+ *
+ * @return a description of this attribute as a string
+ */
+ public final String toString() {
+
StringBuffer text = new StringBuffer();
- text.append(ARFF_ATTRIBUTE).append(" ").append(Utils.quote(this.name)).append(" ");
+ text.append(ARFF_ATTRIBUTE).append(" ").append(Utils.quote(this.name())).append(" ");
- if (isNominal) {
+ if (this.isNominal){
text.append('{');
Enumeration enu = enumerateValues();
while (enu.hasMoreElements()) {
@@ -204,10 +272,12 @@
text.append(',');
}
text.append('}');
- } else if (isNumeric) {
+ } else if (this.isNumeric){
text.append(ARFF_ATTRIBUTE_NUMERIC);
- } else if (isDate) {
- text.append(ARFF_ATTRIBUTE_DATE);
+ } else if (this.isDate){
+ text.append(ARFF_ATTRIBUTE_DATE).append(" ").append(Utils.quote(m_DateFormat.toPattern()));
+ } else{
+ text.append("UNKNOW");
}
return text.toString();
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AttributesInformation.java b/samoa-instances/src/main/java/org/apache/samoa/instances/AttributesInformation.java
new file mode 100644
index 0000000..58ece8e
--- /dev/null
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/AttributesInformation.java
@@ -0,0 +1,152 @@
+package org.apache.samoa.instances;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
+public class AttributesInformation implements Serializable {
+
+ /**
+ * The attribute information.
+ */
+ protected List<Attribute> attributes;
+ protected List<Integer> indexValues;
+ /**
+ * The number of attributes.
+ */
+ protected int numberAttributes;
+
+ /**
+ * The default attribute used for numeric values
+ */
+ protected Attribute defaultNumericAttribute;
+
+ public AttributesInformation(AttributesInformation chunk) {
+ this.attributes = chunk.attributes;
+ this.indexValues = chunk.indexValues;
+ this.numberAttributes = chunk.numberAttributes;
+ }
+
+ public AttributesInformation(List<Attribute> v, List<Integer> i, int numberAttributes) {
+ this.attributes = v;
+ this.indexValues = i;
+ this.numberAttributes = numberAttributes;
+ }
+
+ public AttributesInformation(List<Attribute> v, int numberAttributes) {
+ this.attributes = v;
+ this.indexValues = new ArrayList<Integer>(numberAttributes);
+ for (int i = 0; i < numberAttributes; i++) {
+ this.indexValues.add(i);
+ }
+ this.numberAttributes = numberAttributes;
+ }
+
+ public AttributesInformation() {
+ this.attributes = null;
+ this.indexValues = null;
+ this.numberAttributes = 0;
+ this.defaultNumericAttribute = null;
+ }
+
+ /**
+ * Attribute.
+ *
+ * @param indexAttribute the index Attribute
+ * @return the attribute
+ */
+ public Attribute attribute(int indexAttribute) {
+ if (this.attributes == null) {
+ //All attributes are numeric
+ return defaultNumericAttribute();
+ }
+ int location = locateIndex(indexAttribute);
+ if (location == -1) {
+ //if there is not attribute information, it is numeric
+ return defaultNumericAttribute();
+ }
+ return attributes.get(location);
+ }
+
+ public void add(Attribute attribute, int value) {
+ this.attributes.add(attribute);
+ this.indexValues.add(value);
+ }
+
+ /**
+ * Sets the attribute information.
+ *
+ * @param v the new attribute information
+ */
+ public void setAttributes(List<Attribute> v) {
+ this.attributes = v;
+ this.numberAttributes=v.size();
+ }
+
+ /**
+ * Locates the greatest index that is not greater than the given index.
+ *
+ * @return the internal index of the attribute index. Returns -1 if no index
+ * with this property could be found
+ */
+ public int locateIndex(int index) {
+
+ int min = 0;
+ int max = this.indexValues.size() - 1;
+
+ if (max == -1) {
+ return -1;
+ }
+
+ // Binary search
+ while ((this.indexValues.get(min) <= index) && (this.indexValues.get(max) >= index)) {
+ int current = (max + min) / 2;
+ if (this.indexValues.get(current) > index) {
+ max = current - 1;
+ } else if (this.indexValues.get(current) < index) {
+ min = current + 1;
+ } else {
+ return current;
+ }
+ }
+ if (this.indexValues.get(max) < index) {
+ return max;
+ } else {
+ return min - 1;
+ }
+ }
+
+ private Attribute defaultNumericAttribute() {
+ if (this.defaultNumericAttribute == null) {
+ this.defaultNumericAttribute = new Attribute("default");
+ }
+ return this.defaultNumericAttribute;
+ }
+
+ public void setAttributes(List<Attribute> v, List<Integer> indexValues) {
+ this.attributes = v;
+ this.numberAttributes=v.size();
+ this.indexValues=indexValues;
+ }
+
+}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroBinaryLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroBinaryLoader.java
index 5c57aa1..84b7eec 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroBinaryLoader.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroBinaryLoader.java
@@ -29,9 +29,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-/**
- * Load Data from Binary Avro Stream and parse to corresponding Dense & Parse Instances
- */
public class AvroBinaryLoader extends AvroLoader {
private static final long serialVersionUID = 1L;
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroJsonLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroJsonLoader.java
index b765405..622347a 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroJsonLoader.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroJsonLoader.java
@@ -34,9 +34,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-/**
- * Load Data from JSON Avro Stream and parse to corresponding Dense & Parse Instances
- */
public class AvroJsonLoader extends AvroLoader {
private static final long serialVersionUID = 1L;
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
index 2b36744..d3e7f27 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/AvroLoader.java
@@ -31,11 +31,6 @@
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
-/**
- * Load Data from Avro Stream and parse to corresponding Dense & Parse Instances Abstract Class: Subclass this class for
- * different types of Avro Encodings
- *
- */
public abstract class AvroLoader implements Loader {
private static final long serialVersionUID = 1L;
@@ -133,11 +128,7 @@
*/
private void setDenseValue(Instance instance, int numAttribute, double valueAttribute) {
-
- if (this.instanceInformation.classIndex() == numAttribute)
- instance.setClassValue(valueAttribute);
- else
- instance.setValue(numAttribute, valueAttribute);
+ instance.setValue(numAttribute, valueAttribute);
}
/**
@@ -267,16 +258,16 @@
private boolean isNumeric(Field field) {
if (field.schema().getType() == Schema.Type.DOUBLE
- || field.schema().getType() == Schema.Type.FLOAT
- || field.schema().getType() == Schema.Type.LONG
- || field.schema().getType() == Schema.Type.INT)
+ || field.schema().getType() == Schema.Type.FLOAT
+ || field.schema().getType() == Schema.Type.LONG
+ || field.schema().getType() == Schema.Type.INT)
return true;
if (field.schema().getType() == Schema.Type.UNION) {
for (Schema schema: field.schema().getTypes()) {
if (schema.getType() == Schema.Type.DOUBLE
- || schema.getType() == Schema.Type.FLOAT
- || schema.getType() == Schema.Type.LONG
- || schema.getType() == Schema.Type.INT)
+ || schema.getType() == Schema.Type.FLOAT
+ || schema.getType() == Schema.Type.LONG
+ || schema.getType() == Schema.Type.INT)
return true;
}
}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java
index 80feb11..d8a789b 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstance.java
@@ -1,7 +1,3 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
package org.apache.samoa.instances;
/*
@@ -24,65 +20,42 @@
* #L%
*/
-import java.text.SimpleDateFormat;
+public class DenseInstance extends InstanceImpl {
-/**
- * @author abifet
- */
-public class DenseInstance extends SingleLabelInstance {
-
- private static final long serialVersionUID = 280360594027716737L;
-
- public DenseInstance() {
- // necessary for kryo serializer
- }
-
+ /**
+ * Creates a dense instance from a weight and an array of attribute values.
+ * Both arguments are passed unchanged to the superclass constructor.
+ *
+ * @param weight the weight of the instance
+ * @param res the attribute values of the instance
+ */
public DenseInstance(double weight, double[] res) {
super(weight, res);
}
- public DenseInstance(SingleLabelInstance inst) {
+ /**
+ * Creates a dense instance from an existing instance by delegating to the
+ * superclass constructor that takes an {@code InstanceImpl}.
+ *
+ * @param inst the instance to build this one from
+ */
+ public DenseInstance(InstanceImpl inst) {
super(inst);
}
+ /**
+ * Creates a dense instance from an existing instance.
+ * <p>
+ * The argument is cast to {@code InstanceImpl} without a type check, so
+ * passing any other {@code Instance} implementation fails with a
+ * {@code ClassCastException}.
+ *
+ * @param inst the instance to build this one from; must be an {@code InstanceImpl}
+ */
public DenseInstance(Instance inst) {
- super((SingleLabelInstance) inst);
+ super((InstanceImpl) inst);
}
+ /**
+ * Instantiates a new dense instance.
+ *
+ * @param numberAttributes the number attributes
+ */
public DenseInstance(double numberAttributes) {
super((int) numberAttributes);
- // super(1, new double[(int) numberAttributes-1]);
- // Add missing values
- // for (int i = 0; i < numberAttributes-1; i++) {
- // //this.setValue(i, Double.NaN);
- // }
-
- }
-
- @Override
- public String toString() {
- StringBuffer text = new StringBuffer();
-
- //append all attributes except the class attribute.
- for (int attIndex = 0; attIndex < this.numAttributes()-1; attIndex++) {
- if (!this.isMissing(attIndex)) {
- if (this.attribute(attIndex).isNominal()) {
- int valueIndex = (int) this.value(attIndex);
- String stringValue = this.attribute(attIndex).value(valueIndex);
- text.append(stringValue).append(",");
- } else if (this.attribute(attIndex).isNumeric()) {
- text.append(this.value(attIndex)).append(",");
- } else if (this.attribute(attIndex).isDate()) {
- SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
- text.append(dateFormatter.format(this.value(attIndex))).append(",");
- }
- } else {
- text.append("?,");
- }
- }
- //append the class value at the end of the instance.
- text.append(this.classAttribute().value((int)classValue()));
-
- return text.toString();
}
}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstanceData.java b/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstanceData.java
index ecb2f88..6781e91 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstanceData.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/DenseInstanceData.java
@@ -1,7 +1,3 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
package org.apache.samoa.instances;
/*
@@ -24,74 +20,151 @@
* #L%
*/
-/**
- *
- * @author abifet
- */
-public class DenseInstanceData implements InstanceData {
+public class DenseInstanceData implements InstanceData{
+ /**
+ * Creates instance data backed directly by the given array.
+ * The array reference is stored as-is; no copy is made.
+ *
+ * @param array the attribute values to use as backing storage
+ */
public DenseInstanceData(double[] array) {
this.attributeValues = array;
}
+ /**
+ * Creates instance data with a newly allocated array of the given size.
+ *
+ * @param length the number of attribute values to allocate
+ */
public DenseInstanceData(int length) {
this.attributeValues = new double[length];
}
+ /**
+ * Creates empty instance data (an array of length zero).
+ */
public DenseInstanceData() {
this(0);
}
+ /** The attribute values. */
protected double[] attributeValues;
+ /**
+ * Gets the number of attribute values stored.
+ *
+ * @return the length of the backing array
+ */
@Override
public int numAttributes() {
return this.attributeValues.length;
}
+ /**
+ * Gets the value stored at the given index.
+ * <p>
+ * An index at or beyond the end of the array is not rejected: the last
+ * stored value is returned instead.
+ * NOTE(review): with an empty array that fallback reads position -1 and
+ * throws {@code ArrayIndexOutOfBoundsException}; negative indexes throw too.
+ *
+ * @param indexAttribute the index of the attribute
+ * @return the value at that index, or the last value if the index is too large
+ */
@Override
public double value(int indexAttribute) {
- return this.attributeValues[indexAttribute];
+ if (this.attributeValues.length <= indexAttribute)
+ return this.attributeValues[this.attributeValues.length-1];
+
+ return this.attributeValues[indexAttribute];
}
+ /**
+ * Checks whether the value at the given index is missing.
+ * A value counts as missing when {@link #value(int)} returns NaN.
+ *
+ * @param indexAttribute the index of the attribute
+ * @return true if the value is NaN
+ */
@Override
public boolean isMissing(int indexAttribute) {
return Double.isNaN(this.value(indexAttribute));
}
+ /**
+ * Gets the number of stored values; for dense data this is the same as
+ * {@link #numAttributes()}.
+ *
+ * @return the number of stored values
+ */
@Override
public int numValues() {
return numAttributes();
}
+ /**
+ * Maps a storage position to an attribute index. Dense data uses an
+ * identity mapping, so the argument is returned unchanged.
+ *
+ * @param indexAttribute the storage position
+ * @return the same value that was passed in
+ */
@Override
public int index(int indexAttribute) {
return indexAttribute;
}
+ /**
+ * Gets a value by storage position; for dense data this simply delegates
+ * to {@link #value(int)}.
+ *
+ * @param indexAttribute the storage position
+ * @return the result of {@code value(indexAttribute)}
+ */
@Override
public double valueSparse(int indexAttribute) {
return value(indexAttribute);
}
+ /**
+ * Checks by storage position whether a value is missing; for dense data
+ * this simply delegates to {@link #isMissing(int)}.
+ *
+ * @param indexAttribute the storage position
+ * @return the result of {@code isMissing(indexAttribute)}
+ */
@Override
public boolean isMissingSparse(int indexAttribute) {
return isMissing(indexAttribute);
}
- /*
- * @Override public double value(Attribute attribute) { return
- * value(attribute.index()); }
+ /**
+ * Gets the attribute values as an array.
+ * <p>
+ * The internal array itself is returned, not a copy, so changes made
+ * through the returned array are visible to this object.
+ *
+ * @return the backing array of attribute values
*/
-
@Override
public double[] toDoubleArray() {
- return attributeValues.clone();
+ return attributeValues;
}
+ /**
+ * Stores a value at the given index of the backing array.
+ *
+ * @param attributeIndex the index of the attribute to set
+ * @param d the value to store
+ */
@Override
public void setValue(int attributeIndex, double d) {
this.attributeValues[attributeIndex] = d;
}
+ /**
+  * Removes the value at the given position, replacing the backing array with
+  * one that is one element shorter.
+  *
+  * @param index position of the value to remove
+  */
+ @Override
+ public void deleteAttributeAt(int index) {
+   int oldLength = attributeValues.length;
+   double[] shrunk = new double[oldLength - 1];
+
+   // Values before the removed position keep their place.
+   System.arraycopy(attributeValues, 0, shrunk, 0, index);
+   // Values after it move one slot to the left.
+   int tailLength = oldLength - (index + 1);
+   if (tailLength > 0) {
+     System.arraycopy(attributeValues, index + 1, shrunk, index, tailLength);
+   }
+   attributeValues = shrunk;
+ }
+
+ /**
+  * Creates another data holder over the same values.
+  * <p>
+  * The backing array is handed to the new object as-is, so both objects share it.
+  *
+  * @return a new {@code DenseInstanceData} wrapping this object's array
+  */
+ @Override
+ public InstanceData copy() {
+   double[] shared = this.attributeValues;
+   return new DenseInstanceData(shared);
+ }
+
}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Instance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Instance.java
index ee99914..da4dcdd 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/Instance.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Instance.java
@@ -1,7 +1,3 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
package org.apache.samoa.instances;
/*
@@ -26,68 +22,310 @@
import java.io.Serializable;
-/**
- *
- * @author abifet
- */
-
public interface Instance extends Serializable {
- double weight();
+ /**
+ * Gets the weight of the instance.
+ *
+ * @return the weight
+ */
+ public double weight();
- void setWeight(double weight);
+ /**
+ * Sets the weight.
+ *
+ * @param weight the new weight
+ */
+ public void setWeight(double weight);
- // Attributes
- Attribute attribute(int instAttIndex);
+ /**
+ * Attribute.
+ *
+ * @param instAttIndex the inst att index
+ * @return the attribute
+ */
+ public Attribute attribute(int instAttIndex);
- void deleteAttributeAt(int i);
+ /**
+ * Delete attribute at.
+ *
+ * @param i the index
+ */
+ public void deleteAttributeAt(int i);
- void insertAttributeAt(int i);
+ /**
+ * Insert attribute at.
+ *
+ * @param i the index
+ */
+ public void insertAttributeAt(int i);
- int numAttributes();
+ /**
+ * Gets the number of attributes.
+ *
+ * @return the number of attributes
+ */
+ public int numAttributes();
+ /**
+ * Adds the sparse values.
+ *
+ * @param indexValues the index values
+ * @param attributeValues the attribute values
+ * @param numberAttributes the number attributes
+ */
public void addSparseValues(int[] indexValues, double[] attributeValues, int numberAttributes);
- // Values
- int numValues();
+ /**
+ * Gets the number of values, mainly for sparse instances.
+ *
+ * @return the number of values
+ */
+ public int numValues();
- String stringValue(int i);
+ /**
+ * Gets the value of a discrete attribute as a string.
+ *
+ * @param i the attribute index (presumably the same indexing as
+ * {@link #value(int)} - TODO confirm)
+ * @return the value as a string
+ */
+ public String stringValue(int i);
- double value(int instAttIndex);
+ /**
+ * Gets the value of an attribute.
+ *
+ * @param instAttIndex the inst att index
+ * @return the double
+ */
+ public double value(int instAttIndex);
- double value(Attribute attribute);
+ /**
+ * Marks the attribute at the given index as missing.
+ *
+ * @param instAttIndex the index of the attribute to mark as missing
+ */
+ public void setMissing(int instAttIndex);
- void setValue(int m_numAttributes, double d);
+ /**
+ * Sets the value of an attribute.
+ *
+ * @param instAttIndex the index
+ * @param value the value
+ */
+ public void setValue(int instAttIndex, double value);
- boolean isMissing(int instAttIndex);
+ /**
+ * Checks if an attribute is missing.
+ *
+ * @param instAttIndex the inst att index
+ * @return true, if is missing
+ */
+ public boolean isMissing(int instAttIndex);
- int index(int i);
+ /**
+ * Gets the index of the attribute given the index of the array in a sparse
+ * representation.
+ *
+ * @param arrayIndex the index of the array
+ * @return the index
+ */
+ public int index(int arrayIndex);
- double valueSparse(int i);
+ /**
+ * Gets the value of an attribute in a sparse representation of the
+ * instance.
+ *
+ * @param i the position to read (presumably a position in the sparse
+ * array, as for {@link #index(int)} - TODO confirm)
+ * @return the value
+ */
+ public double valueSparse(int i);
- boolean isMissingSparse(int p1);
+ /**
+ * Checks if a value is missing, addressed in the sparse representation.
+ *
+ * @param p1 the position to check (presumably a position in the sparse
+ * array, as for {@link #valueSparse(int)} - TODO confirm)
+ * @return true if the value is missing
+ */
+ public boolean isMissingSparse(int p1);
- double[] toDoubleArray();
+ /**
+ * To double array.
+ *
+ * @return the double[]
+ */
+ public double[] toDoubleArray();
- // Class
- Attribute classAttribute();
+ /**
+ * Class attribute.
+ *
+ * @return the attribute
+ */
+ public Attribute classAttribute();
- int classIndex();
+ /**
+ * Class index.
+ *
+ * @return the int
+ */
+ public int classIndex();
- boolean classIsMissing();
+ /**
+ * Checks if the class value is missing.
+ *
+ * @return true if the class value is missing
+ */
+ public boolean classIsMissing();
- double classValue();
+ /**
+ * Class value.
+ *
+ * @return the double
+ */
+ public double classValue();
- int numClasses();
+ /**
+ * Num classes.
+ *
+ * @return the int
+ */
+ public int numClasses();
- void setClassValue(double d);
+ /**
+ * Sets the class value.
+ *
+ * @param d the new class value
+ */
+ public void setClassValue(double d);
- Instance copy();
+ /**
+ * Copy.
+ *
+ * @return the instance
+ */
+ public Instance copy();
- // Dataset
- void setDataset(Instances dataset);
+ /**
+ * Sets the dataset.
+ *
+ * @param dataset the new dataset
+ */
+ public void setDataset(Instances dataset);
- Instances dataset();
+ /**
+ * Dataset.
+ *
+ * @return the instances
+ */
+ public Instances dataset();
- String toString();
+ /**
+ * Gets the number of input attributes.
+ *
+ * @return the number of input attributes
+ */
+ public int numInputAttributes();
+
+ /**
+ * Gets the number of output attributes.
+ *
+ * @return the number of output attributes
+ */
+ public int numOutputAttributes();
+
+ /**
+ * Gets the number of output attributes.
+ *
+ * @return the number of output attributes
+ */
+ public int numberOutputTargets();
+
+ /**
+ * Gets the value of an output attribute.
+ *
+ * @param attributeIndex the index
+ * @return the value
+ */
+ public double classValue(int attributeIndex);
+
+ /**
+ * Sets the value of an output attribute.
+ *
+ * @param indexClass the output attribute index
+ * @param valueAttribute the value of the attribute
+ */
+ public void setClassValue(int indexClass, double valueAttribute);
+
+ /**
+ * Gets an output attribute given its index.
+ *
+ * @param attributeIndex the index
+ * @return the attribute
+ */
+ public Attribute outputAttribute(int attributeIndex);
+
+ /**
+ * Gets an input attribute given its index.
+ *
+ * @param attributeIndex the index
+ * @return the attribute
+ */
+ public Attribute inputAttribute(int attributeIndex);
+
+ /**
+ * Gets the value of an input attribute.
+ *
+ * @param attributeIndex the index
+ * @return the value
+ */
+ public double valueInputAttribute(int attributeIndex);
+
+ /**
+ * Gets the value of an output attribute.
+ *
+ * @param attributeIndex the index
+ * @return the value
+ */
+ public double valueOutputAttribute(int attributeIndex);
+
+ /**
+ * Gets the index of the given attribute.
+ *
+ * @param attribute the attribute to be found
+ * @return the index of the attribute
+ */
+ public int indexOfAttribute(Attribute attribute);
+
+ /**
+ * Gets the value of an attribute, given the attribute.
+ *
+ * @param attribute the attribute
+ * @return the double
+ */
+ public double value(Attribute attribute);
+
+ /**
+ * Marks the given attribute as missing.
+ *
+ * @param attribute the attribute to mark as missing
+ */
+ public void setMissing(Attribute attribute);
+
+ /**
+ * Sets the value of the given attribute.
+ *
+ * @param attribute the attribute whose value is set
+ * @param value the new value
+ */
+ public void setValue(Attribute attribute, double value);
+
+ /**
+ * Checks if the value of the given attribute is missing.
+ *
+ * @param attribute the attribute to check
+ * @return true if the value is missing
+ */
+ public boolean isMissing(Attribute attribute);
+
+
+
}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceData.java b/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceData.java
index eca4145..b735ea5 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceData.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceData.java
@@ -1,7 +1,3 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
package org.apache.samoa.instances;
/*
@@ -26,30 +22,90 @@
import java.io.Serializable;
-/**
- *
- * @author abifet
- */
public interface InstanceData extends Serializable {
+ /**
+ * Num attributes.
+ *
+ * @return the int
+ */
public int numAttributes();
+ /**
+ * Value.
+ *
+ * @param instAttIndex the inst att index
+ * @return the double
+ */
public double value(int instAttIndex);
+ /**
+ * Checks if is missing.
+ *
+ * @param instAttIndex the inst att index
+ * @return true, if is missing
+ */
public boolean isMissing(int instAttIndex);
+ /**
+ * Num values.
+ *
+ * @return the int
+ */
public int numValues();
+ /**
+ * Index.
+ *
+ * @param i the i
+ * @return the int
+ */
public int index(int i);
+ /**
+ * Value sparse.
+ *
+ * @param i the i
+ * @return the double
+ */
public double valueSparse(int i);
+ /**
+ * Checks if is missing sparse.
+ *
+ * @param p1 the p1
+ * @return true, if is missing sparse
+ */
public boolean isMissingSparse(int p1);
- // public double value(Attribute attribute);
-
+ /**
+ * To double array.
+ *
+ * @return the double[]
+ */
public double[] toDoubleArray();
+ /**
+ * Sets the value of an attribute.
+ *
+ * @param m_numAttributes the index of the attribute to set (despite the
+ * name, {@code DenseInstanceData} uses it as an array index)
+ * @param d the value to store
+ */
public void setValue(int m_numAttributes, double d);
+
+ /**
+ * Deletes an attribute.
+ *
+ * @param index the index of the attribute to delete
+ */
+ public void deleteAttributeAt(int index);
+
+ /**
+ * Produces a shallow copy of this instance data.
+ *
+ * @return the shallow copy
+ */
+ public InstanceData copy();
+
}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceImpl.java b/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceImpl.java
new file mode 100644
index 0000000..ff77dc2
--- /dev/null
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceImpl.java
@@ -0,0 +1,483 @@
+package org.apache.samoa.instances;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.text.SimpleDateFormat;
+
+public class InstanceImpl implements MultiLabelInstance {
+
+  /**
+   * The weight of this instance.
+   */
+  protected double weight;
+
+  /**
+   * The attribute values of this instance (dense or sparse storage).
+   */
+  protected InstanceData instanceData;
+
+  /**
+   * The header describing the attributes of this instance. It is not set by
+   * any constructor that builds an instance from raw values; see
+   * {@link #setDataset(Instances)}.
+   */
+  protected InstancesHeader instanceHeader;
+
+  /**
+   * Creates an instance that refers to the same data and header as another one.
+   * <p>
+   * Only the references are copied: the new instance shares
+   * {@code instanceData} and {@code instanceHeader} with {@code inst}.
+   *
+   * @param inst the instance to take weight, data and header from
+   */
+  public InstanceImpl(InstanceImpl inst) {
+    this.weight = inst.weight;
+    this.instanceData = inst.instanceData;
+    this.instanceHeader = inst.instanceHeader;
+  }
+
+  /**
+   * Creates a dense instance.
+   *
+   * @param weight the weight of the instance
+   * @param res the attribute values, wrapped in a {@code DenseInstanceData}
+   */
+  public InstanceImpl(double weight, double[] res) {
+    this.weight = weight;
+    this.instanceData = new DenseInstanceData(res);
+  }
+
+  /**
+   * Creates a sparse instance.
+   *
+   * @param weight the weight of the instance
+   * @param attributeValues the attribute values
+   * @param indexValues the index values
+   * @param numberAttributes the number of attributes
+   */
+  public InstanceImpl(double weight, double[] attributeValues, int[] indexValues, int numberAttributes) {
+    this.weight = weight;
+    this.instanceData = new SparseInstanceData(attributeValues, indexValues, numberAttributes);
+  }
+
+  /**
+   * Creates an instance over already-built instance data.
+   *
+   * @param weight the weight of the instance
+   * @param instanceData the instance data, stored as given
+   */
+  public InstanceImpl(double weight, InstanceData instanceData) {
+    this.weight = weight;
+    this.instanceData = instanceData;
+  }
+
+  /**
+   * Creates a dense instance of weight 1 with a newly allocated value array.
+   *
+   * @param numAttributes the number of attribute values to allocate
+   */
+  public InstanceImpl(int numAttributes) {
+    this.instanceData = new DenseInstanceData(new double[numAttributes]);
+    this.weight = 1;
+  }
+
+  /**
+   * Gets the weight of the instance.
+   *
+   * @return the weight
+   */
+  @Override
+  public double weight() {
+    return weight;
+  }
+
+  /**
+   * Sets the weight of the instance.
+   *
+   * @param weight the new weight
+   */
+  @Override
+  public void setWeight(double weight) {
+    this.weight = weight;
+  }
+
+  /**
+   * Gets an attribute from the header.
+   *
+   * @param instAttIndex the index of the attribute
+   * @return the attribute the header holds for that index
+   */
+  @Override
+  public Attribute attribute(int instAttIndex) {
+    return this.instanceHeader.attribute(instAttIndex);
+  }
+
+  /**
+   * Gets the index of an attribute by asking the header.
+   *
+   * @param attribute the attribute to be found
+   * @return the index the header reports for the attribute
+   */
+  @Override
+  public int indexOfAttribute(Attribute attribute) {
+    return this.instanceHeader.indexOf(attribute);
+  }
+
+  /**
+   * Deletes a value from the instance data. The header is not touched.
+   *
+   * @param i the index of the value to delete
+   */
+  @Override
+  public void deleteAttributeAt(int i) {
+    this.instanceData.deleteAttributeAt(i);
+  }
+
+  /**
+   * Not supported.
+   *
+   * @param i the index at which to insert (unused)
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public void insertAttributeAt(int i) {
+    throw new UnsupportedOperationException("Not yet implemented");
+  }
+
+  /**
+   * Gets the number of attributes as reported by the instance data.
+   *
+   * @return the number of attributes
+   */
+  @Override
+  public int numAttributes() {
+    return this.instanceData.numAttributes();
+  }
+
+  /**
+   * Gets the value of an attribute.
+   *
+   * @param instAttIndex the index of the attribute
+   * @return the value held by the instance data
+   */
+  @Override
+  public double value(int instAttIndex) {
+    return this.instanceData.value(instAttIndex);
+  }
+
+  /**
+   * Checks if the value of an attribute is missing.
+   *
+   * @param instAttIndex the index of the attribute
+   * @return true if the instance data reports the value as missing
+   */
+  @Override
+  public boolean isMissing(int instAttIndex) {
+    return this.instanceData.isMissing(instAttIndex);
+  }
+
+  /**
+   * Gets the number of stored values as reported by the instance data.
+   *
+   * @return the number of values
+   */
+  @Override
+  public int numValues() {
+    return this.instanceData.numValues();
+  }
+
+  /**
+   * Delegates to {@code InstanceData.index(int)}.
+   *
+   * @param i the position passed to the instance data
+   * @return the index reported by the instance data
+   */
+  @Override
+  public int index(int i) {
+    return this.instanceData.index(i);
+  }
+
+  /**
+   * Delegates to {@code InstanceData.valueSparse(int)}.
+   *
+   * @param i the position passed to the instance data
+   * @return the value reported by the instance data
+   */
+  @Override
+  public double valueSparse(int i) {
+    return this.instanceData.valueSparse(i);
+  }
+
+  /**
+   * Delegates to {@code InstanceData.isMissingSparse(int)}.
+   *
+   * @param p the position passed to the instance data
+   * @return true if the instance data reports the value as missing
+   */
+  @Override
+  public boolean isMissingSparse(int p) {
+    return this.instanceData.isMissingSparse(p);
+  }
+
+  /**
+   * Not supported.
+   *
+   * @param i the attribute index (unused)
+   * @return never returns normally
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public String stringValue(int i) {
+    throw new UnsupportedOperationException("Not yet implemented");
+  }
+
+  /**
+   * Gets the values as an array, exactly as the instance data returns them.
+   *
+   * @return the array returned by the instance data
+   */
+  @Override
+  public double[] toDoubleArray() {
+    return this.instanceData.toDoubleArray();
+  }
+
+  /**
+   * Sets the value of an attribute.
+   *
+   * @param numAttribute the index of the attribute
+   * @param d the new value
+   */
+  @Override
+  public void setValue(int numAttribute, double d) {
+    this.instanceData.setValue(numAttribute, d);
+  }
+
+  /**
+   * Gets the value stored at {@link #classIndex()}.
+   *
+   * @return the class value
+   */
+  @Override
+  public double classValue() {
+    return this.instanceData.value(classIndex());
+  }
+
+  /**
+   * Gets the class index.
+   * <p>
+   * When the header reports {@code Integer.MAX_VALUE}, the start of the
+   * instance information's range is used if a range is set, otherwise 0.
+   *
+   * @return the class index
+   */
+  @Override
+  public int classIndex() {
+    int classIndex = instanceHeader.classIndex();
+    if (classIndex == Integer.MAX_VALUE) {
+      if (this.instanceHeader.instanceInformation.range != null) {
+        classIndex = this.instanceHeader.instanceInformation.range.getStart();
+      } else {
+        classIndex = 0;
+      }
+    }
+    return classIndex;
+  }
+
+  /**
+   * Gets the number of classes as reported by the header.
+   *
+   * @return the number of classes
+   */
+  @Override
+  public int numClasses() {
+    return this.instanceHeader.numClasses();
+  }
+
+  /**
+   * Checks if the value at {@link #classIndex()} is missing.
+   *
+   * @return true if the class value is missing
+   */
+  @Override
+  public boolean classIsMissing() {
+    return this.instanceData.isMissing(classIndex());
+  }
+
+  /**
+   * Gets the attribute the header holds at {@link #classIndex()}.
+   *
+   * @return the class attribute
+   */
+  @Override
+  public Attribute classAttribute() {
+    return this.instanceHeader.attribute(classIndex());
+  }
+
+  /**
+   * Sets the value at {@link #classIndex()}.
+   *
+   * @param d the new class value
+   */
+  @Override
+  public void setClassValue(double d) {
+    this.setValue(classIndex(), d);
+  }
+
+  /**
+   * Creates a new {@code InstanceImpl} through the copy constructor, which
+   * shares this instance's data and header references.
+   *
+   * @return the new instance
+   */
+  @Override
+  public Instance copy() {
+    return new InstanceImpl(this);
+  }
+
+  /**
+   * Gets the header of this instance.
+   *
+   * @return the header
+   */
+  @Override
+  public Instances dataset() {
+    return this.instanceHeader;
+  }
+
+  /**
+   * Sets the header of this instance to a new {@code InstancesHeader} built
+   * from the given dataset.
+   *
+   * @param dataset the dataset to build the header from
+   */
+  @Override
+  public void setDataset(Instances dataset) {
+    this.instanceHeader = new InstancesHeader(dataset);
+  }
+
+  /**
+   * Replaces the instance data with new sparse data built from the arguments.
+   *
+   * @param indexValues the index values
+   * @param attributeValues the attribute values
+   * @param numberAttributes the number of attributes
+   */
+  @Override
+  public void addSparseValues(int[] indexValues, double[] attributeValues, int numberAttributes) {
+    this.instanceData = new SparseInstanceData(attributeValues, indexValues, numberAttributes);
+  }
+
+  /**
+   * Builds a text representation: one entry per attribute, each followed by a
+   * comma. Missing values are written as "?"; an attribute that is neither
+   * nominal, numeric nor a date contributes nothing.
+   *
+   * @return the text representation
+   */
+  @Override
+  public String toString() {
+    StringBuilder str = new StringBuilder();
+    // Created on the first date attribute and then reused for the rest of this call.
+    SimpleDateFormat dateFormatter = null;
+    for (int attIndex = 0; attIndex < this.numAttributes(); attIndex++) {
+      if (this.isMissing(attIndex)) {
+        str.append("?,");
+        continue;
+      }
+      Attribute attribute = this.attribute(attIndex);
+      if (attribute.isNominal()) {
+        int valueIndex = (int) this.value(attIndex);
+        String stringValue = attribute.value(valueIndex);
+        str.append(stringValue).append(",");
+      } else if (attribute.isNumeric()) {
+        str.append(this.value(attIndex)).append(",");
+      } else if (attribute.isDate()) {
+        if (dateFormatter == null) {
+          dateFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
+        }
+        str.append(dateFormatter.format(this.value(attIndex))).append(",");
+      }
+    }
+    return str.toString();
+  }
+
+  /**
+   * Gets the number of input attributes as reported by the header.
+   *
+   * @return the number of input attributes
+   */
+  @Override
+  public int numInputAttributes() {
+    return this.instanceHeader.numInputAttributes();
+  }
+
+  /**
+   * Gets the number of output attributes; same value as {@link #numberOutputTargets()}.
+   *
+   * @return the number of output attributes
+   */
+  @Override
+  public int numOutputAttributes() {
+    return numberOutputTargets();
+  }
+
+  /**
+   * Gets the number of output attributes as reported by the header.
+   *
+   * @return the number of output attributes
+   */
+  @Override
+  public int numberOutputTargets() {
+    return this.instanceHeader.numOutputAttributes();
+  }
+
+  /**
+   * Gets the value of an output attribute; same as {@link #valueOutputAttribute(int)}.
+   *
+   * @param instAttIndex the index among the output attributes
+   * @return the value
+   */
+  @Override
+  public double classValue(int instAttIndex) {
+    return valueOutputAttribute(instAttIndex);
+  }
+
+  /**
+   * Sets the value of an output attribute.
+   *
+   * @param indexClass the index among the output attributes
+   * @param valueAttribute the new value
+   */
+  @Override
+  public void setClassValue(int indexClass, double valueAttribute) {
+    int target = this.instanceHeader.getInstanceInformation().outputAttributeIndex(indexClass);
+    this.instanceData.setValue(target, valueAttribute);
+  }
+
+  /**
+   * Gets an output attribute from the instance information.
+   *
+   * @param outputIndex the index among the output attributes
+   * @return the attribute
+   */
+  @Override
+  public Attribute outputAttribute(int outputIndex) {
+    return this.instanceHeader.getInstanceInformation().outputAttribute(outputIndex);
+  }
+
+  /**
+   * Gets an input attribute from the instance information.
+   *
+   * @param attributeIndex the index among the input attributes
+   * @return the attribute
+   */
+  @Override
+  public Attribute inputAttribute(int attributeIndex) {
+    return this.instanceHeader.getInstanceInformation().inputAttribute(attributeIndex);
+  }
+
+  /**
+   * Gets the value of an input attribute.
+   *
+   * @param attributeIndex the index among the input attributes
+   * @return the value
+   */
+  @Override
+  public double valueInputAttribute(int attributeIndex) {
+    int source = this.instanceHeader.getInstanceInformation().inputAttributeIndex(attributeIndex);
+    return this.instanceData.value(source);
+  }
+
+  /**
+   * Gets the value of an output attribute.
+   *
+   * @param attributeIndex the index among the output attributes
+   * @return the value
+   */
+  @Override
+  public double valueOutputAttribute(int attributeIndex) {
+    int source = this.instanceHeader.getInstanceInformation().outputAttributeIndex(attributeIndex);
+    return this.instanceData.value(source);
+  }
+
+  /**
+   * Marks an attribute as missing by storing NaN as its value.
+   *
+   * @param instAttIndex the index of the attribute
+   */
+  @Override
+  public void setMissing(int instAttIndex) {
+    this.setValue(instAttIndex, Double.NaN);
+  }
+
+  /**
+   * Gets the value of an attribute, looked up through the header's index for it.
+   *
+   * @param attribute the attribute
+   * @return the value
+   */
+  @Override
+  public double value(Attribute attribute) {
+    return value(this.instanceHeader.indexOf(attribute));
+  }
+
+  /**
+   * Marks an attribute as missing, looked up through the header's index for it.
+   *
+   * @param attribute the attribute
+   */
+  @Override
+  public void setMissing(Attribute attribute) {
+    this.setMissing(this.instanceHeader.indexOf(attribute));
+  }
+
+  /**
+   * Checks if an attribute is missing, looked up through the header's index for it.
+   *
+   * @param attribute the attribute
+   * @return true if the value is missing
+   */
+  @Override
+  public boolean isMissing(Attribute attribute) {
+    return this.isMissing(this.instanceHeader.indexOf(attribute));
+  }
+
+  /**
+   * Sets the value of an attribute, looked up through the header's index for it.
+   *
+   * @param attribute the attribute
+   * @param value the new value
+   */
+  @Override
+  public void setValue(Attribute attribute, double value) {
+    this.setValue(this.instanceHeader.indexOf(attribute), value);
+  }
+}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceInformation.java b/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceInformation.java
index 639f065..cfd7f51 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceInformation.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/InstanceInformation.java
@@ -1,7 +1,3 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
package org.apache.samoa.instances;
/*
@@ -27,82 +23,194 @@
import java.io.Serializable;
import java.util.List;
-/**
- *
- * @author abifet
- */
public class InstanceInformation implements Serializable {
- // Should we split Instances as a List of Instances, and InformationInstances
-
- /** The dataset's name. */
+ /**
+ * The dataset's name.
+ */
protected String relationName;
- /** The attribute information. */
- protected List<Attribute> attributes;
+ protected AttributesInformation attributesInformation;
- protected int classIndex;
+ /**
+ * The class index.
+ */
+ protected int classIndex = Integer.MAX_VALUE; //By default is multilabel
+ /**
+ * Range for multi-label instances.
+ */
+ protected Range range;
+
+ /**
+  * Looks up an input attribute.
+  *
+  * @param w the position among the input attributes
+  * @return the attribute stored at the index {@link #inputAttributeIndex(int)} gives for {@code w}
+  */
+ public Attribute inputAttribute(int w) {
+   int attributeIndex = inputAttributeIndex(w);
+   return this.attributesInformation.attribute(attributeIndex);
+ }
+
+ /**
+  * Looks up an output attribute.
+  *
+  * @param w the position among the output attributes
+  * @return the attribute stored at the index {@code outputAttributeIndex(w)} gives
+  */
+ public Attribute outputAttribute(int w) {
+   int attributeIndex = outputAttributeIndex(w);
+   return this.attributesInformation.attribute(attributeIndex);
+ }
+
+ /**
+ * Creates instance information from an existing one.
+ * <p>
+ * The relation name, attributes information, class index and output range
+ * are taken over by reference; nothing is deep-copied. The range is included
+ * so that a copy of multi-label information keeps its output attributes.
+ *
+ * @param chunk the instance information to copy from
+ */
public InstanceInformation(InstanceInformation chunk) {
this.relationName = chunk.relationName;
- this.attributes = chunk.attributes;
+ this.attributesInformation = chunk.attributesInformation;
this.classIndex = chunk.classIndex;
+ this.range = chunk.range;
}
- public InstanceInformation(String st, List<Attribute> v) {
+ /**
+ * Creates instance information from a relation name and a list of attributes.
+ * The list is wrapped in a new {@code AttributesInformation} together with
+ * its size.
+ *
+ * @param st the relation name
+ * @param input the attributes
+ */
+ public InstanceInformation(String st, List<Attribute> input) {
this.relationName = st;
- this.attributes = v;
+ this.attributesInformation = new AttributesInformation(input, input.size());
}
+ /**
+ * Instantiates a new instance information.
+ */
public InstanceInformation() {
this.relationName = null;
- this.attributes = null;
+ this.attributesInformation = null;
}
- // Information Instances
-
+ //Information Instances
+ /**
+ * Sets the dataset's name.
+ *
+ * @param string the new relation name
+ */
public void setRelationName(String string) {
this.relationName = string;
}
+ /**
+ * Gets the dataset's name.
+ *
+ * @return the relation name
+ */
public String getRelationName() {
return this.relationName;
}
+ /**
+ * Gets the class index.
+ *
+ * @return the stored class index; {@code Integer.MAX_VALUE}, the field's
+ * initial value, marks the multi-label case
+ */
public int classIndex() {
- return classIndex;
+ return this.classIndex;
}
+ /* (non-Javadoc)
+ * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#setClassIndex(int)
+ */
public void setClassIndex(int classIndex) {
this.classIndex = classIndex;
}
+ /* (non-Javadoc)
+ * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#classAttribute()
+ */
public Attribute classAttribute() {
return this.attribute(this.classIndex());
}
+ /* (non-Javadoc)
+ * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#numAttributes()
+ */
public int numAttributes() {
- return this.attributes.size();
+ return this.attributesInformation.numberAttributes;
}
+ /* (non-Javadoc)
+ * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#attribute(int)
+ */
public Attribute attribute(int w) {
- return this.attributes.get(w);
+ return this.attributesInformation.attribute(w);
}
+ /* (non-Javadoc)
+ * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#numClasses()
+ */
public int numClasses() {
- return this.attributes.get(this.classIndex()).numValues();
+ return this.attributesInformation.attribute(classIndex()).numValues();
}
+ /* (non-Javadoc)
+ * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#deleteAttributeAt(java.lang.Integer)
+ */
public void deleteAttributeAt(Integer integer) {
throw new UnsupportedOperationException("Not yet implemented");
}
+ /* (non-Javadoc)
+ * @see com.yahoo.labs.samoa.instances.InstanceInformationInterface#insertAttributeAt(com.yahoo.labs.samoa.instances.Attribute, int)
+ */
public void insertAttributeAt(Attribute attribute, int i) {
throw new UnsupportedOperationException("Not yet implemented");
}
public void setAttributes(List<Attribute> v) {
- this.attributes = v;
+ if(this.attributesInformation==null)
+ this.attributesInformation= new AttributesInformation();
+ this.attributesInformation.setAttributes(v);
+ }
+
+ public int inputAttributeIndex(int index) {
+ int ret = 0;
+ if (classIndex == Integer.MAX_VALUE) {//Multi Label
+ if(index<range.getStart())//JD
+ ret= index;
+ else
+ ret= index+range.getSelectionLength();
+
+ } else { //Single Label
+ ret = classIndex() > index ? index : index + 1;
+ }
+ return ret;
+ }
+
+ public int outputAttributeIndex(int attributeIndex) {
+ int ret = 0;
+ if (classIndex == Integer.MAX_VALUE) {//Multi Label
+ ret=attributeIndex+range.getStart(); //JD - Range should be a "block"
+ } else { //Single Label
+ ret = classIndex;
+ }
+ return ret;
+ }
+
+ public int numInputAttributes() {
+ int ret = 0;
+ if (classIndex == Integer.MAX_VALUE) {//Multi Label
+ ret=this.numAttributes()-range.getSelectionLength(); //JD
+ } else { //Single Label
+ ret = this.numAttributes() - 1;
+ }
+ return ret;
+ }
+
+ public int numOutputAttributes() {
+ int ret = 0;
+ if (classIndex == Integer.MAX_VALUE) {//Multi Label
+ ret=range.getSelectionLength(); //JD
+ } else { //Single Label
+ ret = 1;
+ }
+ return ret;
+ }
+
+ public void setRangeOutputIndices(Range range) {
+ this.setClassIndex(Integer.MAX_VALUE);
+ this.range = range;
+ }
+
+ public void setAttributes(List<Attribute> v, List<Integer> indexValues) {
+ if(this.attributesInformation==null)
+ this.attributesInformation= new AttributesInformation();
+ this.attributesInformation.setAttributes(v,indexValues);
+
}
}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Instances.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Instances.java
index f7fb0d3..04fde39 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/Instances.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Instances.java
@@ -1,9 +1,7 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
package org.apache.samoa.instances;
+import java.io.InputStream;
+
/*
* #%L
* SAMOA
@@ -28,56 +26,84 @@
import java.io.Serializable;
import java.io.StringReader;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
import java.util.Random;
-import java.io.InputStream;
-/**
- *
- * @author abifet
- */
public class Instances implements Serializable {
- public static final String ARFF_RELATION = "@relation";
- public static final String ARFF_DATA = "@data";
+ /**
+ * The keyword used to denote the start of an arff header
+ */
+ public final static String ARFF_RELATION = "@relation";
+ /**
+ * The keyword used to denote the start of the arff data section
+ */
+ public final static String ARFF_DATA = "@data";
+
+ private static final long serialVersionUID = 8110510475535581577L;
+ /**
+ * The instance information.
+ */
protected InstanceInformation instanceInformation;
/**
* The instances.
*/
protected List<Instance> instances;
- transient protected Loader loader;
-
protected static enum AVRO_ENCODING_FORMAT {
JSON, BINARY
}
protected int classAttribute;
- public Instances(InstancesHeader modelContext) {
- throw new UnsupportedOperationException("Not yet implemented");
- }
+ /**
+ * A Hash that stores the indices of features.
+ */
+ protected HashMap<String, Integer> hsAttributesIndices;
+ transient protected Loader loader;
+
+ /**
+ * Instantiates a new instances.
+ *
+ * @param chunk the chunk
+ */
public Instances(Instances chunk) {
- this.instanceInformation = chunk.instanceInformation();
- // this.relationName = chunk.relationName;
- // this.attributes = chunk.attributes;
- this.instances = chunk.instances;
+ this(chunk, chunk.numInstances());
+ chunk.copyInstances(0, this, chunk.numInstances());
}
+ /**
+ * Instantiates a new instances.
+ */
public Instances() {
- // this.instanceInformation = chunk.instanceInformation();
- // this.relationName = chunk.relationName;
- // this.attributes = chunk.attributes;
- // this.instances = chunk.instances;
}
+ /**
+ * Instantiates a new instances.
+ *
+ * @param reader the reader
+ * @param size the size
+ * @param classAttribute the class attribute
+ */
public Instances(Reader reader, int size, int classAttribute) {
- this.classAttribute = classAttribute;
- loader = new ArffLoader(reader, 0, classAttribute);
+ this.loader = new ArffLoader(reader, 0, classAttribute);
this.instanceInformation = loader.getStructure();
- this.instances = new ArrayList<>();
+ this.instances = new ArrayList<Instance>();
+ }
+
+ /**
+ * Instantiates a new instances.
+ *
+ * @param reader the reader
+ * @param range the range of output attributes, forwarded to the ARFF loader
+ */
+ public Instances(Reader reader, Range range) {
+ this.loader = new ArffLoader(reader, range);
+ this.instanceInformation = loader.getStructure();
+ this.instances = new ArrayList<Instance>();
+ }
public Instances(InputStream inputStream, int classAttribute, String encodingFormat) {
@@ -92,110 +118,337 @@
this.instances = new ArrayList<>();
}
+ /**
+ * Instantiates a new instances.
+ *
+ * @param chunk the chunk
+ * @param capacity the capacity
+ */
public Instances(Instances chunk, int capacity) {
- this(chunk);
+ this.instanceInformation = chunk.instanceInformation();
+ if (capacity < 0) {
+ capacity = 0;
+ }
+ this.instances = new ArrayList<Instance>(capacity);
}
+ /**
+ * Instantiates a new instances.
+ *
+ * @param st the st
+ * @param v the v
+ * @param capacity the capacity
+ */
public Instances(String st, List<Attribute> v, int capacity) {
-
this.instanceInformation = new InstanceInformation(st, v);
- this.instances = new ArrayList<>();
+ this.instances = new ArrayList<Instance>(capacity);
}
- public Instances(Instances chunk, int i, int j) {
- throw new UnsupportedOperationException("Not yet implemented");
+ /**
+ * Instantiates a new instances.
+ *
+ * @param chunk the chunk
+ * @param first the first instance
+ * @param toCopy the number of instances to copy
+ */
+ public Instances(Instances chunk, int first, int toCopy) {
+
+ this(chunk, toCopy);
+
+ if ((first < 0) || ((first + toCopy) > chunk.numInstances())) {
+ throw new IllegalArgumentException("Parameters first and/or toCopy out "
+ + "of range");
+ }
+ chunk.copyInstances(first, this, toCopy);
}
- public Instances(StringReader st, int v) {
- throw new UnsupportedOperationException("Not yet implemented");
+ /**
+ * Instantiates a new instances.
+ *
+ * @param st the st
+ * @param capacity the capacity
+ */
+ public Instances(StringReader st, int capacity) {
+ this.instances = new ArrayList<Instance>(capacity);
}
- // Information Instances
+ //Information Instances
+ /**
+ * Sets the relation name.
+ *
+ * @param string the new relation name
+ */
public void setRelationName(String string) {
this.instanceInformation.setRelationName(string);
}
+ /**
+ * Gets the relation name.
+ *
+ * @return the relation name
+ */
public String getRelationName() {
return this.instanceInformation.getRelationName();
}
+ /**
+ * Class index.
+ *
+ * @return the int
+ */
public int classIndex() {
return this.instanceInformation.classIndex();
}
+ /**
+ * Sets the class index.
+ *
+ * @param classIndex the new class index
+ */
public void setClassIndex(int classIndex) {
this.instanceInformation.setClassIndex(classIndex);
}
+ /**
+ * Class attribute.
+ *
+ * @return the attribute
+ */
public Attribute classAttribute() {
return this.instanceInformation.classAttribute();
}
+ /**
+ * Num attributes.
+ *
+ * @return the int
+ */
public int numAttributes() {
return this.instanceInformation.numAttributes();
}
+ /**
+ * Attribute.
+ *
+ * @param w the w
+ * @return the attribute
+ */
public Attribute attribute(int w) {
return this.instanceInformation.attribute(w);
}
+ /**
+ * Num classes.
+ *
+ * @return the int
+ */
public int numClasses() {
return this.instanceInformation.numClasses();
}
+ /**
+ * Delete attribute at.
+ *
+ * @param integer the integer
+ */
public void deleteAttributeAt(Integer integer) {
this.instanceInformation.deleteAttributeAt(integer);
}
+ /**
+ * Insert attribute at.
+ *
+ * @param attribute the attribute
+ * @param i the i
+ */
public void insertAttributeAt(Attribute attribute, int i) {
+ if (this.instanceInformation == null) {
+ this.instanceInformation = new InstanceInformation();
+ }
this.instanceInformation.insertAttributeAt(attribute, i);
}
- // List of Instances
+ //List of Instances
+ /**
+ * Instance.
+ *
+ * @param num the num
+ * @return the instance
+ */
public Instance instance(int num) {
return this.instances.get(num);
}
+ /**
+ * Num instances.
+ *
+ * @return the int
+ */
public int numInstances() {
return this.instances.size();
}
+ /**
+ * Adds the given instance to the end of the list (the instance is not copied).
+ *
+ * @param inst the inst
+ */
public void add(Instance inst) {
- this.instances.add(inst.copy());
+ this.instances.add(inst);
}
+ /**
+ * Randomize.
+ *
+ * @param random the random
+ */
public void randomize(Random random) {
for (int j = numInstances() - 1; j > 0; j--) {
swap(j, random.nextInt(j + 1));
}
}
+ /**
+ * Stratify.
+ *
+ * @param numFolds the num folds
+ */
public void stratify(int numFolds) {
- throw new UnsupportedOperationException("Not yet implemented");
+
+ if (classAttribute().isNominal()) {
+
+ // sort by class
+ int index = 1;
+ while (index < numInstances()) {
+ Instance instance1 = instance(index - 1);
+ for (int j = index; j < numInstances(); j++) {
+ Instance instance2 = instance(j);
+ if ((instance1.classValue() == instance2.classValue())
+ || (instance1.classIsMissing()
+ && instance2.classIsMissing())) {
+ swap(index, j);
+ index++;
+ }
+ }
+ index++;
+ }
+ stratStep(numFolds);
+ }
}
- public Instances trainCV(int numFolds, int n, Random random) {
- throw new UnsupportedOperationException("Not yet implemented");
+ protected void stratStep(int numFolds) {
+ ArrayList<Instance> newVec = new ArrayList<Instance>(this.instances.size());
+ int start = 0, j;
+
+ // create stratified batch
+ while (newVec.size() < numInstances()) {
+ j = start;
+ while (j < numInstances()) {
+ newVec.add(instance(j));
+ j = j + numFolds;
+ }
+ start++;
+ }
+ this.instances = newVec;
}
- public Instances testCV(int numFolds, int n) {
- throw new UnsupportedOperationException("Not yet implemented");
+ /**
+ * Train cv.
+ *
+ * @param numFolds the num folds
+ * @param numFold the 0-based index of the fold left out of the training set
+ * @param random the random
+ * @return the instances
+ */
+ public Instances trainCV(int numFolds, int numFold, Random random) {
+ Instances train = trainCV(numFolds, numFold);
+ train.randomize(random);
+ return train;
}
- /*
- * public Instances dataset() { throw new
- * UnsupportedOperationException("Not yet implemented"); }
+ public Instances trainCV(int numFolds, int numFold) {
+ int numInstForFold, first, offset;
+ Instances train;
+
+ numInstForFold = numInstances() / numFolds;
+ if (numFold < numInstances() % numFolds) {
+ numInstForFold++;
+ offset = numFold;
+ } else {
+ offset = numInstances() % numFolds;
+ }
+ train = new Instances(this, numInstances() - numInstForFold);
+ first = numFold * (numInstances() / numFolds) + offset;
+ copyInstances(0, train, first);
+ copyInstances(first + numInstForFold, train,
+ numInstances() - first - numInstForFold);
+ return train;
+ }
+
+ protected void copyInstances(int from, Instances dest, int num) {
+ for (int i = 0; i < num; i++) {
+ dest.add(instance(from + i));
+ }
+ }
+
+ /**
+ * Test cv.
+ *
+ * @param numFolds the num folds
+ * @param numFold the num fold
+ * @return the instances
+ */
+ public Instances testCV(int numFolds, int numFold) {
+
+ int numInstForFold, first, offset;
+ Instances test;
+
+ numInstForFold = numInstances() / numFolds;
+ if (numFold < numInstances() % numFolds) {
+ numInstForFold++;
+ offset = numFold;
+ } else {
+ offset = numInstances() % numFolds;
+ }
+ test = new Instances(this, numInstForFold);
+ first = numFold * (numInstances() / numFolds) + offset;
+ copyInstances(first, test, numInstForFold);
+ return test;
+ }
+
+ /* public Instances dataset() {
+ throw new UnsupportedOperationException("Not yet implemented");
+ }*/
+ /**
+ * Mean or mode.
+ *
+ * @param j the j
+ * @return the double
*/
public double meanOrMode(int j) {
- throw new UnsupportedOperationException("Not yet implemented"); // CobWeb
+ throw new UnsupportedOperationException("Not yet implemented"); //CobWeb
}
+ /**
+ * Read instance.
+ *
+ * @param fileReader the file reader
+ * @return true, if successful
+ */
public boolean readInstance(Reader fileReader) {
+ //ArffReader arff = new ArffReader(reader, this, m_Lines, 1);
if (loader == null) {
loader = new ArffLoader(fileReader, 0, this.classAttribute);
}
- return readInstance();
+ Instance inst = loader.readInstance();
+ if (inst != null) {
+ inst.setDataset(this);
+ add(inst);
+ return true;
+ } else {
+ return false;
+ }
}
public boolean readInstance() {
@@ -210,16 +463,37 @@
}
}
+ /**
+ * Delete.
+ */
public void delete() {
- this.instances = new ArrayList<>();
+ this.instances = new ArrayList<Instance>();
}
+ /**
+ * Deletes the instance at the given index.
+ */
+ public void delete(int index) {
+ this.instances.remove(index);
+ }
+
+ /**
+ * Swap.
+ *
+ * @param i the i
+ * @param j the j
+ */
public void swap(int i, int j) {
Instance in = instances.get(i);
instances.set(i, instances.get(j));
instances.set(j, in);
}
+ /**
+ * Instance information.
+ *
+ * @return the instance information
+ */
private InstanceInformation instanceInformation() {
return this.instanceInformation;
}
@@ -234,32 +508,122 @@
return null;
}
- @Override
- public String toString() {
- StringBuilder text = new StringBuilder();
-
- for (int i = 0; i < numInstances(); i++) {
- text.append(instance(i).toString());
- if (i < numInstances() - 1) {
- text.append('\n');
- }
- }
- return text.toString();
+ public int size() {
+ return this.numInstances();
}
- // toString() with header
- public String toStringArff() {
- StringBuilder text = new StringBuilder();
+ public void set(int i, Instance inst) {
+ this.instances.set(i, inst);
+ }
- text.append(ARFF_RELATION).append(" ")
- .append(Utils.quote(getRelationName())).append("\n\n");
+ public Instance get(int k) {
+ return this.instance(k);
+ }
+
+ public void setRangeOutputIndices(Range range) {
+ this.instanceInformation.setRangeOutputIndices(range);
+
+ }
+
+ public void setAttributes(List<Attribute> v) {
+ if (this.instanceInformation == null) {
+ this.instanceInformation = new InstanceInformation();
+ }
+ this.instanceInformation.setAttributes(v);
+ }
+
+ public void setAttributes(List<Attribute> v, List<Integer> indexValues) {
+ if (this.instanceInformation == null) {
+ this.instanceInformation = new InstanceInformation();
+ }
+ this.instanceInformation.setAttributes(v, indexValues);
+ }
+
+ /**
+ * Returns the dataset as a string in ARFF format. Strings are quoted if
+ * they contain whitespace characters, or if they are a question mark.
+ *
+ * @return the dataset in ARFF format as a string
+ */
+ public String toString() {
+
+ StringBuffer text = new StringBuffer();
+
+ text.append(ARFF_RELATION).append(" ").
+ append(Utils.quote(this.instanceInformation.getRelationName())).append("\n\n");
for (int i = 0; i < numAttributes(); i++) {
text.append(attribute(i).toString()).append("\n");
}
text.append("\n").append(ARFF_DATA).append("\n");
- text.append(toString());
+ text.append(stringWithoutHeader());
+ return text.toString();
+ }
+
+ /**
+ * Returns the dataset as a string in ARFF format. Strings are quoted if
+ * they contain whitespace characters, or if they are a question mark.
+ *
+ * @return the dataset in ARFF format as a string
+ */
+ public String toStringArff() {
+
+ StringBuffer text = new StringBuffer();
+
+ text.append(ARFF_RELATION).append(" ").
+ append(Utils.quote(this.instanceInformation.getRelationName())).append("\n\n");
+ for (int i = 0; i < numAttributes(); i++) {
+ text.append(attribute(i).toString()).append("\n");
+ }
+ text.append("\n").append(ARFF_DATA).append("\n");
+
+ text.append(stringWithoutHeader());
+ return text.toString();
+ }
+
+ /**
+ * Returns the instances in the dataset as a string in ARFF format. Strings
+ * are quoted if they contain whitespace characters, or if they are a
+ * question mark.
+ *
+ * @return the dataset in ARFF format as a string
+ */
+ protected String stringWithoutHeader() {
+
+ StringBuffer text = new StringBuffer();
+
+ for (int i = 0; i < numInstances(); i++) {
+ text.append(instance(i));
+ if (i < numInstances() - 1) {
+ text.append('\n');
+ }
+ }
return text.toString();
}
+
+ /**
+ * Returns the index of an Attribute.
+ *
+ * @param att the attribute to look up by name
+ */
+ protected int indexOf(Attribute att) {
+ if (this.hsAttributesIndices == null || !this.hsAttributesIndices.containsKey(att.name)) {
+ computeAttributesIndices();
+ }
+
+ return this.hsAttributesIndices.get(att.name());
+ }
+
+ /**
+ * Rebuilds the hash map from attribute name to attribute index.
+ */
+ private void computeAttributesIndices() {
+ this.hsAttributesIndices = new HashMap<String, Integer>();
+ // iterates through all existing attributes
+ // and sets an unique identifier for each one of them
+ for (int i = 0; i < this.numAttributes(); i++) {
+ hsAttributesIndices.put(this.attribute(i).name(), i);
+ }
+ }
}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/InstancesHeader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/InstancesHeader.java
index a5d5a74..c32cf88 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/InstancesHeader.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/InstancesHeader.java
@@ -20,12 +20,6 @@
* #L%
*/
-/**
- * Class for storing the header or context of a data stream. It allows to know the number of attributes and classes.
- *
- * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
- * @version $Revision: 7 $
- */
public class InstancesHeader extends Instances {
private static final long serialVersionUID = 1L;
@@ -38,14 +32,6 @@
super();
}
- /*
- * @Override public boolean add(Instance i) { throw new
- * UnsupportedOperationException(); }
- *
- * @Override public boolean readInstance(Reader r) throws IOException { throw
- * new UnsupportedOperationException(); }
- */
-
public static String getClassNameString(InstancesHeader context) {
if (context == null) {
return "[class]";
@@ -59,7 +45,7 @@
return "<class " + (classLabelIndex + 1) + ">";
}
return "<class " + (classLabelIndex + 1) + ":"
- + context.classAttribute().value(classLabelIndex) + ">";
+ + context.classAttribute().value(classLabelIndex) + ">";
}
// is impervious to class index changes - attIndex is true attribute index
@@ -72,7 +58,17 @@
int instAttIndex = attIndex < context.classIndex() ? attIndex
: attIndex + 1;
return "[att " + (attIndex + 1) + ":"
- + context.attribute(instAttIndex).name() + "]";
+ + context.attribute(instAttIndex).name() + "]";
+ }
+
+ public static String getInputAttributeNameString(InstancesHeader context,
+ int attIndex) {
+ if ((context == null) || (attIndex >= context.numInputAttributes())) {
+ return "[att " + (attIndex + 1) + "]";
+ }
+ int instAttIndex = attIndex;
+ return "[att " + (attIndex + 1) + ":"
+ + context.inputAttribute(instAttIndex).name() + "]";
}
// is impervious to class index changes - attIndex is true attribute index
@@ -107,17 +103,23 @@
return Double.toString(value);
}
- // add autom.
- /*
- * public int classIndex() { throw new
- * UnsupportedOperationException("Not yet implemented"); }
- *
- * public int numAttributes() { throw new
- * UnsupportedOperationException("Not yet implemented"); }
- *
- * @Override public Attribute attribute(int nPos) { throw new
- * UnsupportedOperationException("Not yet implemented"); }
- *
- * public int numClasses() { return 0; }
- */
+ public Attribute inputAttribute(int w) {
+ return this.instanceInformation.inputAttribute(w);
+ }
+
+ public Attribute outputAttribute(int w) {
+ return this.instanceInformation.outputAttribute(w);
+ }
+
+ public int numInputAttributes() {
+ return this.instanceInformation.numInputAttributes();
+ }
+
+ public int numOutputAttributes() {
+ return this.instanceInformation.numOutputAttributes();
+ }
+
+ public InstanceInformation getInstanceInformation() {
+ return this.instanceInformation;
+ }
}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Loader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Loader.java
index 7e04fbb..65835f1 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/Loader.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Loader.java
@@ -22,12 +22,6 @@
import java.io.Serializable;
-/**
- * Loads Instances from streams of different types of Input Formats e.g ARFF & AVRO
- *
- * @author jayadeepj
- */
-
public interface Loader extends Serializable {
/**
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/MultiLabelInstance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/MultiLabelInstance.java
new file mode 100644
index 0000000..9567c90
--- /dev/null
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/MultiLabelInstance.java
@@ -0,0 +1,25 @@
+package org.apache.samoa.instances;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+public interface MultiLabelInstance extends Instance {
+
+}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/MultiLabelPrediction.java b/samoa-instances/src/main/java/org/apache/samoa/instances/MultiLabelPrediction.java
new file mode 100644
index 0000000..0b51c56
--- /dev/null
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/MultiLabelPrediction.java
@@ -0,0 +1,139 @@
+package org.apache.samoa.instances;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.io.Serializable;
+import java.util.ArrayList;
+
+public class MultiLabelPrediction implements Prediction, Serializable {
+ protected ArrayList< ArrayList<Double> > prediction;
+
+ public MultiLabelPrediction() {
+ this(0);
+ }
+
+ public MultiLabelPrediction(int numOutputAttributes) {
+ prediction = new ArrayList< ArrayList<Double> >();
+ for (int i=0; i<numOutputAttributes;i++)
+ prediction.add(new ArrayList<Double>());
+ }
+
+ @Override
+ public int numOutputAttributes() {
+ return prediction.size();
+ }
+
+ @Override
+ public int numClasses(int outputAttributeIndex) {
+ int ret = 0;
+ if (prediction.size() > outputAttributeIndex) {
+ ret = prediction.get(outputAttributeIndex).size();
+ }
+ return ret;
+ }
+
+ @Override
+ public double[] getVotes(int outputAttributeIndex) {
+ // Look up the vote list only after the bounds check; an unknown output attribute yields null.
+ double ret[] = null;
+ if (prediction.size() > outputAttributeIndex) {
+ ArrayList<Double> aux = prediction.get(outputAttributeIndex);
+ int s = aux.size();
+ ret = new double[s];
+ for (int i =0;i < s;i++) {
+ ret[i] = aux.get(i).doubleValue();
+ }
+ }
+ return ret;
+ }
+
+ @Override
+ public double[] getVotes() {
+ return getVotes(0);
+ }
+
+ @Override
+ public double getVote(int outputAttributeIndex, int classIndex) {
+ double ret = 0.0;
+ if (prediction.size() > outputAttributeIndex) {
+ ret = (classIndex >= 0 && classIndex < prediction.get(outputAttributeIndex).size()) ?
+ prediction.get(outputAttributeIndex).get(classIndex) : 0;
+ }
+ return ret;
+ }
+
+ @Override
+ public void setVotes(int outputAttributeIndex, double[] votes) {
+ for(int i=0; i<votes.length; i++) {
+ if (i >= prediction.get(outputAttributeIndex).size()) {
+ prediction.get(outputAttributeIndex).ensureCapacity(i+1);
+ while (prediction.get(outputAttributeIndex).size() < i+1) {
+ prediction.get(outputAttributeIndex).add(0.0);
+ }
+ }
+
+ prediction.get(outputAttributeIndex).set(i,votes[i]);
+ }
+ }
+
+ @Override
+ public void setVotes(double[] votes) {
+ setVotes(0, votes);
+ }
+
+ @Override
+ public void setVote(int outputAttributeIndex, int classIndex, double vote) {
+ // Pad with zero votes up to classIndex. The padding must be driven by classIndex
+ // (not outputAttributeIndex), otherwise set() below throws IndexOutOfBoundsException.
+ ArrayList<Double> votes = prediction.get(outputAttributeIndex);
+ votes.ensureCapacity(classIndex + 1);
+ while (votes.size() < classIndex + 1) {
+ votes.add(0.0);
+ }
+ votes.set(classIndex, vote);
+ }
+
+ @Override
+ public String toString(){
+ StringBuffer sb= new StringBuffer();
+ for (int i=0; i<prediction.size(); i++){
+ sb.append("Out " + i + ": ");
+ for (int c=0; c<prediction.get(i).size(); c++)
+ {
+ sb.append(((int)(prediction.get(i).get(c)*1000)/1000.0)+ " ");
+ }
+ }
+ return sb.toString();
+ }
+
+ @Override
+ public boolean hasVotesForAttribute(int outputAttributeIndex) {
+ if(prediction.size()<(outputAttributeIndex+1))
+ return false;
+ return (prediction.get(outputAttributeIndex).size()==0) ? false : true;
+ }
+
+ @Override
+ public int size() {
+ return prediction.size();
+ }
+
+}
\ No newline at end of file
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/MultiTargetArffLoader.java b/samoa-instances/src/main/java/org/apache/samoa/instances/MultiTargetArffLoader.java
new file mode 100644
index 0000000..0e9a8fa
--- /dev/null
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/MultiTargetArffLoader.java
@@ -0,0 +1,47 @@
+package org.apache.samoa.instances;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.io.Reader;
+
+public class MultiTargetArffLoader extends ArffLoader {
+
+ public MultiTargetArffLoader(Reader reader) {
+ super(reader);
+ }
+
+ public MultiTargetArffLoader(Reader reader, Range range) {
+ super(reader, range);
+ }
+
+ @Override
+ protected Instance newSparseInstance(double d, double[] res) {
+ return new SparseInstance(d, res); // TODO
+ }
+
+ @Override
+ protected Instance newDenseInstance(int numAttributes) {
+ // numAttributes is this.instanceInformation.numAttributes()
+ this.range.setUpper(numAttributes);
+ return new DenseInstance(numAttributes);
+ }
+
+}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Prediction.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Prediction.java
new file mode 100644
index 0000000..059e912
--- /dev/null
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Prediction.java
@@ -0,0 +1,110 @@
+package org.apache.samoa.instances;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+public interface Prediction {
+
+ /**
+ * Number of output attributes.
+ *
+ * @return the number of output attributes
+ */
+ public int numOutputAttributes();
+
+ /**
+ * Different output attributes may have different number of classes.
+ * Regressors have one class per output attribute.
+ *
+ * @return the number of classes for output attribute outputAttributeIndex
+ */
+ public int numClasses(int outputAttributeIndex);
+
+ /*
+ * The predictions for each output attribute.
+ *
+ * @return the classes for each output attribute
+ *//*
+ public double [] getPrediction();
+ */
+
+ /**
+ * The votes for a given output attribute
+ *
+ * @return the votes for a given output attribute outputAttributeIndex.
+ */
+ public double [] getVotes(int outputAttributeIndex);
+
+ /**
+ * The vote assigned to a class of an output attribute
+ *
+ * @return the vote for an output attribute outputAttributeIndex and a class classIndex.
+ */
+ public double getVote(int outputAttributeIndex, int classIndex);
+
+ /**
+ * Sets the votes for a given output attribute
+ *
+ */
+ public void setVotes(int outputAttributeIndex, double [] votes);
+
+ /**
+ * Sets the votes for the first output attribute
+ *
+ */
+ public void setVotes(double[] votes);
+
+ /**
+ * Sets the vote for class of a given output attribute
+ *
+ */
+ public void setVote(int outputAttributeIndex, int classIndex, double vote);
+
+ /**
+ * The votes for the first output attribute
+ *
+ * @return the votes for the first output attribute outputAttributeIndex.
+ */
+ double[] getVotes();
+
+ /**
+ * Checks if there are votes for a given output attribute
+ *
+ * @return true if there are votes for output attribute outputAttributeIndex.
+ */
+ boolean hasVotesForAttribute(int outputAttributeIndex);
+
+
+ /**
+ * The size of the prediction, that is the number of output attributes
+ *
+ * @return the number of output attributes.
+ */
+ public int size();
+
+ /**
+ * The text of the prediction, that is the description of the values of the prediction
+ *
+ * @return the text
+ */
+ public String toString();
+
+
+}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Range.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Range.java
new file mode 100644
index 0000000..52d036f
--- /dev/null
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Range.java
@@ -0,0 +1,114 @@
+package org.apache.samoa.instances;
+
+/*
+ * #%L
+ * SAMOA
+ * %%
+ * Copyright (C) 2014 - 2015 Apache Software Foundation
+ * %%
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * #L%
+ */
+
+import java.io.Serializable;
+
+ /**
+  * A contiguous, inclusive range of 0-based attribute indices parsed from text such as
+  * "1-3", "3" (first three) or "-2" (last two); positive numbers in the text are 1-based.
+  */
+ public class Range implements Serializable {
+   private int start = 0;
+   private int end = 0;
+   private int upperLimit = 0;
+   private final String rangeText;
+
+   /** Stores the text; it is parsed only when {@link #setUpper(int)} supplies the limit. */
+   public Range(String range) {
+     this.rangeText = range;
+   }
+
+   /**
+    * Sets the range from a string representation. Supported forms are "a-b"
+    * (both bounds via {@link #rangeSingle(String)}), "n" (the first n indices) and
+    * "-n" (the last n indices below the current upper limit). In the "a-b" form
+    * a "last" bound is stored as -1; it is not resolved against the upper limit.
+    *
+    * @param range the range text; surrounding whitespace is ignored
+    * @throws NumberFormatException if a bound is not first, last or an integer
+    */
+   public void setRange(String range) {
+     // Work on the trimmed text throughout, so that " -3" still means "last 3".
+     String single = range.trim();
+     int hyphenIndex = single.indexOf('-');
+     if (hyphenIndex > 0) {
+       this.start = rangeSingle(single.substring(0, hyphenIndex));
+       this.end = rangeSingle(single.substring(hyphenIndex + 1));
+       return;
+     }
+     int number = rangeSingle(single);
+     if (number >= 0) { // first n attributes
+       this.start = 0;
+       this.end = number;
+     } else { // last n attributes; start is clamped so it never drops below 0
+       this.start = Math.max(this.upperLimit + number, 0);
+       this.end = this.upperLimit - 1;
+     }
+   }
+
+   /**
+    * Translates a single string selection into its internal 0-based equivalent.
+    * Keywords are matched case-insensitively, independently of the default locale.
+    *
+    * @param singleSelection the string representing the selection (eg: 1 first last)
+    * @return 0 for "first", -1 for "last", n-1 for an integer n >= 1, otherwise n itself
+    * @throws NumberFormatException if the text is neither a keyword nor an integer
+    */
+   protected /*@pure@*/ int rangeSingle(/*@non_null@*/String singleSelection) {
+     String single = singleSelection.trim();
+     if (single.equalsIgnoreCase("first")) {
+       return 0;
+     }
+     if (single.equalsIgnoreCase("last")) {
+       return -1;
+     }
+     int index = Integer.parseInt(single);
+     // Positive input is 1-based; zero and negative values pass through unchanged.
+     return index >= 1 ? index - 1 : index;
+   }
+
+   /** Checks whether a 0-based index lies within [start, end], both bounds inclusive. */
+   boolean isInRange(int value) {
+     return value >= start && value <= end;
+   }
+
+   /** Returns the number of indices covered, counting both bounds. */
+   int getSelectionLength() {
+     return end - start + 1;
+   }
+
+   /** Sets the upper limit and re-parses the stored range text against it. */
+   public void setUpper(int attributeNumber) {
+     this.upperLimit = attributeNumber;
+     this.setRange(this.rangeText);
+   }
+
+   /** Returns the first index of the range (0-based, inclusive). */
+   public int getStart() {
+     return start;
+   }
+
+   /** Returns the last index of the range (0-based, inclusive). */
+   public int getEnd() {
+     return end;
+   }
+ }
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/SingleClassInstanceData.java b/samoa-instances/src/main/java/org/apache/samoa/instances/SingleClassInstanceData.java
deleted file mode 100644
index dfb8474..0000000
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/SingleClassInstanceData.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-package org.apache.samoa.instances;
-
-/*
- * #%L
- * SAMOA
- * %%
- * Copyright (C) 2014 - 2015 Apache Software Foundation
- * %%
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * #L%
- */
-
-/**
- *
- * @author abifet
- */
-public class SingleClassInstanceData implements InstanceData {
-
- protected double classValue;
-
- @Override
- public int numAttributes() {
- return 1;
- }
-
- @Override
- public double value(int instAttIndex) {
- return classValue;
- }
-
- @Override
- public boolean isMissing(int indexAttribute) {
- return Double.isNaN(this.value(indexAttribute));
- }
-
- @Override
- public int numValues() {
- return 1;
- }
-
- @Override
- public int index(int i) {
- return 0;
- }
-
- @Override
- public double valueSparse(int i) {
- return value(i);
- }
-
- @Override
- public boolean isMissingSparse(int indexAttribute) {
- return Double.isNaN(this.value(indexAttribute));
- }
-
- /*
- * @Override public double value(Attribute attribute) { return
- * this.classValue; }
- */
-
- @Override
- public double[] toDoubleArray() {
- double[] array = { this.classValue };
- return array;
- }
-
- @Override
- public void setValue(int m_numAttributes, double d) {
- this.classValue = d;
- }
-
-}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/SingleLabelInstance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/SingleLabelInstance.java
deleted file mode 100644
index d69a0f5..0000000
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/SingleLabelInstance.java
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
-package org.apache.samoa.instances;
-
-/*
- * #%L
- * SAMOA
- * %%
- * Copyright (C) 2014 - 2015 Apache Software Foundation
- * %%
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * #L%
- */
-
-/**
- *
- * @author abifet
- */
-// public int[] m_AttValues; // for DataPoint
-
-public class SingleLabelInstance implements Instance {
-
- protected double weight;
-
- protected InstanceData instanceData;
-
- protected InstanceData classData;
-
- // Fast implementation without using Objects
- // protected double[] attributeValues;
- // protected double classValue;
-
- protected InstancesHeader instanceInformation;
-
- public SingleLabelInstance() {
- // necessary for kryo serializer
- }
-
- public SingleLabelInstance(SingleLabelInstance inst) {
- this.weight = inst.weight;
- this.instanceData = inst.instanceData; // copy
- this.classData = inst.classData; // copy
- // this.classValue = inst.classValue;
- // this.attributeValues = inst.attributeValues;
- this.instanceInformation = inst.instanceInformation;
- }
-
- // Dense
- public SingleLabelInstance(double weight, double[] res) {
- this.weight = weight;
- this.instanceData = new DenseInstanceData(res);
- // this.attributeValues = res;
- this.classData = new SingleClassInstanceData();
- // this.classValue = Double.NaN;
-
- }
-
- // Sparse
- public SingleLabelInstance(double weight, double[] attributeValues,
- int[] indexValues, int numberAttributes) {
- this.weight = weight;
- this.instanceData = new SparseInstanceData(attributeValues,
- indexValues, numberAttributes); // ???
- this.classData = new SingleClassInstanceData();
- // this.classValue = Double.NaN;
- // this.instanceInformation = new InstancesHeader();
-
- }
-
- public SingleLabelInstance(double weight, InstanceData instanceData) {
- this.weight = weight;
- this.instanceData = instanceData; // ???
- // this.classValue = Double.NaN;
- this.classData = new SingleClassInstanceData();
- // this.instanceInformation = new InstancesHeader();
- }
-
- public SingleLabelInstance(int numAttributes) {
- this.instanceData = new DenseInstanceData(new double[numAttributes]);
- // m_AttValues = new double[numAttributes];
- /*
- * for (int i = 0; i < m_AttValues.length; i++) { m_AttValues[i] =
- * Utils.missingValue(); }
- */
- this.weight = 1;
- this.classData = new SingleClassInstanceData();
- this.instanceInformation = new InstancesHeader();
- }
-
- @Override
- public double weight() {
- return weight;
- }
-
- @Override
- public void setWeight(double weight) {
- this.weight = weight;
- }
-
- @Override
- public Attribute attribute(int instAttIndex) {
- return this.instanceInformation.attribute(instAttIndex);
- }
-
- @Override
- public void deleteAttributeAt(int i) {
- // throw new UnsupportedOperationException("Not yet implemented");
- }
-
- @Override
- public void insertAttributeAt(int i) {
- throw new UnsupportedOperationException("Not yet implemented");
- }
-
- @Override
- public int numAttributes() {
- return this.instanceInformation.numAttributes();
- }
-
- @Override
- public double value(int instAttIndex) {
- return // attributeValues[instAttIndex]; //
- this.instanceData.value(instAttIndex);
- }
-
- @Override
- public boolean isMissing(int instAttIndex) {
- return // Double.isNaN(value(instAttIndex)); //
- this.instanceData.isMissing(instAttIndex);
- }
-
- @Override
- public int numValues() {
- return // this.attributeValues.length; //
- this.instanceData.numValues();
- }
-
- @Override
- public int index(int i) {
- return // i; //
- this.instanceData.index(i);
- }
-
- @Override
- public double valueSparse(int i) {
- return this.instanceData.valueSparse(i);
- }
-
- @Override
- public boolean isMissingSparse(int p) {
- return this.instanceData.isMissingSparse(p);
- }
-
- @Override
- public double value(Attribute attribute) {
- // throw new UnsupportedOperationException("Not yet implemented");
- // //Predicates.java
- return value(attribute.index());
-
- }
-
- @Override
- public String stringValue(int i) {
- throw new UnsupportedOperationException("Not yet implemented");
- }
-
- @Override
- public double[] toDoubleArray() {
- return // this.attributeValues; //
- this.instanceData.toDoubleArray();
- }
-
- @Override
- public void setValue(int numAttribute, double d) {
- this.instanceData.setValue(numAttribute, d);
- // this.attributeValues[numAttribute] = d;
- }
-
- @Override
- public double classValue() {
- return this.classData.value(0);
- // return classValue;
- }
-
- @Override
- public int classIndex() {
- return instanceInformation.classIndex();
- }
-
- @Override
- public int numClasses() {
- return this.instanceInformation.numClasses();
- }
-
- @Override
- public boolean classIsMissing() {
- return // Double.isNaN(this.classValue);//
- this.classData.isMissing(0);
- }
-
- @Override
- public Attribute classAttribute() {
- //return the class attribute
- return this.instanceInformation.attribute(classIndex());
- }
-
- @Override
- public void setClassValue(double d) {
- this.classData.setValue(0, d);
- }
-
- @Override
- public Instance copy() {
- SingleLabelInstance inst = new SingleLabelInstance(this);
- return inst;
- }
-
- @Override
- public Instances dataset() {
- return this.instanceInformation;
- }
-
- @Override
- public void setDataset(Instances dataset) {
- this.instanceInformation = new InstancesHeader(dataset);
- }
-
- public void addSparseValues(int[] indexValues, double[] attributeValues,
- int numberAttributes) {
- this.instanceData = new SparseInstanceData(attributeValues,
- indexValues, numberAttributes); // ???
- }
-
- @Override
- public String toString() {
- StringBuffer text = new StringBuffer();
-
- for (int i = 0; i < this.numValues(); i++) {
- if (i > 0)
- text.append(",");
- text.append(this.value(i));
- }
- text.append(",").append(this.weight());
-
- return text.toString();
- }
-
-}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java b/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java
index 56dbc7f..a62013a 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstance.java
@@ -1,7 +1,3 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
package org.apache.samoa.instances;
/*
@@ -24,63 +20,46 @@
* #L%
*/
-import java.text.SimpleDateFormat;
+public class SparseInstance extends InstanceImpl {
-/**
- *
- * @author abifet
- */
-public class SparseInstance extends SingleLabelInstance {
-
+ /**
+ * Instantiates a new sparse instance.
+ *
+ * @param d the d
+ * @param res the res
+ */
public SparseInstance(double d, double[] res) {
super(d, res);
}
- public SparseInstance(SingleLabelInstance inst) {
+ /**
+ * Instantiates a new sparse instance.
+ *
+ * @param inst the inst
+ */
+ public SparseInstance(InstanceImpl inst) {
super(inst);
}
+ /**
+ * Instantiates a new sparse instance.
+ *
+ * @param numberAttributes the number attributes
+ */
public SparseInstance(double numberAttributes) {
- // super(1, new double[(int) numberAttributes-1]);
super(1, null, null, (int) numberAttributes);
}
+ /**
+ * Instantiates a new sparse instance.
+ *
+ * @param weight the weight
+ * @param attributeValues the attribute values
+ * @param indexValues the index values
+ * @param numberAttributes the number attributes
+ */
public SparseInstance(double weight, double[] attributeValues, int[] indexValues, int numberAttributes) {
super(weight, attributeValues, indexValues, numberAttributes);
}
- @Override
- public String toString() {
- StringBuffer str = new StringBuffer();
-
- str.append("{");
-
- for (int i=0; i<this.numAttributes()-1;i++){
- if (!this.isMissing(i)) {
-
- //if the attribute is Nominal we print the string value of the attribute.
- if (this.attribute(i).isNominal()) {
- int valueIndex = (int) this.value(i);
- String stringValue = this.attribute(i).value(valueIndex);
- str.append(i).append(" ").append(stringValue).append(",");
- } else if (this.attribute(i).isNumeric()) {
- //if the attribute is numeric we print the value of the attribute only if it is not equal 0
- if (this.value(i) != 0) {
- str.append(i).append(" ").append(this.value(i)).append(",");
- }
- } else if (this.attribute(i).isDate()) {
- SimpleDateFormat dateFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
- str.append(i).append(" ").append(dateFormatter.format(this.value(i))).append(",");
- }
- } else { //represent missing values
- str.append(i).append(" ").append("?,");
- }
- }
- //append the class value at the end of the instance.
- str.append(classIndex()).append(" ").append(this.classAttribute().value((int)classValue()));
-
- str.append("}");
-
- return str.toString();
- }
}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstanceData.java b/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstanceData.java
index 9c14b8f..77b634b 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstanceData.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/SparseInstanceData.java
@@ -1,7 +1,3 @@
-/*
- * To change this template, choose Tools | Templates
- * and open the template in the editor.
- */
package org.apache.samoa.instances;
/*
@@ -24,62 +20,121 @@
* #L%
*/
-/**
- *
- * @author abifet
- */
public class SparseInstanceData implements InstanceData {
+ /**
+ * Instantiates a new sparse instance data.
+ *
+ * @param attributeValues the attribute values
+ * @param indexValues the index values
+ * @param numberAttributes the number attributes
+ */
public SparseInstanceData(double[] attributeValues, int[] indexValues, int numberAttributes) {
this.attributeValues = attributeValues;
this.indexValues = indexValues;
this.numberAttributes = numberAttributes;
}
+ /**
+ * Instantiates a new sparse instance data.
+ *
+ * @param length the length
+ */
public SparseInstanceData(int length) {
this.attributeValues = new double[length];
this.indexValues = new int[length];
}
+ /**
+ * The attribute values.
+ */
protected double[] attributeValues;
+ /**
+ * Gets the attribute values.
+ *
+ * @return the attribute values
+ */
public double[] getAttributeValues() {
return attributeValues;
}
+ /**
+ * Sets the attribute values.
+ *
+ * @param attributeValues the new attribute values
+ */
public void setAttributeValues(double[] attributeValues) {
this.attributeValues = attributeValues;
}
+ /**
+ * Gets the index values.
+ *
+ * @return the index values
+ */
public int[] getIndexValues() {
return indexValues;
}
+ /**
+ * Sets the index values.
+ *
+ * @param indexValues the new index values
+ */
public void setIndexValues(int[] indexValues) {
this.indexValues = indexValues;
}
+ /**
+ * Gets the number attributes.
+ *
+ * @return the number attributes
+ */
public int getNumberAttributes() {
return numberAttributes;
}
+ /**
+ * Sets the number of attributes.
+ *
+ * @param numberAttributes the new number attributes
+ */
public void setNumberAttributes(int numberAttributes) {
this.numberAttributes = numberAttributes;
}
+ /**
+ * The index values.
+ */
protected int[] indexValues;
+
+ /**
+ * The number of attributes.
+ */
protected int numberAttributes;
+ /**
+ * Gets the number of attributes.
+ *
+ * @return the number of attributes
+ */
@Override
public int numAttributes() {
return this.numberAttributes;
}
+ /**
+ * Value.
+ *
+ * @param indexAttribute the index attribute
+ * @return the double
+ */
@Override
public double value(int indexAttribute) {
int location = locateIndex(indexAttribute);
- // return location == -1 ? 0 : this.attributeValues[location];
- // int index = locateIndex(attIndex);
+ //return location == -1 ? 0 : this.attributeValues[location];
+ // int index = locateIndex(attIndex);
if ((location >= 0) && (indexValues[location] == indexAttribute)) {
return attributeValues[location];
} else {
@@ -87,36 +142,65 @@
}
}
+ /**
+ * Checks if is missing.
+ *
+ * @param indexAttribute the index attribute
+ * @return true, if is missing
+ */
@Override
public boolean isMissing(int indexAttribute) {
return Double.isNaN(this.value(indexAttribute));
}
+ /**
+ * Gets the number of values stored in the sparse representation.
+ *
+ * @return the length of the attribute values array
+ */
@Override
public int numValues() {
return this.attributeValues.length;
}
+ /**
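+ * Gets the attribute index stored at a position of the sparse index array.
+ *
+ * @param indexAttribute the position within the index values array
+ * @return the attribute index stored at that position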
+ */
@Override
public int index(int indexAttribute) {
return this.indexValues[indexAttribute];
}
+ /**
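+ * Gets the value stored at a position of the sparse values array.
+ *
+ * @param indexAttribute the position within the attribute values array
+ * @return the value stored at that position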
+ */
@Override
public double valueSparse(int indexAttribute) {
return this.attributeValues[indexAttribute];
}
+ /**
+ * Checks if is missing sparse.
+ *
+ * @param indexAttribute the index attribute
+ * @return true, if is missing sparse
+ */
@Override
public boolean isMissingSparse(int indexAttribute) {
return Double.isNaN(this.valueSparse(indexAttribute));
}
- /*
- * @Override public double value(Attribute attribute) { return
- * value(attribute.index()); }
+ /**
+ * To double array.
+ *
+ * @return the double[]
*/
-
@Override
public double[] toDoubleArray() {
double[] array = new double[numAttributes()];
@@ -126,6 +210,12 @@
return array;
}
+ /**
+ * Sets the value.
+ *
+ * @param attributeIndex the attribute index
+ * @param d the d
+ */
@Override
public void setValue(int attributeIndex, double d) {
int index = locateIndex(attributeIndex);
@@ -138,8 +228,9 @@
/**
* Locates the greatest index that is not greater than the given index.
- *
- * @return the internal index of the attribute index. Returns -1 if no index with this property could be found
+ *
+ * @return the internal index of the attribute index. Returns -1 if no index
+ * with this property could be found
*/
public int locateIndex(int index) {
@@ -168,4 +259,46 @@
}
}
+ /**
+  * Deletes an attribute at the given position (0 to numAttributes() - 1).
+  *
+  * The attribute count is decremented and every stored index behind the
+  * position moves down by one; if the position itself holds a stored value,
+  * that entry is dropped from the sparse arrays as well.
+  *
+  * @param position the attribute's position
+  */
+ @Override
+ public void deleteAttributeAt(int position) {
+   int located = locateIndex(position);
+   this.numberAttributes--;
+
+   // Does the sparse representation hold an explicit entry for this attribute?
+   boolean stored = (located >= 0) && (indexValues[located] == position);
+   // Number of source entries left out of the new arrays (the deleted one, if stored).
+   int dropped = stored ? 1 : 0;
+   // Leading entries that sit before the position and are carried over unchanged.
+   int untouched = stored ? located : located + 1;
+
+   int[] shiftedIndices = new int[indexValues.length - dropped];
+   double[] shiftedValues = new double[attributeValues.length - dropped];
+   System.arraycopy(indexValues, 0, shiftedIndices, 0, untouched);
+   System.arraycopy(attributeValues, 0, shiftedValues, 0, untouched);
+
+   // Entries behind the position keep their value but refer to an index one lower.
+   for (int target = untouched; target < shiftedIndices.length; target++) {
+     shiftedIndices[target] = indexValues[target + dropped] - 1;
+     shiftedValues[target] = attributeValues[target + dropped];
+   }
+
+   indexValues = shiftedIndices;
+   attributeValues = shiftedValues;
+ }
+
+ /** Returns a copy that does not share its arrays with this object (null arrays stay null). */
+ @Override
+ public InstanceData copy() {
+   return new SparseInstanceData(attributeValues == null ? null : attributeValues.clone(),
+       indexValues == null ? null : indexValues.clone(), this.numberAttributes);
+ }
}
diff --git a/samoa-instances/src/main/java/org/apache/samoa/instances/Utils.java b/samoa-instances/src/main/java/org/apache/samoa/instances/Utils.java
index 73990bb..778f003 100644
--- a/samoa-instances/src/main/java/org/apache/samoa/instances/Utils.java
+++ b/samoa-instances/src/main/java/org/apache/samoa/instances/Utils.java
@@ -88,4 +88,4 @@
return string;
}
-}
+}
\ No newline at end of file