blob: 82a43653de235713d950ecb93073ae2840abde77 [file] [log] [blame]
package io.prediction.examples.java.regression;
import io.prediction.controller.java.LJavaDataSource;
import scala.Tuple2;
import scala.Tuple3;
import java.io.IOException;
import java.lang.Iterable;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Pattern;
public class DataSource
extends LJavaDataSource<DataSourceParams, Integer, TrainingData, Double[], Double> {
private static final Pattern SPACE = Pattern.compile(" ");
public final DataSourceParams dsp;
public DataSource(DataSourceParams dsp) {
this.dsp = dsp;
}
public List<Tuple3<Integer, TrainingData, Iterable<Tuple2<Double[], Double>>>> read() {
List<String> lines;
try {
lines = Files.readAllLines(Paths.get(dsp.filepath), StandardCharsets.UTF_8);
} catch (IOException exception) {
System.out.println("Cannot read file");
lines = new ArrayList<String>();
}
int n = lines.size();
int featureCount = SPACE.split(lines.get(0)).length - 1;
Double[][] x = new Double[n][featureCount];
Double[] y = new Double[n];
for (int i = 0; i < n; i++) {
String[] line = SPACE.split(lines.get(i), 2);
y[i] = Double.parseDouble(line[0]);
String[] featureStrs = SPACE.split(line[1]);
for (int j = 0; j < featureCount; j++) {
x[i][j] = Double.parseDouble(featureStrs[j]);
}
}
TrainingData td = new TrainingData(x, y);
List<Tuple2<Double[], Double>> faList = new ArrayList<>();
for (int i = 0; i < 10; i++) {
faList.add(new Tuple2<Double[], Double>(x[i], y[i]));
}
List<Tuple3<Integer, TrainingData, Iterable<Tuple2<Double[], Double>>>> results =
new ArrayList <> ();
results.add(new Tuple3<Integer, TrainingData, Iterable<Tuple2<Double[], Double>>>(
new Integer(0), td, faList));
return results;
}
}