package io.prediction.examples.stock
import io.prediction.controller.Params
import io.prediction.controller.PDataSource
import io.prediction.controller.LDataSource
import io.prediction.controller.EmptyParams
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.rdd.RDD
import org.apache.spark.broadcast.Broadcast
import com.mongodb.casbah.Imports._
import org.saddle._
import org.saddle.index.IndexTime
import com.github.nscala_time.time.Imports._
/** Primary parameters for [[DataSource]].
  *
  * @param appid application id of the data set to read
  * @param baseDate identifies the beginning of the global time window; all
  *        other indexes are day offsets relative to it
  * @param fromIdx the first day index for testing
  * @param untilIdx the last day index (exclusive) for testing
  * @param trainingWindowSize number of days used for training
  * @param maxTestingWindowSize maximum number of days in each testing set
  * @param marketTicker ticker symbol used as the market reference
  * @param tickerList list of ticker symbols to read
  *
  * [[DataSource]] chops the data into multiple (possibly overlapping) pieces.
  * Each piece is further split into a training set and a testing set. The
  * testing sets run from <code>fromIdx</code> until <code>untilIdx</code> with
  * a step size of <code>maxTestingWindowSize</code>. A concrete example:
  * (fromIdx, untilIdx, maxTestingWindowSize) = (100, 150, 20) generates three
  * testing sets covering the time ranges [100, 120), [120, 140), and [140, 150).
  * For each testing set, it also generates a training set spanning
  * <code>trainingWindowSize</code> days immediately before it. For instance,
  * with trainingWindowSize = 50 and testing set [100, 120), the training set
  * draws data from the time range [50, 100).
  */
case class DataSourceParams(
  appid: Int = 1008,
  baseDate: DateTime,
  fromIdx: Int,
  untilIdx: Int,
  trainingWindowSize: Int,
  maxTestingWindowSize: Int,
  marketTicker: String,
  tickerList: Seq[String]) extends Params
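
// ---------------------------------------------------------------------------
// Illustration only (not part of the original DataSource): a minimal sketch,
// under the assumption that the parameters above slice the global timeline
// into consecutive testing windows of at most `maxTestingWindowSize` days,
// each paired with the `trainingWindowSize` days immediately before it.
// The names `WindowSlicingSketch`, `Window` and `sliceWindows`, as well as
// the example parameter values below, are hypothetical and exist purely to
// make the Scaladoc example above concrete.
object WindowSlicingSketch {

  /** One train/test split, expressed as day indexes relative to `baseDate`. */
  case class Window(
    trainingFrom: Int,
    trainingUntil: Int,
    testingFrom: Int,
    testingUntil: Int)

  /** Generates one window per testing slice of at most `maxTestingWindowSize` days. */
  def sliceWindows(params: DataSourceParams): Seq[Window] =
    (params.fromIdx until params.untilIdx by params.maxTestingWindowSize).map { testingFrom =>
      val testingUntil = math.min(testingFrom + params.maxTestingWindowSize, params.untilIdx)
      Window(testingFrom - params.trainingWindowSize, testingFrom, testingFrom, testingUntil)
    }

  def main(args: Array[String]): Unit = {
    // Reproduces the Scaladoc example: testing ranges [100, 120), [120, 140),
    // and [140, 150); the first slice trains on [50, 100).
    val params = DataSourceParams(
      baseDate = new DateTime(2000, 1, 1, 0, 0),
      fromIdx = 100,
      untilIdx = 150,
      trainingWindowSize = 50,
      maxTestingWindowSize = 20,
      marketTicker = "SPY",
      tickerList = Seq("AAPL", "MSFT"))

    sliceWindows(params).foreach(println)
  }
}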