package io.prediction.examples.stock

import io.prediction.controller.Params
import io.prediction.controller.PDataSource
import io.prediction.controller.LDataSource
import io.prediction.controller.EmptyParams

import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.rdd.RDD
import org.apache.spark.broadcast.Broadcast

import com.mongodb.casbah.Imports._
import org.saddle._
import org.saddle.index.IndexTime
import com.github.nscala_time.time.Imports._

/** Primary parameters for [[DataSource]].
 *
 * @param appid the PredictionIO app id from which the data is read
 * @param baseDate identifies the beginning of the global time window; the
 * index parameters below are day offsets relative to it
 * @param fromIdx the first date index for testing
 * @param untilIdx the last date index (exclusive) for testing
 * @param trainingWindowSize number of days used for training
 * @param maxTestingWindowSize maximum number of days in each testing set
 * @param marketTicker ticker symbol of the market reference instrument
 * @param tickerList ticker symbols of the stocks of interest
 *
 * [[DataSource]] chops the data into multiple (possibly overlapping) pieces,
 * and each piece is further split into a training set and a testing set. The
 * testing sets run from <code>fromIdx</code> until <code>untilIdx</code>
 * with a step size of <code>maxTestingWindowSize</code>. For example, with
 * (fromIdx, untilIdx, maxTestingWindowSize) = (100, 150, 20), three testing
 * sets are generated, covering the time ranges [100, 120), [120, 140), and
 * [140, 150). For each testing set, the training set is drawn using
 * <code>trainingWindowSize</code>: with trainingWindowSize = 50 and testing
 * set [100, 120), the training set covers the time range [50, 100).
 */
case class DataSourceParams(
  appid: Int = 1008,
  baseDate: DateTime,
  fromIdx: Int,
  untilIdx: Int,
  trainingWindowSize: Int,
  maxTestingWindowSize: Int,
  marketTicker: String,
  tickerList: Seq[String]) extends Params
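
// A minimal sketch, not part of the original example, of how the parameters
// above slice the global timeline into train/test windows. `WindowSketch`
// and `sliceWindows` are hypothetical names; the actual DataSource
// implementation may differ. Each element is a (trainStart, testStart,
// testEnd) triple of day indices relative to baseDate.
object WindowSketch {
  def sliceWindows(params: DataSourceParams): Seq[(Int, Int, Int)] =
    (params.fromIdx until params.untilIdx by params.maxTestingWindowSize)
      .map { testStart =>
        // Clamp the final testing window at untilIdx, e.g. [140, 150).
        val testEnd = math.min(testStart + params.maxTestingWindowSize, params.untilIdx)
        // The training window ends exactly where the testing window begins;
        // successive training windows may overlap.
        val trainStart = testStart - params.trainingWindowSize
        (trainStart, testStart, testEnd)
      }
}
// With (fromIdx, untilIdx, trainingWindowSize, maxTestingWindowSize) =
// (100, 150, 50, 20) this yields Seq((50, 100, 120), (70, 120, 140),
// (90, 140, 150)), matching the example in the Scaladoc above.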