/** Copyright 2015 TappingStone, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
| |
| package io.prediction.controller |
| |
| import io.prediction.core.BasePreparator |
| import org.apache.spark.SparkContext |
| |
/** Base class for preparators that operate in parallel.
  *
  * A parallel preparator can run in parallel on a cluster, and the prepared
  * data it produces is distributed across that cluster.
  *
  * @tparam TD Training data class.
  * @tparam PD Prepared data class.
  * @group Preparator
  */
abstract class PPreparator[TD, PD] extends BasePreparator[TD, PD] {

  /** Package-restricted entry point that forwards its arguments, unchanged,
    * to [[prepare]] and returns whatever [[prepare]] returns.
    *
    * Only accessible from within the `prediction` package.
    */
  private[prediction] def prepareBase(sc: SparkContext, td: TD): PD =
    prepare(sc, td)

  /** Implement this method to turn training data into prepared data that is
    * ready for model training.
    *
    * @param sc An Apache Spark context.
    * @param trainingData Training data to be prepared.
    * @return The prepared data.
    */
  def prepare(sc: SparkContext, trainingData: TD): PD
}