blob: c4255bc855f661f4d52cc328652be8a6627db01e [file] [log] [blame]
package org.template.ecommercerecommendation
/*
import io.prediction.controller.P2LAlgorithm
import io.prediction.controller.Params
case class PopularAlgorithmParams() extends Params
class PopularModel(
val itemModel: Vector[(String, (Item, Int))] // Vector of (item ID, (Item, Count))
) extends Serializable {
}
class PopularAlgorithm(val ap: PopularAlgorithmParams)
extends P2LAlgorithm[PreparedData, PopularModel, Query, PredictedResult] {
def train(sc: SparkContext, data: PreparedData): PopularModel = {
// calculate number of buys for each item
val buyCounts: RDD[(String, Int)] = data.buyEvents
.map { buy => (buy.item, 1) }
.reduceByKey{ case (a, b) => a + b }
// combine item data with the count
val itemWithCount: RDD[(String, (Item, Int))] = data.items.join(buyCounts)
// collect to a local vector, sort by count in descending order, and save as the model
val itemModel = itemWithCount.collect.toVector
.sortBy{ case (id, (item, count)) => count }(Ordering.Int.reverse)
PopularModel(
itemModel = itemModel
)
}
def predict(model: PopularModel, query: Query): PredictedResult = {
model.itemModel.filter {
case (id, (item, count)) =>
isCandidateItem(
)
)
}
}
private
def isCandidateItem(
i: Int,
item: Item,
categories: Option[Set[String]],
whiteList: Option[Set[Int]],
blackList: Set[Int]
): Boolean = {
// can add other custom filtering here
whiteList.map(_.contains(i)).getOrElse(true) &&
!blackList.contains(i) &&
// filter categories
categories.map { cat =>
item.categories.map { itemCat =>
// keep this item if it has overlapping categories with the query
!(itemCat.toSet.intersect(cat).isEmpty)
}.getOrElse(false) // discard this item if it has no categories
}.getOrElse(true)
}
}
*/