// blob: 9f522b85f1d21fbcc556312e1e0482f6a7f72e71 [file] [log] [blame]
package io.prediction.algorithms.mahout.itemrec.thresholduserbased
import scala.collection.JavaConversions._
import io.prediction.algorithms.mahout.itemrec.MahoutJob
import io.prediction.algorithms.mahout.itemrec.{ UserBasedRecommender, BooleanPrefUserBasedRecommender }
import org.apache.mahout.cf.taste.model.DataModel
import org.apache.mahout.cf.taste.recommender.Recommender
import org.apache.mahout.cf.taste.recommender.RecommendedItem
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood
import org.apache.mahout.cf.taste.similarity.UserSimilarity
import org.apache.mahout.cf.taste.common.Weighting
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood
import org.apache.mahout.cf.taste.impl.similarity.{
CityBlockSimilarity,
EuclideanDistanceSimilarity,
LogLikelihoodSimilarity,
PearsonCorrelationSimilarity,
SpearmanCorrelationSimilarity,
TanimotoCoefficientSimilarity,
UncenteredCosineSimilarity
}
/**
 * Mahout job that builds a user-based recommender whose user neighbourhood is
 * a [[ThresholdUserNeighborhood]], i.e. it is selected by a similarity threshold.
 */
class ThresholdUserBasedJob extends MahoutJob {

  /** Names of the user-similarity measures that `buildRecommender` recognises. */
  val userSimilarityValues = Seq(
    "CityBlockSimilarity",
    "EuclideanDistanceSimilarity",
    "LogLikelihoodSimilarity",
    "PearsonCorrelationSimilarity",
    "SpearmanCorrelationSimilarity",
    "TanimotoCoefficientSimilarity",
    "UncenteredCosineSimilarity")

  /** Similarity measure used when the `userSimilarity` argument is not supplied. */
  val defaultUserSimilarity = "PearsonCorrelationSimilarity"

  /**
   * Builds the recommender from the job arguments.
   *
   * Recognised keys in `args` (with the value used when a key is absent):
   *  - `booleanData` (`false`): use `BooleanPrefUserBasedRecommender` instead of
   *    `UserBasedRecommender`.
   *  - `userSimilarity` (`defaultUserSimilarity`): name of the similarity measure.
   *  - `weighted` (`false`): selects `Weighting.WEIGHTED`; only forwarded to the
   *    Euclidean, Pearson and uncentered-cosine similarities.
   *  - `threshold` (`Double.MinPositiveValue`): threshold given to the neighbourhood.
   *  - `samplingRate` (`1.0`): sampling rate given to the neighbourhood.
   *  - `unseenOnly` (`false`): when true, `seenDataModel` is handed to the
   *    recommender; otherwise `null` is passed in its place.
   *
   * @param dataModel     model used for the similarity, neighbourhood and recommender
   * @param seenDataModel model forwarded to the recommender only when `unseenOnly` is set
   * @param validItemIDs  item IDs forwarded to the recommender as an array
   * @param args          job arguments, see above
   * @return the configured recommender
   * @throws RuntimeException if `userSimilarity` is not one of the recognised names
   */
  override def buildRecommender(dataModel: DataModel, seenDataModel: DataModel,
    validItemIDs: Set[Long], args: Map[String, String]): Recommender = {

    // Arguments are read in a fixed order so that the first malformed value
    // is the one whose conversion error surfaces.
    val useBooleanPrefs = getArgOpt(args, "booleanData", "false").toBoolean
    val similarityName = getArgOpt(args, "userSimilarity", defaultUserSimilarity)
    val useWeighting = getArgOpt(args, "weighted", "false").toBoolean
    val minSimilarity = getArgOpt(args, "threshold") match {
      case Some(raw) => raw.toDouble
      case None => Double.MinPositiveValue
    }
    val sampling = getArgOpt(args, "samplingRate", "1.0").toDouble
    val excludeSeen = getArgOpt(args, "unseenOnly", "false").toBoolean

    val weighting = if (useWeighting) Weighting.WEIGHTED else Weighting.UNWEIGHTED

    // Maps a similarity name to its Mahout implementation over `dataModel`.
    def similarityFor(name: String): UserSimilarity = name match {
      case "CityBlockSimilarity" =>
        new CityBlockSimilarity(dataModel)
      case "EuclideanDistanceSimilarity" =>
        new EuclideanDistanceSimilarity(dataModel, weighting)
      case "LogLikelihoodSimilarity" =>
        new LogLikelihoodSimilarity(dataModel)
      case "PearsonCorrelationSimilarity" =>
        new PearsonCorrelationSimilarity(dataModel, weighting)
      case "SpearmanCorrelationSimilarity" =>
        new SpearmanCorrelationSimilarity(dataModel)
      case "TanimotoCoefficientSimilarity" =>
        new TanimotoCoefficientSimilarity(dataModel)
      case "UncenteredCosineSimilarity" =>
        new UncenteredCosineSimilarity(dataModel, weighting)
      case _ =>
        throw new RuntimeException("Invalid UserSimilarity: " + name)
    }

    val measure = similarityFor(similarityName)
    val neighbours: UserNeighborhood =
      new ThresholdUserNeighborhood(minSimilarity, measure, dataModel, sampling)

    // The recommender constructors take a nullable seen-data model; null is
    // passed when seen items are not to be excluded.
    val seenModelOrNull: DataModel = if (excludeSeen) seenDataModel else null

    if (!useBooleanPrefs) {
      new UserBasedRecommender(dataModel, neighbours, measure,
        validItemIDs.toArray, seenModelOrNull)
    } else {
      new BooleanPrefUserBasedRecommender(dataModel, neighbours, measure,
        validItemIDs.toArray, seenModelOrNull)
    }
  }
}