// blob: 25de98dbadaf3d5dad69b63904fb3ac7b5daf9a3 [file] [log] [blame]
package io.prediction.algorithms.mahout.itemrec.knnitembased
import org.apache.mahout.cf.taste.common.TasteException
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender
import org.apache.mahout.cf.taste.impl.recommender.AbstractRecommender
import org.apache.mahout.cf.taste.model.DataModel
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity
import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy
import org.apache.mahout.cf.taste.recommender.MostSimilarItemsCandidateItemsStrategy
import org.apache.mahout.cf.taste.impl.recommender.EstimatedPreferenceCapper
import scala.collection.mutable.PriorityQueue
import scala.collection.JavaConversions._
import io.prediction.algorithms.mahout.itemrec.AllPreferredItemsNeighborhoodCandidateItemsStrategy
/**
 * Extension to Mahout's GenericItemBasedRecommender
 * with the additional settings: booleanData, neighbourSize, threshold.
 *
 * A preference for an item is estimated only from the items the user has
 * already rated whose similarity to the target item is defined (not NaN) and
 * at least `threshold`; of those, only the `neighbourSize` most similar ones
 * are used.
 *
 * @param dataModel passed through to GenericItemBasedRecommender
 * @param similarity item-item similarity used both by the parent class and by
 *                   [[doEstimatePreference]]
 * @param candidateItemsStrategy passed through to GenericItemBasedRecommender
 * @param mostSimilarItemsCandidateItemsStrategy passed through to
 *                                               GenericItemBasedRecommender
 * @param booleanData if true, the estimate is the sum of the neighbours'
 *                    similarities; otherwise it is the similarity-weighted
 *                    average of the user's preference values
 * @param neighbourSize maximum number of most-similar rated items to use
 * @param threshold minimum similarity (inclusive) a rated item must have to be
 *                  considered a neighbour
 */
class KNNItemBasedRecommender(dataModel: DataModel,
  similarity: ItemSimilarity,
  candidateItemsStrategy: CandidateItemsStrategy,
  mostSimilarItemsCandidateItemsStrategy: MostSimilarItemsCandidateItemsStrategy,
  booleanData: Boolean,
  neighbourSize: Int,
  threshold: Double) extends GenericItemBasedRecommender(dataModel, similarity, candidateItemsStrategy,
  mostSimilarItemsCandidateItemsStrategy) {

  /**
   * Caps estimates using the data model; None when the model's minimum or
   * maximum preference is NaN.
   */
  val capper: Option[EstimatedPreferenceCapper] =
    if (getDataModel().getMinPreference().isNaN || getDataModel().getMaxPreference().isNaN)
      None
    else
      Some(new EstimatedPreferenceCapper(getDataModel()))

  /** Same as the primary constructor, with Mahout's default most-similar-items candidate strategy. */
  def this(dataModel: DataModel, similarity: ItemSimilarity,
    candidateItemsStrategy: CandidateItemsStrategy,
    booleanData: Boolean, neighbourSize: Int, threshold: Double) =
    this(dataModel, similarity, candidateItemsStrategy,
      GenericItemBasedRecommender.getDefaultMostSimilarItemsCandidateItemsStrategy(),
      booleanData, neighbourSize, threshold)

  /** Same as the primary constructor, with Mahout's default strategies for both candidate selections. */
  def this(dataModel: DataModel, similarity: ItemSimilarity, booleanData: Boolean, neighbourSize: Int, threshold: Double) =
    this(dataModel, similarity, AbstractRecommender.getDefaultCandidateItemsStrategy(),
      GenericItemBasedRecommender.getDefaultMostSimilarItemsCandidateItemsStrategy(), booleanData, neighbourSize, threshold)

  /**
   * Estimates the preference for `itemID` from the items in `preferencesFromUser`.
   *
   * @param userID not used by this implementation
   * @param preferencesFromUser the user's existing preferences
   * @param itemID the item to estimate a preference for
   * @return for boolean data, the sum of the neighbours' similarities; otherwise
   *         the similarity-weighted average of the neighbours' preference values
   *         (capped when a capper is available), or NaN when there are fewer
   *         than two neighbours
   */
  @throws(classOf[TasteException])
  override def doEstimatePreference(userID: Long, preferencesFromUser: PreferenceArray, itemID: Long): Float = {
    // (rated item id, similarity to itemID, index into preferencesFromUser);
    // the index is kept so the preference value can be looked up later.
    val ratedIds = preferencesFromUser.getIDs()
      .zipWithIndex
      .map { case (id, index) => (id, similarity.itemSimilarity(itemID, id), index) }
      .filter { case (_, sim, _) => !sim.isNaN && sim >= threshold }

    // Reversed ordering so that "smallest" means "most similar".
    val neighbourRatedIds = getTopN(ratedIds, neighbourSize)(RatedIdOdering.reverse)

    if (booleanData) {
      // NOTE(review): with no neighbours this yields 0.0 rather than NaN —
      // confirm that is the intended "no estimate" value for boolean data.
      val totalSimilarity = neighbourRatedIds.foldLeft(0.0) { case (acc, (_, sim, _)) => acc + sim }
      totalSimilarity.toFloat
    } else if (neighbourRatedIds.size <= 1) {
      // If there is only 1 similar item, the estimated preference would be the
      // same as the preference of that item regardless of similarity, so don't
      // count it and return NaN instead.
      Float.NaN
    } else {
      val (totalPreference, totalSimilarity) =
        neighbourRatedIds.foldLeft((0.0, 0.0)) { case ((accPreference, accSimilarity), (_, sim, index)) =>
          (accPreference + (sim * preferencesFromUser.getValue(index)), accSimilarity + sim)
        }
      val estimate = (totalPreference / totalSimilarity).toFloat
      capper.map(c => c.capEstimate(estimate)).getOrElse(estimate)
    }
  }

  /** Orders (id, similarity, index) triples by ascending similarity. (Name spelling kept for compatibility.) */
  object RatedIdOdering extends Ordering[(Long, Double, Int)] {
    override def compare(a: (Long, Double, Int), b: (Long, Double, Int)): Int = a._2 compare b._2
  }

  /**
   * Returns up to `n` elements of `s` that are smallest according to `ord`,
   * sorted in ascending `ord` order. The relative order of elements that
   * compare equal is unspecified.
   *
   * @param s elements to select from
   * @param n maximum number of elements to return; a value <= 0 yields an
   *          empty result
   * @param ord ordering that defines "smallest"
   */
  def getTopN[T](s: Seq[T], n: Int)(implicit ord: Ordering[T]): Seq[T] = {
    if (n <= 0) {
      // Nothing can be kept; also avoids calling head on an empty queue below.
      Seq.empty[T]
    } else {
      // Max-heap under ord: head is the largest element currently kept.
      val q = PriorityQueue.empty[T](ord)
      for (x <- s) {
        if (q.size < n) {
          q.enqueue(x)
        } else if (ord.compare(x, q.head) < 0) {
          // q is full and x beats the worst kept element: swap them.
          q.dequeue()
          q.enqueue(x)
        }
      }
      // dequeueAll yields largest-first; reverse to get smallest-first.
      q.dequeueAll.toSeq.reverse
    }
  }

  override def toString(): String = {
    "KNNItemBasedRecommender"
  }
}