/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.solr.ltr.interleaving.algorithms;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.Random;
import java.util.Set;

import org.apache.lucene.search.ScoreDoc;

import org.apache.solr.ltr.interleaving.Interleaving;
import org.apache.solr.ltr.interleaving.InterleavingResult;

/**
 * Interleaving was first introduced by Joachims in [1, 2].
 * Team Draft Interleaving is among the most successful and widely used interleaving approaches [3].
 * Team Draft Interleaving implements a method similar to the way captains select their players in team matches.
 * Team Draft Interleaving produces a fair distribution of the ranking models' elements in the final interleaved list.
 * Team Draft Interleaving has also been shown to overcome an issue that the Balanced Interleaving approach has in determining the winning model [4].
 * <p>
 * [1] T. Joachims. Optimizing search engines using clickthrough data. KDD (2002)
 * [2] T. Joachims. Evaluating retrieval performance using clickthrough data. In J. Franke, G. Nakhaeizadeh,
 * and I. Renz, editors, Text Mining, pages 79–96. Physica/Springer (2003)
 * [3] F. Radlinski, M. Kurup, and T. Joachims. How does clickthrough data reflect retrieval quality?
 * In CIKM, pages 43–52. ACM Press (2008)
 * [4] O. Chapelle, T. Joachims, F. Radlinski, and Y. Yue. Large-scale validation and analysis of
 * interleaved search evaluation. ACM TOIS, 30(1):1–41, Feb. (2012)
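 * <p>
 * A minimal usage sketch (illustrative only: the document ids and scores below are made up, and
 * within Solr this class is normally chosen through the LTR interleaving request configuration
 * rather than instantiated directly):
 * <pre>{@code
 * ScoreDoc[] rerankedA = {new ScoreDoc(1, 9.0f), new ScoreDoc(2, 7.0f), new ScoreDoc(3, 5.0f)};
 * ScoreDoc[] rerankedB = {new ScoreDoc(3, 8.0f), new ScoreDoc(1, 6.0f), new ScoreDoc(2, 4.0f)};
 * TeamDraftInterleaving teamDraft = new TeamDraftInterleaving();
 * InterleavingResult result = teamDraft.interleave(rerankedA, rerankedB);
 * // result holds the interleaved ScoreDoc array plus, for each team, the set of docs it picked
 * }</pre>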
 */
public class TeamDraftInterleaving implements Interleaving {
  public static Random RANDOM;

  static {
    // We try to make things reproducible in the context of our tests by initializing the random
    // instance based on the current seed
    String seed = System.getProperty("tests.seed");
    if (seed == null) {
      RANDOM = new Random();
    } else {
      RANDOM = new Random(seed.hashCode());
    }
  }
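
  // For deterministic behavior outside of the test-seed mechanism above, a fixed-seed Random can be
  // injected via setRANDOM, e.g. TeamDraftInterleaving.setRANDOM(new Random(42)); (illustrative seed).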
  /**
   * Team Draft Interleaving considers two ranking models: modelA and modelB.
   * For a given query, each model returns its ranked list of documents La = (a1, a2, ...) and Lb = (b1, b2, ...).
   * The algorithm creates a single interleaved ranked list I = (i1, i2, ...).
   * This list is created by interleaving elements from the two lists La and Lb as described by Chapelle et al. [1].
   * Each element ij is labelled TeamA if it is selected from La and TeamB if it is selected from Lb.
   * <p>
   * [1] O. Chapelle, T. Joachims, F. Radlinski, and Y. Yue. Large-scale validation and analysis of
   * interleaved search evaluation. ACM TOIS, 30(1):1–41, Feb. (2012)
   * <p>
   * Assumptions:
   * - rerankedA and rerankedB have the same length.
   *   They contain the same search results, ranked differently by the two ranking models.
   * - Each reranked list does not contain the same search result more than once.
   * - All results are from the same shard.
   *
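   * Illustrative example (one possible outcome, since picks at equal team sizes are decided by a
   * random coin flip): with La = (d1, d2, d3) and Lb = (d3, d1, d2), a run in which modelA wins the
   * first flip picks d1 for TeamA, then d3 for TeamB, then (after another flip won by modelA) d2 for
   * TeamA, producing I = (d1, d3, d2) with TeamA = {d1, d2} and TeamB = {d3}.
   *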
   * @param rerankedA a ranked list of search results produced by a ranking model A
   * @param rerankedB a ranked list of search results produced by a ranking model B
   * @return the interleaved ranking list
   */
  public InterleavingResult interleave(ScoreDoc[] rerankedA, ScoreDoc[] rerankedB) {
    LinkedList<ScoreDoc> interleavedResults = new LinkedList<>();
    HashSet<Integer> alreadyAdded = new HashSet<>();
    ScoreDoc[] interleavedResultArray = new ScoreDoc[rerankedA.length];
    ArrayList<Set<Integer>> interleavingPicks = new ArrayList<>(2);
    Set<Integer> teamA = new HashSet<>();
    Set<Integer> teamB = new HashSet<>();
    int topN = rerankedA.length;
    int indexA = 0, indexB = 0;

    while (interleavedResults.size() < topN && indexA < rerankedA.length && indexB < rerankedB.length) {
      // The team with fewer picks chooses next; on a tie, a coin flip decides which team picks.
      if (teamA.size() < teamB.size() || (teamA.size() == teamB.size() && !RANDOM.nextBoolean())) {
        // Skip results already taken by the other team, then pick modelA's next best result.
        indexA = updateIndex(alreadyAdded, indexA, rerankedA);
        interleavedResults.add(rerankedA[indexA]);
        alreadyAdded.add(rerankedA[indexA].doc);
        teamA.add(rerankedA[indexA].doc);
        indexA++;
      } else {
        // Skip results already taken by the other team, then pick modelB's next best result.
        indexB = updateIndex(alreadyAdded, indexB, rerankedB);
        interleavedResults.add(rerankedB[indexB]);
        alreadyAdded.add(rerankedB[indexB].doc);
        teamB.add(rerankedB[indexB].doc);
        indexB++;
      }
    }
    interleavingPicks.add(teamA);
    interleavingPicks.add(teamB);
    interleavedResultArray = interleavedResults.toArray(interleavedResultArray);
    return new InterleavingResult(interleavedResultArray, interleavingPicks);
  }
  /**
   * Advances {@code index} past results whose documents have already been added to the interleaved
   * list, returning the first index still to be added (or {@code reranked.length} if none remains).
   */
  private int updateIndex(HashSet<Integer> alreadyAdded, int index, ScoreDoc[] reranked) {
    boolean foundElementToAdd = false;
    while (index < reranked.length && !foundElementToAdd) {
      ScoreDoc elementToCheck = reranked[index];
      if (alreadyAdded.contains(elementToCheck.doc)) {
        index++;
      } else {
        foundElementToAdd = true;
      }
    }
    return index;
  }
  public static void setRANDOM(Random RANDOM) {
    TeamDraftInterleaving.RANDOM = RANDOM;
  }
}