blob: 25ccb9bc32da77cea502443cbddfe404d79a4702 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.benchmark.quality;
import java.io.IOException;
import java.io.PrintWriter;
import org.apache.lucene.benchmark.quality.utils.DocNameExtractor;
import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
/**
 * Main entry point for running a quality benchmark.
 *
 * <p>There are two main configurations for running a quality benchmark:
 *
 * <ul>
 *   <li>Against existing judgements.
 *   <li>For submission (e.g. for a contest).
 * </ul>
 *
 * <p>The first configuration requires a non-null {@link
 * org.apache.lucene.benchmark.quality.Judge Judge}. The second configuration requires a non-null
 * {@link org.apache.lucene.benchmark.quality.utils.SubmissionReport SubmissionReport}.
 *
 * <p>Search and doc-name-extraction times are measured with {@link System#nanoTime()} and handed
 * to {@link QualityStats} as whole milliseconds.
 */
public class QualityBenchmark {

  /** Number of nanoseconds in one millisecond; used to convert elapsed times. */
  private static final long NANOS_PER_MILLI = 1000L * 1000L;

  /** Quality Queries that this quality benchmark would execute. */
  protected QualityQuery[] qualityQueries;

  /** Parser for turning QualityQueries into Lucene Queries. */
  protected QualityQueryParser qqParser;

  /** Index to be searched. */
  protected IndexSearcher searcher;

  /** Index field to extract doc name for each search result; used for judging the results. */
  protected String docNameField;

  /**
   * Maximal number of queries that this quality benchmark runs. Default: {@link
   * Integer#MAX_VALUE}. Useful for debugging.
   */
  private int maxQueries = Integer.MAX_VALUE;

  /** Maximal number of results to collect for each query. Default: 1000. */
  private int maxResults = 1000;

  /**
   * Create a QualityBenchmark.
   *
   * @param qqs quality queries to run.
   * @param qqParser parser for turning QualityQueries into Lucene Queries.
   * @param searcher index to be searched.
   * @param docNameField name of field containing the document name. This allows to extract the doc
   *     name for search results, and is important for judging the results.
   */
  public QualityBenchmark(
      QualityQuery[] qqs,
      QualityQueryParser qqParser,
      IndexSearcher searcher,
      String docNameField) {
    this.qualityQueries = qqs;
    this.qqParser = qqParser;
    this.searcher = searcher;
    this.docNameField = docNameField;
  }

  /**
   * Run the quality benchmark.
   *
   * <p>At most {@link #getMaxQueries()} queries are run, taken from the start of the query array;
   * a negative limit runs no queries at all. Each query is parsed, searched for up to {@link
   * #getMaxResults()} hits, and then judged and/or reported depending on which of {@code judge}
   * and {@code submitRep} are non-null.
   *
   * @param judge the judge that can tell if a certain result doc is relevant for a certain quality
   *     query. If null, no judgements would be made. Usually null for a submission run.
   * @param submitRep submission report is created if non null.
   * @param qualityLog If not null, quality run data would be printed for each query.
   * @return QualityStats of each quality query that was executed. The entries are {@code null}
   *     when {@code judge} is null, since no statistics are computed in that case.
   * @throws Exception if quality benchmark failed to run.
   */
  public QualityStats[] execute(Judge judge, SubmissionReport submitRep, PrintWriter qualityLog)
      throws Exception {
    // Clamp at zero so that a negative maxQueries runs nothing instead of failing with a
    // NegativeArraySizeException on the allocation below.
    int nQueries = Math.max(0, Math.min(maxQueries, qualityQueries.length));
    QualityStats[] stats = new QualityStats[nQueries];
    for (int i = 0; i < nQueries; i++) {
      QualityQuery qq = qualityQueries[i];
      // generate query
      Query q = qqParser.parse(qq);
      // search with this query; nanoTime is monotonic, unlike the wall clock
      long searchStart = System.nanoTime();
      TopDocs td = searcher.search(q, maxResults);
      long searchTime = elapsedMillis(searchStart);
      // most likely we either submit or judge, but check both
      if (judge != null) {
        stats[i] = analyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
      }
      if (submitRep != null) {
        submitRep.report(qq, td, docNameField, searcher);
      }
    }
    if (submitRep != null) {
      submitRep.flush();
    }
    return stats;
  }

  /**
   * Analyze/judge results for a single quality query; optionally log them.
   *
   * @param qq the quality query whose results are judged.
   * @param q the Lucene query that was executed for {@code qq}; only used for logging.
   * @param td the search results to judge.
   * @param judge decides whether each result document is relevant for {@code qq}.
   * @param logger if non-null, receives the query and its statistics.
   * @param searchTime time the search took, in milliseconds.
   * @return the statistics gathered for this query.
   * @throws IOException if extracting a document name from the index fails.
   */
  private QualityStats analyzeQueryResults(
      QualityQuery qq, Query q, TopDocs td, Judge judge, PrintWriter logger, long searchTime)
      throws IOException {
    QualityStats stts = new QualityStats(judge.maxRecall(qq), searchTime);
    ScoreDoc[] sd = td.scoreDocs;
    // For the first doc name we deliberately also measure the construction of the doc name
    // extractor, just in case.
    long extractStart = System.nanoTime();
    DocNameExtractor xt = new DocNameExtractor(docNameField);
    for (int i = 0; i < sd.length; i++) {
      String docName = xt.docName(searcher, sd[i].doc);
      long docNameExtractTime = elapsedMillis(extractStart);
      boolean isRelevant = judge.isRelevant(docName, qq);
      stts.addResult(i + 1, isRelevant, docNameExtractTime); // ranks are 1-based
      // Restart the clock only after judging, so that the judge's time is not billed to the
      // next document's name extraction.
      extractStart = System.nanoTime();
    }
    if (logger != null) {
      logger.println(qq.getQueryID() + " - " + q);
      stts.log(qq.getQueryID() + " Stats:", 1, logger, " ");
    }
    return stts;
  }

  /**
   * Returns the whole milliseconds elapsed since {@code startNanos}.
   *
   * @param startNanos an earlier reading of {@link System#nanoTime()}.
   */
  private static long elapsedMillis(long startNanos) {
    return (System.nanoTime() - startNanos) / NANOS_PER_MILLI;
  }

  /** @return the maximum number of quality queries to run. Useful at debugging. */
  public int getMaxQueries() {
    return maxQueries;
  }

  /**
   * Set the maximum number of quality queries to run. Useful at debugging. A negative value
   * causes {@link #execute} to run no queries.
   */
  public void setMaxQueries(int maxQueries) {
    this.maxQueries = maxQueries;
  }

  /** @return the maximum number of results to collect for each quality query. */
  public int getMaxResults() {
    return maxResults;
  }

  /** Set the maximum number of results to collect for each quality query. */
  public void setMaxResults(int maxResults) {
    this.maxResults = maxResults;
  }
}