lucene/benchmark/src/java/org/apache/lucene/benchmark/quality/QualityBenchmark.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.benchmark.quality;

 import java.io.IOException;
 import java.io.PrintWriter;
 import org.apache.lucene.benchmark.quality.utils.DocNameExtractor;
 import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TopDocs;

 /**
  * Main entry point for running a quality benchmark.
  *
  * <p>There are two main configurations for running a quality benchmark:
  *
  * <ul>
  *   <li>Against existing judgements.
  *   <li>For submission (e.g. for a contest).
  * </ul>
  *
  * <p>The first configuration requires a non null {@link org.apache.lucene.benchmark.quality.Judge
  * Judge}. The second configuration requires a non null {@link
  * org.apache.lucene.benchmark.quality.utils.SubmissionReport SubmissionReport}.
  *
  * <p>Elapsed times handed to {@link QualityStats} are expressed in milliseconds and are measured
  * with {@link System#nanoTime()}, so that wall-clock adjustments made while the benchmark runs
  * cannot distort (or make negative) the reported durations.
  */
 public class QualityBenchmark {

   /** Number of nanoseconds in one millisecond; converts {@link System#nanoTime()} deltas. */
   private static final long NANOS_PER_MILLI = 1_000_000L;

   /** Quality Queries that this quality benchmark would execute. */
   protected QualityQuery[] qualityQueries;

   /** Parser for turning QualityQueries into Lucene Queries. */
   protected QualityQueryParser qqParser;

   /** Index to be searched. */
   protected IndexSearcher searcher;

   /** Index field to extract doc name for each search result; used for judging the results. */
   protected String docNameField;

   /**
    * Maximal number of queries that this quality benchmark runs. Default: {@link
    * Integer#MAX_VALUE}. Useful for debugging.
    */
   private int maxQueries = Integer.MAX_VALUE;

   /** Maximal number of results to collect for each query. Default: 1000. */
   private int maxResults = 1000;

   /**
    * Create a QualityBenchmark.
    *
    * @param qqs quality queries to run.
    * @param qqParser parser for turning QualityQueries into Lucene Queries.
    * @param searcher index to be searched.
    * @param docNameField name of field containing the document name. This allows to extract the doc
    *     name for search results, and is important for judging the results.
    */
   public QualityBenchmark(
       QualityQuery[] qqs,
       QualityQueryParser qqParser,
       IndexSearcher searcher,
       String docNameField) {
     this.qualityQueries = qqs;
     this.qqParser = qqParser;
     this.searcher = searcher;
     this.docNameField = docNameField;
   }

   /**
    * Run the quality benchmark.
    *
    * <p>At most {@link #getMaxQueries()} queries are run, in the order in which they were given to
    * the constructor, and at most {@link #getMaxResults()} results are collected for each of them.
    *
    * @param judge the judge that can tell if a certain result doc is relevant for a certain quality
    *     query. If null, no judgements would be made. Usually null for a submission run.
    * @param submitRep submission report is created if non null.
    * @param qualityLog If not null, quality run data would be printed for each query.
    * @return QualityStats of each quality query that was executed, indexed like the executed
    *     queries. Every entry is null when {@code judge} is null, because statistics are only
    *     computed when results are judged.
    * @throws Exception if quality benchmark failed to run.
    */
   public QualityStats[] execute(Judge judge, SubmissionReport submitRep, PrintWriter qualityLog)
       throws Exception {
     int nQueries = Math.min(maxQueries, qualityQueries.length);
     QualityStats[] stats = new QualityStats[nQueries];
     for (int i = 0; i < nQueries; i++) {
       QualityQuery qq = qualityQueries[i];
       // generate query
       Query q = qqParser.parse(qq);
       // search with this query; only the search call itself is timed
       long searchStart = System.nanoTime();
       TopDocs td = searcher.search(q, maxResults);
       long searchTime = elapsedMillis(searchStart);
       // most likely we either submit or judge, but check both
       if (judge != null) {
         stats[i] = analyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
       }
       if (submitRep != null) {
         submitRep.report(qq, td, docNameField, searcher);
       }
     }
     if (submitRep != null) {
       submitRep.flush();
     }
     return stats;
   }

   /**
    * Analyze/judge results for a single quality query; optionally log them.
    *
    * @param qq the quality query whose results are judged.
    * @param q the Lucene query that was run for {@code qq}; only used for logging.
    * @param td the search results to judge.
    * @param judge decides whether each result doc is relevant for {@code qq}.
    * @param logger if not null, the query and its statistics are printed to it.
    * @param searchTime search time in milliseconds, recorded in the returned statistics.
    * @return the statistics accumulated over all results in {@code td}.
    * @throws IOException if extracting a doc name from the index fails.
    */
   private QualityStats analyzeQueryResults(
       QualityQuery qq, Query q, TopDocs td, Judge judge, PrintWriter logger, long searchTime)
       throws IOException {
     QualityStats stts = new QualityStats(judge.maxRecall(qq), searchTime);
     ScoreDoc[] sd = td.scoreDocs;
     // For the first doc name we deliberately also measure construction of the doc name
     // extractor, just in case.
     long extractStart = System.nanoTime();
     DocNameExtractor xt = new DocNameExtractor(docNameField);
     for (int i = 0; i < sd.length; i++) {
       String docName = xt.docName(searcher, sd[i].doc);
       long docNameExtractTime = elapsedMillis(extractStart);
       boolean isRelevant = judge.isRelevant(docName, qq);
       stts.addResult(i + 1, isRelevant, docNameExtractTime);
       // Restart the clock only after judging and bookkeeping are done, so that their cost is
       // not attributed to the extraction of the next doc name.
       extractStart = System.nanoTime();
     }
     if (logger != null) {
       logger.println(qq.getQueryID() + "  -  " + q);
       stts.log(qq.getQueryID() + " Stats:", 1, logger, "  ");
     }
     return stts;
   }

   /**
    * Milliseconds elapsed since {@code startNanos}, truncated towards zero.
    *
    * @param startNanos an earlier value returned by {@link System#nanoTime()}.
    */
   private static long elapsedMillis(long startNanos) {
     return (System.nanoTime() - startNanos) / NANOS_PER_MILLI;
   }

   /** @return the maximum number of quality queries to run. Useful for debugging. */
   public int getMaxQueries() {
     return maxQueries;
   }

   /** Set the maximum number of quality queries to run. Useful for debugging. */
   public void setMaxQueries(int maxQueries) {
     this.maxQueries = maxQueries;
   }

   /** @return the maximum number of results to collect for each quality query. */
   public int getMaxResults() {
     return maxResults;
   }

   /** Set the maximum number of results to collect for each quality query. */
   public void setMaxResults(int maxResults) {
     this.maxResults = maxResults;
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.benchmark.quality;

	import java.io.IOException;
	import java.io.PrintWriter;
	import org.apache.lucene.benchmark.quality.utils.DocNameExtractor;
	import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
	import org.apache.lucene.search.IndexSearcher;
	import org.apache.lucene.search.Query;
	import org.apache.lucene.search.ScoreDoc;
	import org.apache.lucene.search.TopDocs;

	/**
	 * Main entry point for running a quality benchmark.
	 *
	 * <p>There are two main configurations for running a quality benchmark:
	 *
	 * <ul>
	 *   <li>Against existing judgements.
	 *   <li>For submission (e.g. for a contest).
	 * </ul>
	 *
	 * <p>The first configuration requires a non null {@link org.apache.lucene.benchmark.quality.Judge
	 * Judge}. The second configuration requires a non null {@link
	 * org.apache.lucene.benchmark.quality.utils.SubmissionReport SubmissionReport}.
	 *
	 * <p>Elapsed times handed to {@link QualityStats} are expressed in milliseconds and are measured
	 * with {@link System#nanoTime()}, so that wall-clock adjustments made while the benchmark runs
	 * cannot distort (or make negative) the reported durations.
	 */
	public class QualityBenchmark {

		/** Number of nanoseconds in one millisecond; converts {@link System#nanoTime()} deltas. */
		private static final long NANOS_PER_MILLI = 1_000_000L;

		/** Quality Queries that this quality benchmark would execute. */
		protected QualityQuery[] qualityQueries;

		/** Parser for turning QualityQueries into Lucene Queries. */
		protected QualityQueryParser qqParser;

		/** Index to be searched. */
		protected IndexSearcher searcher;

		/** Index field to extract doc name for each search result; used for judging the results. */
		protected String docNameField;

		/**
		 * Maximal number of queries that this quality benchmark runs. Default: {@link
		 * Integer#MAX_VALUE}. Useful for debugging.
		 */
		private int maxQueries = Integer.MAX_VALUE;

		/** Maximal number of results to collect for each query. Default: 1000. */
		private int maxResults = 1000;

		/**
		 * Create a QualityBenchmark.
		 *
		 * @param qqs quality queries to run.
		 * @param qqParser parser for turning QualityQueries into Lucene Queries.
		 * @param searcher index to be searched.
		 * @param docNameField name of field containing the document name. This allows to extract the
		 *     doc name for search results, and is important for judging the results.
		 */
		public QualityBenchmark(
				QualityQuery[] qqs,
				QualityQueryParser qqParser,
				IndexSearcher searcher,
				String docNameField) {
			this.qualityQueries = qqs;
			this.qqParser = qqParser;
			this.searcher = searcher;
			this.docNameField = docNameField;
		}

		/**
		 * Run the quality benchmark.
		 *
		 * <p>At most {@link #getMaxQueries()} queries are run, in the order in which they were given
		 * to the constructor, and at most {@link #getMaxResults()} results are collected for each of
		 * them.
		 *
		 * @param judge the judge that can tell if a certain result doc is relevant for a certain
		 *     quality query. If null, no judgements would be made. Usually null for a submission run.
		 * @param submitRep submission report is created if non null.
		 * @param qualityLog If not null, quality run data would be printed for each query.
		 * @return QualityStats of each quality query that was executed, indexed like the executed
		 *     queries. Every entry is null when {@code judge} is null, because statistics are only
		 *     computed when results are judged.
		 * @throws Exception if quality benchmark failed to run.
		 */
		public QualityStats[] execute(Judge judge, SubmissionReport submitRep, PrintWriter qualityLog)
				throws Exception {
			int nQueries = Math.min(maxQueries, qualityQueries.length);
			QualityStats[] stats = new QualityStats[nQueries];
			for (int i = 0; i < nQueries; i++) {
				QualityQuery qq = qualityQueries[i];
				// generate query
				Query q = qqParser.parse(qq);
				// search with this query; only the search call itself is timed
				long searchStart = System.nanoTime();
				TopDocs td = searcher.search(q, maxResults);
				long searchTime = elapsedMillis(searchStart);
				// most likely we either submit or judge, but check both
				if (judge != null) {
					stats[i] = analyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
				}
				if (submitRep != null) {
					submitRep.report(qq, td, docNameField, searcher);
				}
			}
			if (submitRep != null) {
				submitRep.flush();
			}
			return stats;
		}

		/**
		 * Analyze/judge results for a single quality query; optionally log them.
		 *
		 * @param qq the quality query whose results are judged.
		 * @param q the Lucene query that was run for {@code qq}; only used for logging.
		 * @param td the search results to judge.
		 * @param judge decides whether each result doc is relevant for {@code qq}.
		 * @param logger if not null, the query and its statistics are printed to it.
		 * @param searchTime search time in milliseconds, recorded in the returned statistics.
		 * @return the statistics accumulated over all results in {@code td}.
		 * @throws IOException if extracting a doc name from the index fails.
		 */
		private QualityStats analyzeQueryResults(
				QualityQuery qq, Query q, TopDocs td, Judge judge, PrintWriter logger, long searchTime)
				throws IOException {
			QualityStats stts = new QualityStats(judge.maxRecall(qq), searchTime);
			ScoreDoc[] sd = td.scoreDocs;
			// For the first doc name we deliberately also measure construction of the doc name
			// extractor, just in case.
			long extractStart = System.nanoTime();
			DocNameExtractor xt = new DocNameExtractor(docNameField);
			for (int i = 0; i < sd.length; i++) {
				String docName = xt.docName(searcher, sd[i].doc);
				long docNameExtractTime = elapsedMillis(extractStart);
				boolean isRelevant = judge.isRelevant(docName, qq);
				stts.addResult(i + 1, isRelevant, docNameExtractTime);
				// Restart the clock only after judging and bookkeeping are done, so that their cost
				// is not attributed to the extraction of the next doc name.
				extractStart = System.nanoTime();
			}
			if (logger != null) {
				logger.println(qq.getQueryID() + " - " + q);
				stts.log(qq.getQueryID() + " Stats:", 1, logger, " ");
			}
			return stts;
		}

		/**
		 * Milliseconds elapsed since {@code startNanos}, truncated towards zero.
		 *
		 * @param startNanos an earlier value returned by {@link System#nanoTime()}.
		 */
		private static long elapsedMillis(long startNanos) {
			return (System.nanoTime() - startNanos) / NANOS_PER_MILLI;
		}

		/** @return the maximum number of quality queries to run. Useful for debugging. */
		public int getMaxQueries() {
			return maxQueries;
		}

		/** Set the maximum number of quality queries to run. Useful for debugging. */
		public void setMaxQueries(int maxQueries) {
			this.maxQueries = maxQueries;
		}

		/** @return the maximum number of results to collect for each quality query. */
		public int getMaxResults() {
			return maxResults;
		}

		/** Set the maximum number of results to collect for each quality query. */
		public void setMaxResults(int maxResults) {
			this.maxResults = maxResults;
		}
	}