lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java - lucene-solr - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.lucene.benchmark.byTask.tasks;


 import java.io.IOException;

 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiBits;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.search.TopFieldCollector;
 import org.apache.lucene.search.TopScoreDocCollector;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;


 /**
  * Read index (abstract) task.
  * Sub classes implement withSearch(), withWarm(), withTraverse() and withRetrieve()
  * methods to configure the actual action.
  * <p>Note: All ReadTasks reuse the reader if it is already open.
  * Otherwise a reader is opened at start and closed at the end.
  * In both cases the reader is released even when warming or searching throws.
  * <p>
  * The <code>search.num.hits</code> config parameter sets
  * the top number of hits to collect during searching.  If
  * <code>print.hits.field</code> is set, then each hit is
  * printed along with the value of that field.</p>
  *
  * <p>Other side effects: none.
  */
 public abstract class ReadTask extends PerfTask {

   static final int DEFAULT_SEARCH_NUM_HITS = 10;

   /** Source of queries; {@code null} when {@link #withSearch()} was false at construction. */
   private final QueryMaker queryMaker;

   /** Number of top hits to collect; assigned by {@link #setup()}, 0 until then. */
   private int numHits;

   /**
    * Creates the task and, when {@link #withSearch()} is true, obtains the query maker
    * from {@link #getQueryMaker()}.
    * <p>
    * Both methods are invoked from this constructor, i.e. before any subclass field
    * initializer or constructor body has run, so implementations must not rely on
    * subclass state.
    *
    * @param runData shared data of the benchmark run
    */
   public ReadTask(PerfRunData runData) {
     super(runData);
     queryMaker = withSearch() ? getQueryMaker() : null;
   }

   /**
    * Runs the configured read actions: optional warming, then an optional search with
    * optional traversal/retrieval of the hits.
    *
    * @return number of documents loaded while warming, plus one for a search, plus
    *         whatever {@link #withTopDocs(IndexSearcher, Query, TopDocs)} reports
    * @throws Exception if opening the reader, warming or searching fails
    */
   @Override
   public int doLogic() throws Exception {
     // open reader or use existing one
     final IndexSearcher sharedSearcher = getRunData().getIndexSearcher(); // (will incRef the reader)
     final boolean ownsReader = sharedSearcher == null;

     final IndexReader reader;
     if (ownsReader) {
       // open our own reader
       final Directory dir = getRunData().getDirectory();
       reader = DirectoryReader.open(dir);
     } else {
       // use existing one; this passes +1 ref to us
       reader = sharedSearcher.getIndexReader();
     }

     try {
       final IndexSearcher searcher = ownsReader ? new IndexSearcher(reader) : sharedSearcher;
       int res = 0;

       // optionally warm and add num docs traversed to count
       if (withWarm()) {
         res += warmReader(reader);
       }
       if (withSearch()) {
         res += runSearch(searcher, reader);
       }
       return res;
     } finally {
       // Release in finally so that a failing warm-up or query cannot leak the reader
       // we opened, or the +1 ref we hold on the shared one.
       if (ownsReader) {
         reader.close();
       } else {
         reader.decRef();
       }
     }
   }

   /**
    * Loads every live document of {@code reader} once.
    *
    * @return how many documents were loaded
    */
   private int warmReader(IndexReader reader) throws IOException {
     final Bits liveDocs = MultiBits.getLiveDocs(reader);
     final int maxDoc = reader.maxDoc();
     int loaded = 0;
     for (int docID = 0; docID < maxDoc; docID++) {
       // a null liveDocs is treated as "every document is live"
       if (liveDocs == null || liveDocs.get(docID)) {
         if (reader.document(docID) != null) {
           loaded++;
         }
       }
     }
     return loaded;
   }

   /**
    * Makes one query and, if {@link #numHits()} is positive, executes it.
    *
    * @return 1 for the search itself plus the count reported by
    *         {@link #withTopDocs(IndexSearcher, Query, TopDocs)} when top docs were collected
    */
   private int runSearch(IndexSearcher searcher, IndexReader reader) throws Exception {
     int res = 1;
     // The query is made even when it ends up not being executed.
     final Query q = queryMaker.makeQuery();
     final Sort sort = getSort();
     final int numHits = numHits();
     if (numHits <= 0) {
       return res;
     }

     TopDocs hits = null;
     if (withCollector()) {
       // Results gathered by a custom collector are not converted to TopDocs,
       // so they are neither printed nor traversed.
       final Collector collector = createCollector();
       searcher.search(q, collector);
     } else if (sort != null) {
       // Total-hits threshold: Integer.MAX_VALUE asks for an exact count,
       // 1 lets the collector stop counting accurately right away.
       final TopFieldCollector collector =
           TopFieldCollector.create(sort, numHits, withTotalHits() ? Integer.MAX_VALUE : 1);
       searcher.search(q, collector);
       hits = collector.topDocs();
     } else {
       hits = searcher.search(q, numHits);
     }

     if (hits != null) {
       printHits(reader, hits);
       res += withTopDocs(searcher, q, hits);
     }
     return res;
   }

   /**
    * Prints a summary and every hit to standard output when the
    * <code>print.hits.field</code> config parameter is set to a non-empty value.
    */
   private void printHits(IndexReader reader, TopDocs hits) throws IOException {
     final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
     if (printHitsField == null || printHitsField.isEmpty()) {
       return;
     }
     System.out.println("totalHits = " + hits.totalHits);
     System.out.println("maxDoc()  = " + reader.maxDoc());
     System.out.println("numDocs() = " + reader.numDocs());
     final ScoreDoc[] scoreDocs = hits.scoreDocs;
     for (int i = 0; i < scoreDocs.length; i++) {
       final int docID = scoreDocs[i].doc;
       final Document doc = reader.document(docID);
       System.out.println("  " + i + ": doc=" + docID + " score=" + scoreDocs[i].score + " " + printHitsField + " =" + doc.get(printHitsField));
     }
   }

   /**
    * Traverses (and optionally retrieves) the collected hits when
    * {@link #withTraverse()} is true.
    *
    * @param searcher searcher whose reader is used for retrieval
    * @param q the executed query (not used by this implementation)
    * @param hits the collected top docs
    * @return one per traversed hit, plus one per hit whose document was retrieved
    * @throws Exception if retrieving a document fails
    */
   protected int withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
     if (!withTraverse()) {
       return 0;
     }
     final IndexReader reader = searcher.getIndexReader();
     final ScoreDoc[] scoreDocs = hits.scoreDocs;
     final int limit = Math.min(scoreDocs.length, traversalSize());
     if (limit <= 0) {
       return 0;
     }

     final boolean retrieve = withRetrieve();
     int res = 0;
     for (int m = 0; m < limit; m++) {
       res++;
       if (retrieve && retrieveDoc(reader, scoreDocs[m].doc) != null) {
         res++;
       }
     }
     return res;
   }

   /**
    * Creates the collector used when {@link #withCollector()} is true.
    *
    * @return a score-ordered collector for {@link #numHits()} hits
    * @throws Exception declared so that overriding implementations may throw
    */
   protected Collector createCollector() throws Exception {
     return TopScoreDocCollector.create(numHits(), withTotalHits() ? Integer.MAX_VALUE : 1);
   }

   /**
    * Loads one document from the reader.
    *
    * @param ir reader to load from
    * @param id document number
    * @return the loaded document
    * @throws IOException if the reader fails to load the document
    */
   protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
     return ir.document(id);
   }

   /**
    * Return query maker used for this task.
    * Called from the constructor when {@link #withSearch()} is true.
    */
   public abstract QueryMaker getQueryMaker();

   /**
    * Return true if search should be performed.
    * Called from the constructor and again on every {@link #doLogic()}.
    */
   public abstract boolean withSearch();

   /**
    * Return true if the search should run with the collector from
    * {@link #createCollector()} instead of collecting top docs. Default is false.
    */
   public boolean withCollector() {
     return false;
   }

   /**
    * Return true if warming should be performed.
    */
   public abstract boolean withWarm();

   /**
    * Return true if, with search, results should be traversed.
    */
   public abstract boolean withTraverse();

   /**
    * Whether totalHits should be computed exactly. Consulted when collecting with a
    * field sort and by the default {@link #createCollector()}. Default is false.
    */
   public boolean withTotalHits() {
     return false;
   }

   /**
    * Specify the number of hits to traverse.  Tasks should override this if they want to restrict the number
    * of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0;
    * with a smaller value nothing is traversed.
    * <p>
    * Read task calculates the traversal as: Math.min(hits.scoreDocs.length, traversalSize())
    *
    * @return Integer.MAX_VALUE
    */
   public int traversalSize() {
     return Integer.MAX_VALUE;
   }

   /**
    * Reads the <code>search.num.hits</code> config parameter, falling back to
    * {@link #DEFAULT_SEARCH_NUM_HITS}.
    */
   @Override
   public void setup() throws Exception {
     super.setup();
     numHits = getRunData().getConfig().get("search.num.hits", DEFAULT_SEARCH_NUM_HITS);
   }

   /**
    * Specify the number of hits to retrieve.  Tasks should override this if they want to restrict the number
    * of hits that are collected during searching. Must be greater than 0; otherwise
    * {@link #doLogic()} makes the query but does not execute it.
    *
    * @return the value {@link #setup()} read from search.num.hits (10 when unset);
    *         0 as long as {@link #setup()} has not run.
    */
   public int numHits() {
     return numHits;
   }

   /**
    * Return true if, with search and results traversing, docs should be retrieved.
    */
   public abstract boolean withRetrieve();

   /**
    * Sort to apply to the search.
    *
    * @return null (no field sort) by default
    */
   protected Sort getSort() {
     return null;
   }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.lucene.benchmark.byTask.tasks;


	import java.io.IOException;

	import org.apache.lucene.benchmark.byTask.PerfRunData;
	import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
	import org.apache.lucene.document.Document;
	import org.apache.lucene.index.DirectoryReader;
	import org.apache.lucene.index.IndexReader;
	import org.apache.lucene.index.MultiBits;
	import org.apache.lucene.search.Collector;
	import org.apache.lucene.search.IndexSearcher;
	import org.apache.lucene.search.Query;
	import org.apache.lucene.search.ScoreDoc;
	import org.apache.lucene.search.Sort;
	import org.apache.lucene.search.TopDocs;
	import org.apache.lucene.search.TopFieldCollector;
	import org.apache.lucene.search.TopScoreDocCollector;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.util.Bits;


	/**
	* Read index (abstract) task.
	* Sub classes implement withSearch(), withWarm(), withTraverse() and withRetrieve()
	* methods to configure the actual action.
	* <p>Note: All ReadTasks reuse the reader if it is already open.
	* Otherwise a reader is opened at start and closed at the end.
	* <p>
	* The <code>search.num.hits</code> config parameter sets
	* the top number of hits to collect during searching. If
	* <code>print.hits.field</code> is set, then each hit is
	* printed along with the value of that field.</p>
	*
	* <p>Other side effects: none.
	*/
	public abstract class ReadTask extends PerfTask {

	private final QueryMaker queryMaker;

	public ReadTask(PerfRunData runData) {
	super(runData);
	if (withSearch()) {
	queryMaker = getQueryMaker();
	} else {
	queryMaker = null;
	}
	}
	@Override
	public int doLogic() throws Exception {
	int res = 0;

	// open reader or use existing one
	IndexSearcher searcher = getRunData().getIndexSearcher(); // (will incRef the reader)

	IndexReader reader;

	final boolean closeSearcher;
	if (searcher == null) {
	// open our own reader
	Directory dir = getRunData().getDirectory();
	reader = DirectoryReader.open(dir);
	searcher = new IndexSearcher(reader);
	closeSearcher = true;
	} else {
	// use existing one; this passes +1 ref to us
	reader = searcher.getIndexReader();
	closeSearcher = false;
	}

	// optionally warm and add num docs traversed to count
	if (withWarm()) {
	Document doc = null;
	Bits liveDocs = MultiBits.getLiveDocs(reader);
	for (int m = 0; m < reader.maxDoc(); m++) {
	if (null == liveDocs \|\| liveDocs.get(m)) {
	doc = reader.document(m);
	res += (doc == null ? 0 : 1);
	}
	}
	}

	if (withSearch()) {
	res++;
	Query q = queryMaker.makeQuery();
	Sort sort = getSort();
	TopDocs hits = null;
	final int numHits = numHits();
	if (numHits > 0) {
	if (withCollector() == false) {
	if (sort != null) {
	// TODO: instead of always passing false we
	// should detect based on the query; if we make
	// the IndexSearcher search methods that take
	// Weight public again, we can go back to
	// pulling the Weight ourselves:
	TopFieldCollector collector = TopFieldCollector.create(sort, numHits,
	withTotalHits() ? Integer.MAX_VALUE : 1);
	searcher.search(q, collector);
	hits = collector.topDocs();
	} else {
	hits = searcher.search(q, numHits);
	}
	} else {
	Collector collector = createCollector();
	searcher.search(q, collector);
	//hits = collector.topDocs();
	}

	if (hits != null) {
	final String printHitsField = getRunData().getConfig().get("print.hits.field", null);
	if (printHitsField != null && printHitsField.length() > 0) {
	System.out.println("totalHits = " + hits.totalHits);
	System.out.println("maxDoc() = " + reader.maxDoc());
	System.out.println("numDocs() = " + reader.numDocs());
	for(int i=0;i<hits.scoreDocs.length;i++) {
	final int docID = hits.scoreDocs[i].doc;
	final Document doc = reader.document(docID);
	System.out.println(" " + i + ": doc=" + docID + " score=" + hits.scoreDocs[i].score + " " + printHitsField + " =" + doc.get(printHitsField));
	}
	}

	res += withTopDocs(searcher, q, hits);
	}
	}
	}

	if (closeSearcher) {
	reader.close();
	} else {
	// Release our +1 ref from above
	reader.decRef();
	}
	return res;
	}

	protected int withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
	IndexReader reader = searcher.getIndexReader();
	int res = 0;
	if (withTraverse()) {
	final ScoreDoc[] scoreDocs = hits.scoreDocs;
	int traversalSize = Math.min(scoreDocs.length, traversalSize());

	if (traversalSize > 0) {
	boolean retrieve = withRetrieve();
	for (int m = 0; m < traversalSize; m++) {
	int id = scoreDocs[m].doc;
	res++;
	if (retrieve) {
	Document document = retrieveDoc(reader, id);
	res += document != null ? 1 : 0;
	}
	}
	}
	}
	return res;
	}

	protected Collector createCollector() throws Exception {
	return TopScoreDocCollector.create(numHits(), withTotalHits() ? Integer.MAX_VALUE : 1);
	}


	protected Document retrieveDoc(IndexReader ir, int id) throws IOException {
	return ir.document(id);
	}

	/**
	* Return query maker used for this task.
	*/
	public abstract QueryMaker getQueryMaker();

	/**
	* Return true if search should be performed.
	*/
	public abstract boolean withSearch();

	public boolean withCollector(){
	return false;
	}


	/**
	* Return true if warming should be performed.
	*/
	public abstract boolean withWarm();

	/**
	* Return true if, with search, results should be traversed.
	*/
	public abstract boolean withTraverse();

	/** Whether totalHits should be computed (only useful with
	* field sort) */
	public boolean withTotalHits() {
	return false;
	}

	/**
	* Specify the number of hits to traverse. Tasks should override this if they want to restrict the number
	* of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0.
	* <p>
	* Read task calculates the traversal as: Math.min(hits.length(), traversalSize())
	*
	* @return Integer.MAX_VALUE
	*/
	public int traversalSize() {
	return Integer.MAX_VALUE;
	}

	static final int DEFAULT_SEARCH_NUM_HITS = 10;
	private int numHits;

	@Override
	public void setup() throws Exception {
	super.setup();
	numHits = getRunData().getConfig().get("search.num.hits", DEFAULT_SEARCH_NUM_HITS);
	}

	/**
	* Specify the number of hits to retrieve. Tasks should override this if they want to restrict the number
	* of hits that are collected during searching. Must be greater than 0.
	*
	* @return 10 by default, or search.num.hits config if set.
	*/
	public int numHits() {
	return numHits;
	}

	/**
	* Return true if, with search and results traversing, docs should be retrieved.
	*/
	public abstract boolean withRetrieve();

	protected Sort getSort() {
	return null;
	}

	}