| package org.apache.lucene.search; |
| |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.List; |
| |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.NumericField; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.RandomIndexWriter; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util.ReaderUtil; |
| import org.apache.lucene.util._TestUtil; |
| |
| public class TestTopDocsMerge extends LuceneTestCase { |
| |
| private static class ShardSearcher { |
| private final IndexSearcher subSearcher; |
| |
| public ShardSearcher(IndexReader subReader) { |
| this.subSearcher = new IndexSearcher(subReader); |
| } |
| |
| public void search(Weight weight, Collector collector) throws IOException { |
| subSearcher.search(weight, null, collector); |
| } |
| |
| public TopDocs search(Weight weight, int topN) throws IOException { |
| return subSearcher.search(weight, null, topN); |
| } |
| |
| @Override |
| public String toString() { |
| return "ShardSearcher(" + subSearcher + ")"; |
| } |
| } |
| |
  /**
   * Indexes random documents carrying string, int and float sort fields, then
   * for many random query/sort combinations verifies that searching each
   * segment separately (treating every segment as a shard) and merging the
   * per-shard results with {@link TopDocs#merge} yields exactly the same top
   * hits as a single search over the whole index.
   */
  public void testSort() throws Exception {

    IndexReader reader = null;
    Directory dir = null;

    final int numDocs = atLeast(1000);
    //final int numDocs = atLeast(50);

    // Small token vocabulary so every term query matches many docs:
    final String[] tokens = new String[] {"a", "b", "c", "d", "e"};

    if (VERBOSE) {
      System.out.println("TEST: make index");
    }

    {
      dir = newDirectory();
      final RandomIndexWriter w = new RandomIndexWriter(random, dir);
      // w.setDoRandomOptimize(false);

      // w.w.getConfig().setMaxBufferedDocs(atLeast(100));

      // Pre-build a shared pool of random "text" values to draw from per doc:
      final String[] content = new String[atLeast(20)];

      for(int contentIDX=0;contentIDX<content.length;contentIDX++) {
        final StringBuilder sb = new StringBuilder();
        final int numTokens = _TestUtil.nextInt(random, 1, 10);
        for(int tokenIDX=0;tokenIDX<numTokens;tokenIDX++) {
          sb.append(tokens[random.nextInt(tokens.length)]).append(' ');
        }
        content[contentIDX] = sb.toString();
      }

      for(int docIDX=0;docIDX<numDocs;docIDX++) {
        final Document doc = new Document();
        doc.add(newField("string", _TestUtil.randomRealisticUnicodeString(random), Field.Index.NOT_ANALYZED));
        doc.add(newField("text", content[random.nextInt(content.length)], Field.Index.ANALYZED));
        doc.add(new NumericField("float").setFloatValue(random.nextFloat()));
        // Occasionally plant the extreme int values to exercise sort boundary cases:
        final int intValue;
        if (random.nextInt(100) == 17) {
          intValue = Integer.MIN_VALUE;
        } else if (random.nextInt(100) == 17) {
          intValue = Integer.MAX_VALUE;
        } else {
          intValue = random.nextInt();
        }
        doc.add(new NumericField("int").setIntValue(intValue));
        if (VERBOSE) {
          System.out.println(" doc=" + doc);
        }
        w.addDocument(doc);
      }

      reader = w.getReader();
      w.close();
    }

    // NOTE: sometimes reader has just one segment, which is
    // important to test
    final IndexSearcher searcher = newSearcher(reader);
    IndexReader[] subReaders = searcher.getIndexReader().getSequentialSubReaders();
    if (subReaders == null) {
      // Single-segment (or non-composite) reader: the whole reader is the only "shard"
      subReaders = new IndexReader[] {searcher.getIndexReader()};
    }
    final ShardSearcher[] subSearchers = new ShardSearcher[subReaders.length];

    for(int searcherIDX=0;searcherIDX<subSearchers.length;searcherIDX++) {
      subSearchers[searcherIDX] = new ShardSearcher(subReaders[searcherIDX]);
    }

    // Candidate sort fields; each iteration below picks a random subset of 1-3:
    final List<SortField> sortFields = new ArrayList<SortField>();
    sortFields.add(new SortField("string", SortField.STRING, true));
    sortFields.add(new SortField("string", SortField.STRING, false));
    sortFields.add(new SortField("int", SortField.INT, true));
    sortFields.add(new SortField("int", SortField.INT, false));
    sortFields.add(new SortField("float", SortField.FLOAT, true));
    sortFields.add(new SortField("float", SortField.FLOAT, false));
    sortFields.add(new SortField(null, SortField.SCORE, true));
    sortFields.add(new SortField(null, SortField.SCORE, false));
    sortFields.add(new SortField(null, SortField.DOC, true));
    sortFields.add(new SortField(null, SortField.DOC, false));

    // docStarts[i] = first top-level docID belonging to shard i; used both to
    // rebase per-shard docIDs and to verify merged shardIndex values:
    final int[] docStarts = new int[subSearchers.length];
    int docBase = 0;
    for(int subIDX=0;subIDX<docStarts.length;subIDX++) {
      docStarts[subIDX] = docBase;
      docBase += subReaders[subIDX].maxDoc();
      if (VERBOSE) {
        System.out.println("docStarts[" + subIDX + "]=" + docStarts[subIDX]);
      }
    }

    for(int iter=0;iter<1000*RANDOM_MULTIPLIER;iter++) {

      // TODO: custom FieldComp...
      final Query query = new TermQuery(new Term("text", tokens[random.nextInt(tokens.length)]));

      final Sort sort;
      if (random.nextInt(10) == 4) {
        // Sort by score
        sort = null;
      } else {
        final SortField[] randomSortFields = new SortField[_TestUtil.nextInt(random, 1, 3)];
        for(int sortIDX=0;sortIDX<randomSortFields.length;sortIDX++) {
          randomSortFields[sortIDX] = sortFields.get(random.nextInt(sortFields.size()));
        }
        sort = new Sort(randomSortFields);
      }

      final int numHits = _TestUtil.nextInt(random, 1, numDocs+5);
      //final int numHits = 5;

      if (VERBOSE) {
        System.out.println("TEST: search query=" + query + " sort=" + sort + " numHits=" + numHits);
      }

      // First search on whole index (the expected result):
      final TopDocs topHits;
      if (sort == null) {
        topHits = searcher.search(query, numHits);
      } else {
        // fillFields/trackDocScores/trackMaxScore all true so merge has full data to compare:
        final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true, random.nextBoolean());
        searcher.search(query, c);
        topHits = c.topDocs(0, numHits);
      }

      if (VERBOSE) {
        System.out.println("  top search: " + topHits.totalHits + " totalHits; hits=" + (topHits.scoreDocs == null ? "null" : topHits.scoreDocs.length));
        if (topHits.scoreDocs != null) {
          for(int hitIDX=0;hitIDX<topHits.scoreDocs.length;hitIDX++) {
            final ScoreDoc sd = topHits.scoreDocs[hitIDX];
            System.out.println("    doc=" + sd.doc + " score=" + sd.score);
          }
        }
      }

      // ... then all shards; a single pre-normalized weight is shared so every
      // shard scores against the same query stats:
      final Weight w = searcher.createNormalizedWeight(query);

      final TopDocs[] shardHits = new TopDocs[subSearchers.length];
      for(int shardIDX=0;shardIDX<subSearchers.length;shardIDX++) {
        final TopDocs subHits;
        final ShardSearcher subSearcher = subSearchers[shardIDX];
        if (sort == null) {
          subHits = subSearcher.search(w, numHits);
        } else {
          final TopFieldCollector c = TopFieldCollector.create(sort, numHits, true, true, true, random.nextBoolean());
          subSearcher.search(w, c);
          subHits = c.topDocs(0, numHits);
        }
        // Shift segment-local docIDs into the top-level docID space:
        rebaseDocIDs(docStarts[shardIDX], subHits);

        shardHits[shardIDX] = subHits;
        if (VERBOSE) {
          System.out.println(" shard=" + shardIDX + " " + subHits.totalHits + " totalHits hits=" + (subHits.scoreDocs == null ? "null" : subHits.scoreDocs.length));
          if (subHits.scoreDocs != null) {
            for(ScoreDoc sd : subHits.scoreDocs) {
              System.out.println("  doc=" + sd.doc + " score=" + sd.score);
            }
          }
        }
      }

      // Merge the per-shard results; this is the operation under test:
      final TopDocs mergedHits = TopDocs.merge(sort, numHits, shardHits);

      if (VERBOSE) {
        System.out.println(" mergedHits: " + mergedHits.totalHits + " totalHits; hits=" + (mergedHits.scoreDocs == null ? "null" : mergedHits.scoreDocs.length));
        if (mergedHits.scoreDocs != null) {
          for(int hitIDX=0;hitIDX<mergedHits.scoreDocs.length;hitIDX++) {
            final ScoreDoc sd = mergedHits.scoreDocs[hitIDX];
            System.out.println("    doc=" + sd.doc + " score=" + sd.score);
          }
        }
      }
      if (mergedHits.scoreDocs != null) {
        // Make sure the returned shards are correct:
        for(int hitIDX=0;hitIDX<mergedHits.scoreDocs.length;hitIDX++) {
          final ScoreDoc sd = mergedHits.scoreDocs[hitIDX];
          assertEquals("doc=" + sd.doc + " wrong shard",
                       ReaderUtil.subIndex(sd.doc, docStarts),
                       sd.shardIndex);
        }
      }

      // Merged hits must match the whole-index search exactly:
      _TestUtil.assertEquals(topHits, mergedHits);
    }
    searcher.close();
    reader.close();
    dir.close();
  }
| |
| private void rebaseDocIDs(int docBase, TopDocs hits) { |
| List<Integer> docFieldLocs = new ArrayList<Integer>(); |
| if (hits instanceof TopFieldDocs) { |
| TopFieldDocs fieldHits = (TopFieldDocs) hits; |
| for(int fieldIDX=0;fieldIDX<fieldHits.fields.length;fieldIDX++) { |
| if (fieldHits.fields[fieldIDX].getType() == SortField.DOC) { |
| docFieldLocs.add(fieldIDX); |
| } |
| } |
| } |
| |
| for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) { |
| final ScoreDoc sd = hits.scoreDocs[hitIDX]; |
| sd.doc += docBase; |
| if (sd instanceof FieldDoc) { |
| final FieldDoc fd = (FieldDoc) sd; |
| if (fd.fields != null) { |
| for(int idx : docFieldLocs) { |
| fd.fields[idx] = Integer.valueOf(((Integer) fd.fields[idx]).intValue() + docBase); |
| } |
| } |
| } |
| } |
| } |
| } |