lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java - lucene-solr - Git at Google

 package org.apache.lucene.search;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Random;
 import java.util.Set;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;

 /** random sorting tests */
 public class TestSortRandom extends LuceneTestCase {

   public void testRandomStringSort() throws Exception {
     Random random = new Random(random().nextLong());

     final int NUM_DOCS = atLeast(100);
     final Directory dir = newDirectory();
     final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
     final boolean allowDups = random.nextBoolean();
     final Set<String> seen = new HashSet<>();
     final int maxLength = TestUtil.nextInt(random, 5, 100);
     if (VERBOSE) {
       System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
     }

     int numDocs = 0;
     final List<BytesRef> docValues = new ArrayList<>();
     // TODO: deletions
     while (numDocs < NUM_DOCS) {
       final Document doc = new Document();

       // 10% of the time, the document is missing the value:
       final BytesRef br;
       if (random().nextInt(10) != 7) {
         final String s;
         if (random.nextBoolean()) {
           s = TestUtil.randomSimpleString(random, maxLength);
         } else {
           s = TestUtil.randomUnicodeString(random, maxLength);
         }

         if (!allowDups) {
           if (seen.contains(s)) {
             continue;
           }
           seen.add(s);
         }

         if (VERBOSE) {
           System.out.println("  " + numDocs + ": s=" + s);
         }

         br = new BytesRef(s);
         doc.add(new SortedDocValuesField("stringdv", br));
         docValues.add(br);

       } else {
         br = null;
         if (VERBOSE) {
           System.out.println("  " + numDocs + ": <missing>");
         }
         docValues.add(null);
       }

       doc.add(new NumericDocValuesField("id", numDocs));
       doc.add(new StoredField("id", numDocs));
       writer.addDocument(doc);
       numDocs++;

       if (random.nextInt(40) == 17) {
         // force flush
         writer.getReader().close();
       }
     }

     final IndexReader r = writer.getReader();
     writer.shutdown();
     if (VERBOSE) {
       System.out.println("  reader=" + r);
     }

     final IndexSearcher s = newSearcher(r, false);
     final int ITERS = atLeast(100);
     for(int iter=0;iter<ITERS;iter++) {
       final boolean reverse = random.nextBoolean();

       final TopFieldDocs hits;
       final SortField sf;
       final boolean sortMissingLast;
       final boolean missingIsNull;
       sf = new SortField("stringdv", SortField.Type.STRING, reverse);
       // Can only use sort missing if the DVFormat
       // supports docsWithField:
       sortMissingLast = defaultCodecSupportsDocsWithField() && random().nextBoolean();
       missingIsNull = defaultCodecSupportsDocsWithField();

       if (sortMissingLast) {
         sf.setMissingValue(SortField.STRING_LAST);
       }

       final Sort sort;
       if (random.nextBoolean()) {
         sort = new Sort(sf);
       } else {
         sort = new Sort(sf, SortField.FIELD_DOC);
       }
       final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
       final RandomFilter f = new RandomFilter(random, random.nextFloat(), docValues);
       int queryType = random.nextInt(3);
       if (queryType == 0) {
         // force out of order
         BooleanQuery bq = new BooleanQuery();
         // Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2
         // which delegates to BS if there are no mandatory clauses.
         bq.add(new MatchAllDocsQuery(), Occur.SHOULD);
         // Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return
         // the clause instead of BQ.
         bq.setMinimumNumberShouldMatch(1);
         hits = s.search(bq, f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
       } else if (queryType == 1) {
         hits = s.search(new ConstantScoreQuery(f),
                         null, hitCount, sort, random.nextBoolean(), random.nextBoolean());
       } else {
         hits = s.search(new MatchAllDocsQuery(),
                         f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
       }

       if (VERBOSE) {
         System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
       }

       // Compute expected results:
       Collections.sort(f.matchValues, new Comparator<BytesRef>() {
           @Override
           public int compare(BytesRef a, BytesRef b) {
             if (a == null) {
               if (b == null) {
                 return 0;
               }
               if (sortMissingLast) {
                 return 1;
               } else {
                 return -1;
               }
             } else if (b == null) {
               if (sortMissingLast) {
                 return -1;
               } else {
                 return 1;
               }
             } else {
               return a.compareTo(b);
             }
           }
         });

       if (reverse) {
         Collections.reverse(f.matchValues);
       }
       final List<BytesRef> expected = f.matchValues;
       if (VERBOSE) {
         System.out.println("  expected:");
         for(int idx=0;idx<expected.size();idx++) {
           BytesRef br = expected.get(idx);
           if (br == null && missingIsNull == false) {
             br = new BytesRef();
           }
           System.out.println("    " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
           if (idx == hitCount-1) {
             break;
           }
         }
       }

       if (VERBOSE) {
         System.out.println("  actual:");
         for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
           final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
           BytesRef br = (BytesRef) fd.fields[0];

           System.out.println("    " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
         }
       }
       for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
         final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
         BytesRef br = expected.get(hitIDX);
         if (br == null && missingIsNull == false) {
           br = new BytesRef();
         }

         // Normally, the old codecs (that don't support
         // docsWithField via doc values) will always return
         // an empty BytesRef for the missing case; however,
         // if all docs in a given segment were missing, in
         // that case it will return null!  So we must map
         // null here, too:
         BytesRef br2 = (BytesRef) fd.fields[0];
         if (br2 == null && missingIsNull == false) {
           br2 = new BytesRef();
         }

         assertEquals(br, br2);
       }
     }

     r.close();
     dir.close();
   }

   private static class RandomFilter extends Filter {
     private final Random random;
     private float density;
     private final List<BytesRef> docValues;
     public final List<BytesRef> matchValues = Collections.synchronizedList(new ArrayList<BytesRef>());

     // density should be 0.0 ... 1.0
     public RandomFilter(Random random, float density, List<BytesRef> docValues) {
       this.random = random;
       this.density = density;
       this.docValues = docValues;
     }

     @Override
     public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
       final int maxDoc = context.reader().maxDoc();
       final NumericDocValues idSource = DocValues.getNumeric(context.reader(), "id");
       assertNotNull(idSource);
       final FixedBitSet bits = new FixedBitSet(maxDoc);
       for(int docID=0;docID<maxDoc;docID++) {
         if (random.nextFloat() <= density && (acceptDocs == null || acceptDocs.get(docID))) {
           bits.set(docID);
           //System.out.println("  acc id=" + idSource.getInt(docID) + " docID=" + docID);
           matchValues.add(docValues.get((int) idSource.get(docID)));
         }
       }

       return bits;
     }
   }
 }
	package org.apache.lucene.search;

	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	import java.io.IOException;
	import java.util.ArrayList;
	import java.util.Collections;
	import java.util.Comparator;
	import java.util.HashSet;
	import java.util.List;
	import java.util.Random;
	import java.util.Set;

	import org.apache.lucene.document.Document;
	import org.apache.lucene.document.Field;
	import org.apache.lucene.document.NumericDocValuesField;
	import org.apache.lucene.document.SortedDocValuesField;
	import org.apache.lucene.document.StoredField;
	import org.apache.lucene.index.AtomicReaderContext;
	import org.apache.lucene.index.DocValues;
	import org.apache.lucene.index.IndexReader;
	import org.apache.lucene.index.NumericDocValues;
	import org.apache.lucene.index.RandomIndexWriter;
	import org.apache.lucene.search.BooleanClause.Occur;
	import org.apache.lucene.store.Directory;
	import org.apache.lucene.util.Bits;
	import org.apache.lucene.util.BytesRef;
	import org.apache.lucene.util.FixedBitSet;
	import org.apache.lucene.util.LuceneTestCase;
	import org.apache.lucene.util.TestUtil;

	/** random sorting tests */
	public class TestSortRandom extends LuceneTestCase {

	public void testRandomStringSort() throws Exception {
	Random random = new Random(random().nextLong());

	final int NUM_DOCS = atLeast(100);
	final Directory dir = newDirectory();
	final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
	final boolean allowDups = random.nextBoolean();
	final Set<String> seen = new HashSet<>();
	final int maxLength = TestUtil.nextInt(random, 5, 100);
	if (VERBOSE) {
	System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength + " allowDups=" + allowDups);
	}

	int numDocs = 0;
	final List<BytesRef> docValues = new ArrayList<>();
	// TODO: deletions
	while (numDocs < NUM_DOCS) {
	final Document doc = new Document();

	// 10% of the time, the document is missing the value:
	final BytesRef br;
	if (random().nextInt(10) != 7) {
	final String s;
	if (random.nextBoolean()) {
	s = TestUtil.randomSimpleString(random, maxLength);
	} else {
	s = TestUtil.randomUnicodeString(random, maxLength);
	}

	if (!allowDups) {
	if (seen.contains(s)) {
	continue;
	}
	seen.add(s);
	}

	if (VERBOSE) {
	System.out.println(" " + numDocs + ": s=" + s);
	}

	br = new BytesRef(s);
	doc.add(new SortedDocValuesField("stringdv", br));
	docValues.add(br);

	} else {
	br = null;
	if (VERBOSE) {
	System.out.println(" " + numDocs + ": <missing>");
	}
	docValues.add(null);
	}

	doc.add(new NumericDocValuesField("id", numDocs));
	doc.add(new StoredField("id", numDocs));
	writer.addDocument(doc);
	numDocs++;

	if (random.nextInt(40) == 17) {
	// force flush
	writer.getReader().close();
	}
	}

	final IndexReader r = writer.getReader();
	writer.shutdown();
	if (VERBOSE) {
	System.out.println(" reader=" + r);
	}

	final IndexSearcher s = newSearcher(r, false);
	final int ITERS = atLeast(100);
	for(int iter=0;iter<ITERS;iter++) {
	final boolean reverse = random.nextBoolean();

	final TopFieldDocs hits;
	final SortField sf;
	final boolean sortMissingLast;
	final boolean missingIsNull;
	sf = new SortField("stringdv", SortField.Type.STRING, reverse);
	// Can only use sort missing if the DVFormat
	// supports docsWithField:
	sortMissingLast = defaultCodecSupportsDocsWithField() && random().nextBoolean();
	missingIsNull = defaultCodecSupportsDocsWithField();

	if (sortMissingLast) {
	sf.setMissingValue(SortField.STRING_LAST);
	}

	final Sort sort;
	if (random.nextBoolean()) {
	sort = new Sort(sf);
	} else {
	sort = new Sort(sf, SortField.FIELD_DOC);
	}
	final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
	final RandomFilter f = new RandomFilter(random, random.nextFloat(), docValues);
	int queryType = random.nextInt(3);
	if (queryType == 0) {
	// force out of order
	BooleanQuery bq = new BooleanQuery();
	// Add a Query with SHOULD, since bw.scorer() returns BooleanScorer2
	// which delegates to BS if there are no mandatory clauses.
	bq.add(new MatchAllDocsQuery(), Occur.SHOULD);
	// Set minNrShouldMatch to 1 so that BQ will not optimize rewrite to return
	// the clause instead of BQ.
	bq.setMinimumNumberShouldMatch(1);
	hits = s.search(bq, f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
	} else if (queryType == 1) {
	hits = s.search(new ConstantScoreQuery(f),
	null, hitCount, sort, random.nextBoolean(), random.nextBoolean());
	} else {
	hits = s.search(new MatchAllDocsQuery(),
	f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
	}

	if (VERBOSE) {
	System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
	}

	// Compute expected results:
	Collections.sort(f.matchValues, new Comparator<BytesRef>() {
	@Override
	public int compare(BytesRef a, BytesRef b) {
	if (a == null) {
	if (b == null) {
	return 0;
	}
	if (sortMissingLast) {
	return 1;
	} else {
	return -1;
	}
	} else if (b == null) {
	if (sortMissingLast) {
	return -1;
	} else {
	return 1;
	}
	} else {
	return a.compareTo(b);
	}
	}
	});

	if (reverse) {
	Collections.reverse(f.matchValues);
	}
	final List<BytesRef> expected = f.matchValues;
	if (VERBOSE) {
	System.out.println(" expected:");
	for(int idx=0;idx<expected.size();idx++) {
	BytesRef br = expected.get(idx);
	if (br == null && missingIsNull == false) {
	br = new BytesRef();
	}
	System.out.println(" " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
	if (idx == hitCount-1) {
	break;
	}
	}
	}

	if (VERBOSE) {
	System.out.println(" actual:");
	for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
	final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
	BytesRef br = (BytesRef) fd.fields[0];

	System.out.println(" " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString()) + " id=" + s.doc(fd.doc).get("id"));
	}
	}
	for(int hitIDX=0;hitIDX<hits.scoreDocs.length;hitIDX++) {
	final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
	BytesRef br = expected.get(hitIDX);
	if (br == null && missingIsNull == false) {
	br = new BytesRef();
	}

	// Normally, the old codecs (that don't support
	// docsWithField via doc values) will always return
	// an empty BytesRef for the missing case; however,
	// if all docs in a given segment were missing, in
	// that case it will return null! So we must map
	// null here, too:
	BytesRef br2 = (BytesRef) fd.fields[0];
	if (br2 == null && missingIsNull == false) {
	br2 = new BytesRef();
	}

	assertEquals(br, br2);
	}
	}

	r.close();
	dir.close();
	}

	private static class RandomFilter extends Filter {
	private final Random random;
	private float density;
	private final List<BytesRef> docValues;
	public final List<BytesRef> matchValues = Collections.synchronizedList(new ArrayList<BytesRef>());

	// density should be 0.0 ... 1.0
	public RandomFilter(Random random, float density, List<BytesRef> docValues) {
	this.random = random;
	this.density = density;
	this.docValues = docValues;
	}

	@Override
	public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
	final int maxDoc = context.reader().maxDoc();
	final NumericDocValues idSource = DocValues.getNumeric(context.reader(), "id");
	assertNotNull(idSource);
	final FixedBitSet bits = new FixedBitSet(maxDoc);
	for(int docID=0;docID<maxDoc;docID++) {
	if (random.nextFloat() <= density && (acceptDocs == null \|\| acceptDocs.get(docID))) {
	bits.set(docID);
	//System.out.println(" acc id=" + idSource.getInt(docID) + " docID=" + docID);
	matchValues.add(docValues.get((int) idSource.get(docID)));
	}
	}

	return bits;
	}
	}
	}