// blob: c55c7aaf0903945415b6bd76ed503cfe16428ea9 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
/**
 * Indexes a batch of documents that all carry the same {@link #PRIORITY_FIELD} value and a
 * sequential {@link #ID_FIELD}, runs a term query and an OR query over them, and checks that the
 * printed results are the same for both settings of the merge policy's compound-file ratio.
 */
public class TestSearchForDuplicates extends LuceneTestCase {

  static final String PRIORITY_FIELD = "priority";
  static final String ID_FIELD = "id";
  static final String HIGH_PRIORITY = "high";
  static final String MED_PRIORITY = "medium";
  static final String LOW_PRIORITY = "low";

  /**
   * This test compares search results when using and not using compound files.
   *
   * <p>TODO: There is rudimentary search result validation as well, but it is simply based on
   * asserting the output observed in the old test case, without really knowing if the output is
   * correct. Someone needs to validate this output and make any changes to the checkHits method.
   */
  public void testRun() throws Exception {
    final int maxDocs = atLeast(225);
    String multiFileOutput = captureOutput(false, maxDocs);
    String singleFileOutput = captureOutput(true, maxDocs);
    assertEquals(
        "output with and without compound files should match", multiFileOutput, singleFileOutput);
  }

  /**
   * Runs {@link #doTest} once and returns everything it printed.
   *
   * @param useCompoundFiles forwarded to {@link #doTest}
   * @param maxDocs number of documents to index and to request from each search
   * @return the text written by the run
   */
  private String captureOutput(boolean useCompoundFiles, int maxDocs) throws Exception {
    StringWriter buffer = new StringWriter();
    // try-with-resources closes (and therefore flushes) the writer even if the run throws.
    try (PrintWriter out = new PrintWriter(buffer, true)) {
      doTest(random(), out, useCompoundFiles, maxDocs);
    }
    return buffer.toString();
  }

  /**
   * Builds an index of {@code maxDocs} documents, then runs and verifies two searches against it:
   * a plain term query for {@link #HIGH_PRIORITY}, and a boolean query that ORs
   * {@link #HIGH_PRIORITY} with {@link #MED_PRIORITY}.
   *
   * <p>The writer, reader and directory are all managed with try-with-resources so that they are
   * released even when indexing, searching or one of the assertions fails.
   *
   * @param random source of randomness handed to the analyzer
   * @param out receives the query descriptions and the sampled hits
   * @param useCompoundFiles selects a merge-policy noCFSRatio of 1.0 when true, 0.0 otherwise
   * @param maxDocs number of documents to index and to request from each search
   */
  private void doTest(Random random, PrintWriter out, boolean useCompoundFiles, int maxDocs)
      throws Exception {
    try (Directory directory = newDirectory()) {
      Analyzer analyzer = new MockAnalyzer(random);
      IndexWriterConfig conf = newIndexWriterConfig(analyzer);
      final MergePolicy mp = conf.getMergePolicy();
      mp.setNoCFSRatio(useCompoundFiles ? 1.0 : 0.0);

      if (VERBOSE) {
        System.out.println("TEST: now build index MAX_DOCS=" + maxDocs);
      }
      // The writer is closed before the reader is opened, as in a normal index-then-search flow.
      try (IndexWriter writer = new IndexWriter(directory, conf)) {
        for (int id = 0; id < maxDocs; id++) {
          Document d = new Document();
          d.add(newTextField(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES));
          // The id is stored (so it can be read back from hits) and also indexed as doc values
          // (so it can be used as a sort key).
          d.add(new StoredField(ID_FIELD, id));
          d.add(new NumericDocValuesField(ID_FIELD, id));
          writer.addDocument(d);
        }
      }

      try (IndexReader reader = DirectoryReader.open(directory)) {
        // Sort by score first, then by id. Every document has identical indexed content, so
        // presumably scores tie and the id decides the order (which is what checkHits expects).
        final Sort sort =
            new Sort(SortField.FIELD_SCORE, new SortField(ID_FIELD, SortField.Type.INT));

        // try a search without OR
        IndexSearcher searcher = newSearcher(reader);
        Query termQuery = new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY));
        searchAndVerify(out, searcher, termQuery, sort, maxDocs);

        // try a new search with OR
        searcher = newSearcher(reader);
        BooleanQuery.Builder orBuilder = new BooleanQuery.Builder();
        orBuilder.add(
            new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY)), BooleanClause.Occur.SHOULD);
        orBuilder.add(
            new TermQuery(new Term(PRIORITY_FIELD, MED_PRIORITY)), BooleanClause.Occur.SHOULD);
        // Build once so the query that is printed is the very query that is executed.
        Query orQuery = orBuilder.build();
        searchAndVerify(out, searcher, orQuery, sort, maxDocs);
      }
    }
  }

  /** Prints the query, runs it with the given sort, then prints and checks the hits. */
  private void searchAndVerify(
      PrintWriter out, IndexSearcher searcher, Query query, Sort sort, int maxDocs)
      throws IOException {
    out.println("Query: " + query.toString(PRIORITY_FIELD));
    if (VERBOSE) {
      System.out.println("TEST: search query=" + query);
    }
    ScoreDoc[] hits = searcher.search(query, maxDocs, sort).scoreDocs;
    printHits(out, hits, searcher);
    checkHits(hits, maxDocs, searcher);
  }

  /**
   * Tells whether the hit at the given rank is one of those that get printed and verified: the
   * first ten (0-9) and the ten ranks 95-104.
   */
  private static boolean isSampledRank(int rank) {
    return rank < 10 || (rank > 94 && rank < 105);
  }

  /**
   * Writes the hit count followed by the rank and stored id of each sampled hit.
   *
   * @param out destination for the report
   * @param hits search results, in result order
   * @param searcher used to load the stored fields of each sampled hit
   */
  private void printHits(PrintWriter out, ScoreDoc[] hits, IndexSearcher searcher)
      throws IOException {
    out.println(hits.length + " total results\n");
    for (int i = 0; i < hits.length; i++) {
      if (isSampledRank(i)) {
        Document d = searcher.doc(hits[i].doc);
        out.println(i + " " + d.get(ID_FIELD));
      }
    }
  }

  /**
   * Asserts that the number of hits matches {@code expectedCount} and that each sampled hit's
   * stored id equals its rank in the result list.
   *
   * @param hits search results, in result order
   * @param expectedCount the number of hits the search should have returned
   * @param searcher used to load the stored fields of each sampled hit
   */
  private void checkHits(ScoreDoc[] hits, int expectedCount, IndexSearcher searcher)
      throws IOException {
    assertEquals("total results", expectedCount, hits.length);
    for (int i = 0; i < hits.length; i++) {
      if (isSampledRank(i)) {
        Document d = searcher.doc(hits[i].doc);
        assertEquals("check " + i, String.valueOf(i), d.get(ID_FIELD));
      }
    }
  }
}