// blob: 5cc80bc921ad5e2123e7cc4cecae8eaac97ef2af [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import com.carrotsearch.randomizedtesting.generators.RandomStrings;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
/**
 * Tests for {@link SortingCodecReader}: the leaves of an unsorted index are wrapped with
 * {@code SortingCodecReader.wrap} and added via {@code IndexWriter.addIndexes(CodecReader...)}
 * to a writer that is configured with an index sort.
 */
public class TestSortingCodecReader extends LuceneTestCase {

  /**
   * Adds three documents carrying the numeric doc value {@code foo} (18, -1 and 7) to an
   * unsorted index, re-adds its leaves through {@link SortingCodecReader} into a writer sorted
   * on {@code foo}, and checks that the resulting single leaf exposes the values in ascending
   * order and reports an index sort in its metadata.
   *
   * <p>All resources are managed with try-with-resources so that a failing assertion does not
   * leave directories, writers or readers open.
   */
  public void testSortOnAddIndicesInt() throws IOException {
    try (Directory tmpDir = newDirectory();
        Directory dir = newDirectory()) {
      // Build the unsorted source index; 18 and -1 are committed first, 7 in a second commit.
      try (IndexWriter source =
          new IndexWriter(tmpDir, new IndexWriterConfig(new MockAnalyzer(random())))) {
        Document doc = new Document();
        doc.add(new NumericDocValuesField("foo", 18));
        source.addDocument(doc);

        doc = new Document();
        doc.add(new NumericDocValuesField("foo", -1));
        source.addDocument(doc);
        source.commit();

        doc = new Document();
        doc.add(new NumericDocValuesField("foo", 7));
        source.addDocument(doc);
        source.commit();
      }

      Sort indexSort = new Sort(new SortField("foo", SortField.Type.INT));
      IndexWriterConfig sortedConfig =
          new IndexWriterConfig(new MockAnalyzer(random())).setIndexSort(indexSort);
      try (IndexWriter w = new IndexWriter(dir, sortedConfig)) {
        // Wrap every leaf of the source index and add them all in a single addIndexes call.
        try (DirectoryReader reader = DirectoryReader.open(tmpDir)) {
          List<CodecReader> readers = new ArrayList<>();
          for (LeafReaderContext ctx : reader.leaves()) {
            CodecReader wrap =
                SortingCodecReader.wrap(SlowCodecReaderWrapper.wrap(ctx.reader()), indexSort);
            assertTrue(wrap.toString(), wrap.toString().startsWith("SortingCodecReader("));
            readers.add(wrap);
          }
          w.addIndexes(readers.toArray(new CodecReader[0]));
        }

        try (DirectoryReader r = DirectoryReader.open(w)) {
          LeafReader leaf = getOnlyLeafReader(r);
          assertEquals(3, leaf.maxDoc());

          NumericDocValues values = leaf.getNumericDocValues("foo");
          assertEquals(0, values.nextDoc());
          assertEquals(-1, values.longValue());
          assertEquals(1, values.nextDoc());
          assertEquals(7, values.longValue());
          assertEquals(2, values.nextDoc());
          assertEquals(18, values.longValue());
          // Same exhaustion check as testSortOnAddIndicesRandom: no value beyond the third doc.
          assertEquals(DocIdSetIterator.NO_MORE_DOCS, values.nextDoc());

          assertNotNull(leaf.getMetaData().getSort());
        }
      }
    }
  }

  /**
   * Randomized variant: indexes at least 200 documents in shuffled id order (with random
   * deletes), re-adds the leaves through {@link SortingCodecReader} under one of four randomly
   * chosen index sorts, and verifies doc values, term vectors, stored fields, points and
   * postings of every document in the resulting single leaf.
   *
   * <p>Most documents carry {@code id} plus the three {@code *_sort_field} doc values; the
   * remaining ones (chosen via {@code rarely()}) carry only {@code alt_id}, which is the
   * secondary sort field of every candidate sort.
   */
  public void testSortOnAddIndicesRandom() throws IOException {
    try (Directory dir = newDirectory()) {
      int numDocs = atLeast(200);
      int actualNumDocs;
      List<Integer> docIds = new ArrayList<>(numDocs);
      for (int i = 0; i < numDocs; i++) {
        docIds.add(i);
      }
      Collections.shuffle(docIds, random());

      try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) {
        for (int i = 0; i < numDocs; i++) {
          int docId = docIds.get(i);
          Document doc = new Document();
          doc.add(new StringField("id", Integer.toString(docId), Field.Store.YES));
          doc.add(new LongPoint("id", docId));
          String s = RandomStrings.randomRealisticUnicodeOfLength(random(), 25);
          doc.add(new TextField("text_field", s, Field.Store.YES));
          doc.add(new BinaryDocValuesField("text_field", new BytesRef(s)));
          doc.add(new TextField("another_text_field", s, Field.Store.YES));
          doc.add(new BinaryDocValuesField("another_text_field", new BytesRef(s)));
          doc.add(new SortedNumericDocValuesField("sorted_numeric_dv", docId));
          doc.add(new SortedDocValuesField("binary_sorted_dv", new BytesRef(Integer.toString(docId))));
          doc.add(new BinaryDocValuesField("binary_dv", new BytesRef(Integer.toString(docId))));
          doc.add(new SortedSetDocValuesField("sorted_set_dv", new BytesRef(Integer.toString(docId))));
          doc.add(new NumericDocValuesField("foo", random().nextInt(20)));
          FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
          ft.setStoreTermVectors(true);
          doc.add(new Field("term_vectors", "test" + docId, ft));
          if (rarely() == false) {
            // Common case: the document has values for every primary sort field.
            doc.add(new NumericDocValuesField("id", docId));
            doc.add(new SortedSetDocValuesField("sorted_set_sort_field", new BytesRef(String.format(Locale.ROOT, "%06d", docId))));
            doc.add(new SortedDocValuesField("sorted_binary_sort_field", new BytesRef(String.format(Locale.ROOT, "%06d", docId))));
            doc.add(new SortedNumericDocValuesField("sorted_numeric_sort_field", docId));
          } else {
            // Rare case: only the secondary sort field is present.
            doc.add(new NumericDocValuesField("alt_id", docId));
          }
          iw.addDocument(doc);
          if (i > 0 && random().nextInt(5) == 0) {
            // Occasionally delete by the id of some document added before the current one.
            final int id = RandomPicks.randomFrom(random(), docIds.subList(0, i));
            iw.deleteDocuments(new Term("id", Integer.toString(id)));
          }
        }
        iw.commit();
        actualNumDocs = iw.getDocStats().numDocs;
      }

      Sort indexSort = RandomPicks.randomFrom(random(), Arrays.asList(
          new Sort(new SortField("id", SortField.Type.LONG),
              new SortField("alt_id", SortField.Type.INT)),
          new Sort(new SortedSetSortField("sorted_set_sort_field", false),
              new SortField("alt_id", SortField.Type.INT)),
          new Sort(new SortedNumericSortField("sorted_numeric_sort_field", SortField.Type.INT),
              new SortField("alt_id", SortField.Type.INT)),
          new Sort(new SortField("sorted_binary_sort_field", SortField.Type.STRING, false),
              new SortField("alt_id", SortField.Type.INT))
      ));

      try (Directory sortDir = newDirectory()) {
        try (IndexWriter writer = new IndexWriter(sortDir, newIndexWriterConfig().setIndexSort(indexSort))) {
          try (DirectoryReader reader = DirectoryReader.open(dir)) {
            List<CodecReader> readers = new ArrayList<>();
            for (LeafReaderContext ctx : reader.leaves()) {
              CodecReader wrap = SortingCodecReader.wrap(SlowCodecReaderWrapper.wrap(ctx.reader()), indexSort);
              readers.add(wrap);
              // The term vectors reader of the wrapper must hand out a distinct, closeable clone.
              TermVectorsReader termVectorsReader = wrap.getTermVectorsReader();
              TermVectorsReader clone = termVectorsReader.clone();
              assertNotSame(termVectorsReader, clone);
              clone.close();
            }
            writer.addIndexes(readers.toArray(new CodecReader[0]));
          }
          assumeTrue("must have at least one doc", actualNumDocs > 0);

          try (DirectoryReader r = DirectoryReader.open(writer)) {
            LeafReader leaf = getOnlyLeafReader(r);
            // The sorted leaf is expected to hold exactly the live documents of the source.
            assertEquals(actualNumDocs, leaf.maxDoc());

            // The searcher does not depend on the loop variable, so build it once.
            IndexSearcher searcher = new IndexSearcher(r);

            BinaryDocValues binaryDv = leaf.getBinaryDocValues("binary_dv");
            SortedNumericDocValues sortedNumericDv = leaf.getSortedNumericDocValues("sorted_numeric_dv");
            SortedSetDocValues sortedSetDv = leaf.getSortedSetDocValues("sorted_set_dv");
            SortedDocValues binarySortedDv = leaf.getSortedDocValues("binary_sorted_dv");
            NumericDocValues ids = leaf.getNumericDocValues("id");
            long prevValue = -1;
            boolean usingAltIds = false;
            for (int i = 0; i < actualNumDocs; i++) {
              int idNext = ids.nextDoc();
              if (idNext == DocIdSetIterator.NO_MORE_DOCS) {
                // "id" is exhausted: continue with the documents that only have "alt_id".
                // This switch may happen at most once.
                assertFalse(usingAltIds);
                usingAltIds = true;
                ids = leaf.getNumericDocValues("alt_id");
                idNext = ids.nextDoc();
                // Fresh iterators are fetched for the second pass, since the ones above have
                // already been advanced (presumably they can only move forward).
                binaryDv = leaf.getBinaryDocValues("binary_dv");
                sortedNumericDv = leaf.getSortedNumericDocValues("sorted_numeric_dv");
                sortedSetDv = leaf.getSortedSetDocValues("sorted_set_dv");
                binarySortedDv = leaf.getSortedDocValues("binary_sorted_dv");
                prevValue = -1;
              }

              // Read the current id once; every check below is keyed on it.
              final long id = ids.longValue();
              final String idString = Long.toString(id);
              final BytesRef idBytes = new BytesRef(idString);

              // Within each pass the ids must be strictly increasing in doc-id order.
              assertTrue(prevValue + " < " + id, prevValue < id);
              prevValue = id;

              // Doc values of the same document must have moved along with it.
              assertTrue(binaryDv.advanceExact(idNext));
              assertTrue(sortedNumericDv.advanceExact(idNext));
              assertTrue(sortedSetDv.advanceExact(idNext));
              assertTrue(binarySortedDv.advanceExact(idNext));
              assertEquals(idBytes, binaryDv.binaryValue());
              assertEquals(idBytes, binarySortedDv.binaryValue());
              assertEquals(idBytes, sortedSetDv.lookupOrd(sortedSetDv.nextOrd()));
              assertEquals(1, sortedNumericDv.docValueCount());
              assertEquals(id, sortedNumericDv.nextValue());

              // Term vectors and stored fields.
              Fields termVectors = leaf.getTermVectors(idNext);
              assertTrue(termVectors.terms("term_vectors").iterator().seekExact(new BytesRef("test" + id)));
              assertEquals(idString, leaf.document(idNext).get("id"));

              // Points and postings must both resolve the id to this exact document.
              TopDocs result = searcher.search(LongPoint.newExactQuery("id", id), 1);
              assertEquals(1, result.totalHits.value);
              assertEquals(idNext, result.scoreDocs[0].doc);
              result = searcher.search(new TermQuery(new Term("id", idString)), 1);
              assertEquals(1, result.totalHits.value);
              assertEquals(idNext, result.scoreDocs[0].doc);
            }
            assertEquals(DocIdSetIterator.NO_MORE_DOCS, ids.nextDoc());
          }
        }
      }
    }
  }
}