// blob: 49d15fc1ad43b91474e0850f0ab5d86b02e21cb7 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs.uniformsplit.sharedterms;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.lucene90.MockTermStateFactory;
import org.apache.lucene.codecs.uniformsplit.BlockHeader;
import org.apache.lucene.codecs.uniformsplit.BlockLine;
import org.apache.lucene.codecs.uniformsplit.FSTDictionary;
import org.apache.lucene.codecs.uniformsplit.FieldMetadata;
import org.apache.lucene.codecs.uniformsplit.IndexDictionary;
import org.apache.lucene.codecs.uniformsplit.TermBytes;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.ImpactsEnum;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.VectorValues;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
public class TestSTBlockReader extends LuceneTestCase {
// Name of the file, inside the in-memory directory, that stands in for the block file.
private static final String MOCK_BLOCK_OUTPUT_NAME = "TestSTBlockReader.tmp";
// Field infos for the mock fields "f1" to "f4"; assigned in setUp().
private FieldInfos fieldInfos;
// Mock block lines handed to every reader under test; assigned in setUp().
private List<MockSTBlockLine> blockLines;
// Supplies browsers over the index dictionary built in setUp().
private IndexDictionary.BrowserSupplier supplier;
// In-memory directory holding the mock block file; created in setUp(), closed in tearDown().
private ByteBuffersDirectory directory;
/**
 * Builds the fixtures shared by all tests: the mock field infos, the seven mock block lines, an
 * in-memory directory containing the mock block file, and a browser supplier backed by an FST
 * dictionary holding a single entry.
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  fieldInfos = mockFieldInfos();

  // Each definition is (mdpLength, term, names of the fields containing the term).
  List<BlockLineDefinition> definitions =
      Arrays.asList(
          blockLineDef(1, "abaco", "f1", "f3"),
          blockLineDef(2, "amiga", "f1", "f2", "f4"),
          blockLineDef(5, "amigo", "f1", "f2", "f3", "f4"),
          blockLineDef(2, "arco", "f1"),
          blockLineDef(1, "bloom", "f2"),
          blockLineDef(1, "frien", "f2"),
          blockLineDef(6, "frienchies", "f3"));
  blockLines = generateBlockLines(definitions);

  // The mock block file contains nothing but one VInt.
  directory = new ByteBuffersDirectory();
  try (IndexOutput out = directory.createOutput(MOCK_BLOCK_OUTPUT_NAME, IOContext.DEFAULT)) {
    out.writeVInt(5);
  }

  // The index dictionary holds the single entry ("a", 0).
  IndexDictionary.Builder dictionaryBuilder = new FSTDictionary.Builder();
  dictionaryBuilder.add(new BytesRef("a"), 0);
  final IndexDictionary dictionary = dictionaryBuilder.build();

  // The supplier simply delegates both calls to the dictionary built above.
  supplier =
      new IndexDictionary.BrowserSupplier() {
        @Override
        public IndexDictionary.Browser get() throws IOException {
          return dictionary.browser();
        }

        @Override
        public long ramBytesUsed() {
          return dictionary.ramBytesUsed();
        }
      };
}
/**
 * Releases the fixtures created by {@link #setUp()}.
 *
 * <p>Each fixture is null-checked: if {@code setUp()} failed before assigning it, dereferencing
 * it here would throw a secondary {@link NullPointerException} in addition to the original
 * failure. {@code super.tearDown()} runs in all cases.
 */
@Override
public void tearDown() throws Exception {
  try {
    if (blockLines != null) {
      blockLines.clear();
    }
    if (directory != null) {
      directory.close();
    }
  } finally {
    super.tearDown();
  }
}
/**
 * Reader on field "f1" (last term "arco"): after {@code seekCeil("arco2")} the reader is expected
 * to have no current term, whereas {@code seekCeilIgnoreField("arco2")} is expected to land on
 * "bloom".
 */
public void testSeekExactIgnoreFieldF1() throws IOException {
  FieldInfo f1 = fieldInfos.fieldInfo("f1"); // last term "arco"
  MockSTBlockReader reader =
      new MockSTBlockReader(supplier, blockLines, directory, f1, fieldInfos);

  // seekCeil past the last term of f1: no current term expected.
  reader.seekCeil(new BytesRef("arco2"));
  assertNull(reader.term());

  // seekCeilIgnoreField with the same target: the next line of the block is expected.
  reader.seekCeilIgnoreField(new BytesRef("arco2"));
  assertEquals("bloom", reader.term().utf8ToString());
}
/**
 * Reader on field "f2" (last term "frien"): {@code seekCeilIgnoreField("arco2")} is expected to
 * land on "bloom".
 */
public void testSeekExactIgnoreFieldF2() throws IOException {
  FieldInfo f2 = fieldInfos.fieldInfo("f2"); // last term "frien"
  MockSTBlockReader reader =
      new MockSTBlockReader(supplier, blockLines, directory, f2, fieldInfos);

  // when seekCeilIgnoreField
  reader.seekCeilIgnoreField(new BytesRef("arco2"));

  // then
  assertEquals("bloom", reader.term().utf8ToString());
}
/**
 * Reader on field "f3" (last term "frienchies"): {@code seekCeilIgnoreField("arco2")} is expected
 * to land on "bloom", while {@code seekCeil("arco2")} is expected to land on "frienchies".
 */
public void testSeekExactIgnoreFieldF3() throws IOException {
  FieldInfo f3 = fieldInfos.fieldInfo("f3"); // last term "frienchies"
  MockSTBlockReader reader =
      new MockSTBlockReader(supplier, blockLines, directory, f3, fieldInfos);

  // seekCeilIgnoreField: expected to stop on the next line, whatever its fields.
  reader.seekCeilIgnoreField(new BytesRef("arco2"));
  assertEquals("bloom", reader.term().utf8ToString());

  // seekCeil: expected to stop on the next term that belongs to f3.
  reader.seekCeil(new BytesRef("arco2"));
  assertEquals("frienchies", reader.term().utf8ToString());
}
/**
 * Reader on field "f4" (last term "amigo"): {@code seekCeilIgnoreField("abaco")} is expected to
 * land on "abaco" itself, while {@code seekCeil("abaco")} is expected to land on "amiga".
 */
public void testSeekExactIgnoreFieldF4() throws IOException {
  FieldInfo f4 = fieldInfos.fieldInfo("f4"); // last term "amigo"
  MockSTBlockReader reader =
      new MockSTBlockReader(supplier, blockLines, directory, f4, fieldInfos);

  // seekCeilIgnoreField: "abaco" is a line of the block, although it is not defined for f4.
  reader.seekCeilIgnoreField(new BytesRef("abaco"));
  assertEquals("abaco", reader.term().utf8ToString());

  // seekCeil: expected to stop on the first term defined for f4.
  reader.seekCeil(new BytesRef("abaco"));
  assertEquals("amiga", reader.term().utf8ToString());
}
/** Creates the field infos for the four mock fields "f1" to "f4", numbered 0 to 3. */
private static FieldInfos mockFieldInfos() {
  String[] fieldNames = {"f1", "f2", "f3", "f4"};
  FieldInfo[] infos = new FieldInfo[fieldNames.length];
  for (int number = 0; number < fieldNames.length; number++) {
    infos[number] = mockFieldInfo(fieldNames[number], number);
  }
  return new FieldInfos(infos);
}
/**
 * Creates a mock {@link FieldInfo} with the given name and number.
 *
 * <p>The field is indexed with docs, freqs, positions and offsets, has no doc values type and no
 * vector search strategy. NOTE(review): the meaning of the remaining positional boolean and
 * numeric arguments depends on the {@code FieldInfo} constructor of the Lucene version in use;
 * confirm against it before changing any of them.
 *
 * @param fieldName the name of the field
 * @param number the field number
 */
private static FieldInfo mockFieldInfo(String fieldName, int number) {
  IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
  DocValuesType docValuesType = DocValuesType.NONE;
  VectorValues.SearchStrategy searchStrategy = VectorValues.SearchStrategy.NONE;
  return new FieldInfo(
      fieldName,
      number,
      false,
      false,
      true,
      indexOptions,
      docValuesType,
      -1,
      Collections.emptyMap(),
      0,
      0,
      0,
      0,
      searchStrategy,
      false);
}
/**
 * Creates a block line definition.
 *
 * @param mdpLength first argument passed to the {@link TermBytes} constructor
 * @param term the term, converted to a {@link BytesRef}
 * @param fields names of the fields the term is defined for
 */
private BlockLineDefinition blockLineDef(int mdpLength, String term, String... fields) {
  TermBytes termBytes = new TermBytes(mdpLength, new BytesRef(term));
  List<String> fieldNames = Arrays.asList(fields);
  return new BlockLineDefinition(termBytes, fieldNames);
}
/**
 * Converts block line definitions into mock block lines, preserving the iteration order.
 *
 * @param blockLineDefinitions the definitions to convert
 * @return a new mutable list with one {@link MockSTBlockLine} per definition
 */
private static List<MockSTBlockLine> generateBlockLines(
    Iterable<BlockLineDefinition> blockLineDefinitions) {
  List<MockSTBlockLine> result = new ArrayList<>();
  blockLineDefinitions.forEach(
      definition -> result.add(new MockSTBlockLine(definition.termBytes, definition.fields)));
  return result;
}
/** Describes a mock block line: its term bytes and the names of the fields it is defined for. */
private static class BlockLineDefinition {
  final TermBytes termBytes;
  final List<String> fields;

  BlockLineDefinition(TermBytes term, List<String> fieldNames) {
    termBytes = term;
    fields = fieldNames;
  }
}
/** Mock {@link STBlockLine} that keeps one mock term state per field name. */
private static class MockSTBlockLine extends STBlockLine {
  /** Mock term state of each field this line is defined for, keyed by field name. */
  final Map<String, BlockTermState> termStates = new HashMap<>();

  MockSTBlockLine(TermBytes termBytes, List<String> fields) {
    // The super class only receives a single placeholder term state built from nulls.
    super(termBytes, Collections.singletonList(new FieldMetadataTermState(null, null)));
    fields.forEach(fieldName -> termStates.put(fieldName, MockTermStateFactory.create()));
  }

  /** Returns the names of the fields this line is defined for. */
  Set<String> getFields() {
    return termStates.keySet();
  }
}
private static class MockSTBlockReader extends STBlockReader {
List<MockSTBlockLine> lines;
MockSTBlockReader(
IndexDictionary.BrowserSupplier supplier,
List<MockSTBlockLine> lines,
Directory directory,
FieldInfo fieldInfo,
FieldInfos fieldInfos)
throws IOException {
super(
supplier,
directory.openInput(MOCK_BLOCK_OUTPUT_NAME, IOContext.DEFAULT),
getMockPostingReaderBase(),
mockFieldMetadata(fieldInfo, getLastTermForField(lines, fieldInfo.name)),
null,
fieldInfos);
this.lines = lines;
}
static PostingsReaderBase getMockPostingReaderBase() {
return new PostingsReaderBase() {
@Override
public void init(IndexInput termsIn, SegmentReadState state) {}
@Override
public BlockTermState newTermState() {
return null;
}
@Override
public void decodeTerm(
DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) {}
@Override
public PostingsEnum postings(
FieldInfo fieldInfo, BlockTermState state, PostingsEnum reuse, int flags) {
return null;
}
@Override
public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) {
return null;
}
@Override
public void checkIntegrity() {}
@Override
public void close() {}
@Override
public long ramBytesUsed() {
return 0;
}
};
}
static FieldMetadata mockFieldMetadata(FieldInfo fieldInfo, BytesRef lastTerm) {
FieldMetadata fieldMetadata = new FieldMetadata(fieldInfo, 1);
fieldMetadata.setLastTerm(lastTerm);
fieldMetadata.setLastBlockStartFP(1);
return fieldMetadata;
}
static BytesRef getLastTermForField(List<MockSTBlockLine> lines, String fieldName) {
BytesRef lastTerm = null;
for (MockSTBlockLine line : lines) {
if (line.getFields().contains(fieldName)) {
lastTerm = line.getTermBytes().getTerm();
}
}
return lastTerm;
}
@Override
protected BlockTermState readTermState() {
return termState =
lines.get(lineIndexInBlock - 1).termStates.get(fieldMetadata.getFieldInfo().name);
}
@Override
protected int compareToMiddleAndJump(BytesRef searchedTerm) {
blockLine = lines.get(lines.size() >> 1);
lineIndexInBlock = blockHeader.getMiddleLineIndex();
int compare = searchedTerm.compareTo(term());
if (compare < 0) {
lineIndexInBlock = 0;
}
return compare;
}
@Override
protected BlockLine readLineInBlock() {
if (lineIndexInBlock >= lines.size()) {
return blockLine = null;
}
return blockLine = lines.get(lineIndexInBlock++);
}
@Override
protected void initializeHeader(BytesRef searchedTerm, long startBlockLinePos)
throws IOException {
// Force blockStartFP to an impossible value so we never trigger the optimization
// that keeps the current block with our mock block reader.
blockStartFP = -1;
super.initializeHeader(searchedTerm, startBlockLinePos);
}
@Override
protected BlockHeader readHeader() {
return blockHeader =
lineIndexInBlock >= lines.size() ? null : new MockBlockHeader(lines.size());
}
}
/**
 * Mock {@link BlockHeader} parameterized only by its lines count; every other constructor
 * argument is a fixed constant.
 */
private static class MockBlockHeader extends BlockHeader {
  MockBlockHeader(int lineCount) {
    super(lineCount, 0, 0, 0, 1, 0);
  }
}
}