| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.lucene.codecs.uniformsplit.sharedterms; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| import org.apache.lucene.codecs.BlockTermState; |
| import org.apache.lucene.codecs.PostingsReaderBase; |
| import org.apache.lucene.codecs.lucene90.MockTermStateFactory; |
| import org.apache.lucene.codecs.uniformsplit.BlockHeader; |
| import org.apache.lucene.codecs.uniformsplit.BlockLine; |
| import org.apache.lucene.codecs.uniformsplit.FSTDictionary; |
| import org.apache.lucene.codecs.uniformsplit.FieldMetadata; |
| import org.apache.lucene.codecs.uniformsplit.IndexDictionary; |
| import org.apache.lucene.codecs.uniformsplit.TermBytes; |
| import org.apache.lucene.index.DocValuesType; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.FieldInfos; |
| import org.apache.lucene.index.ImpactsEnum; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.index.PostingsEnum; |
| import org.apache.lucene.index.SegmentReadState; |
| import org.apache.lucene.index.VectorValues; |
| import org.apache.lucene.store.ByteBuffersDirectory; |
| import org.apache.lucene.store.DataInput; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.store.IOContext; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.store.IndexOutput; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.LuceneTestCase; |
| |
/**
 * Tests seek behavior of the shared-terms {@code STBlockReader}: {@code seekCeil} must not
 * advance past the current field's last term (returning a null current term instead), while
 * {@code seekCeilIgnoreField} may cross into terms that belong only to other fields.
 *
 * <p>All block I/O is mocked out: block lines, term states, and the block header are served
 * from in-memory structures rather than decoded from the directory.
 */
public class TestSTBlockReader extends LuceneTestCase {

  // Name of the throwaway file whose IndexInput backs the mock block reader. The file's
  // contents are never actually decoded (see the mock read* overrides below).
  private static final String MOCK_BLOCK_OUTPUT_NAME = "TestSTBlockReader.tmp";

  private FieldInfos fieldInfos;
  // Shared-terms vocabulary: one line per term, each carrying the set of fields that contain it.
  private List<MockSTBlockLine> blockLines;
  private IndexDictionary.BrowserSupplier supplier;
  private ByteBuffersDirectory directory;

  @Override
  public void setUp() throws Exception {
    super.setUp();

    fieldInfos = mockFieldInfos();
    // Sorted term vocabulary; the varargs list which fields contain each term. The per-field
    // "last term" noted in the test methods below is derived from these field memberships.
    List<BlockLineDefinition> vocab = new ArrayList<>();
    vocab.add(blockLineDef(1, "abaco", "f1", "f3"));
    vocab.add(blockLineDef(2, "amiga", "f1", "f2", "f4"));
    vocab.add(blockLineDef(5, "amigo", "f1", "f2", "f3", "f4"));
    vocab.add(blockLineDef(2, "arco", "f1"));
    vocab.add(blockLineDef(1, "bloom", "f2"));
    vocab.add(blockLineDef(1, "frien", "f2"));
    vocab.add(blockLineDef(6, "frienchies", "f3"));

    blockLines = generateBlockLines(vocab);
    directory = new ByteBuffersDirectory();
    // Write a single vint just so the file exists and can be opened; the mock reader
    // never interprets these bytes.
    try (IndexOutput blockOutput =
        directory.createOutput(MOCK_BLOCK_OUTPUT_NAME, IOContext.DEFAULT)) {
      blockOutput.writeVInt(5);
    }
    // Minimal one-entry dictionary pointing every seek at block offset 0.
    IndexDictionary.Builder builder = new FSTDictionary.Builder();
    builder.add(new BytesRef("a"), 0);
    IndexDictionary indexDictionary = builder.build();
    supplier =
        new IndexDictionary.BrowserSupplier() {
          @Override
          public IndexDictionary.Browser get() throws IOException {
            return indexDictionary.browser();
          }

          @Override
          public long ramBytesUsed() {
            return indexDictionary.ramBytesUsed();
          }
        };
  }

  @Override
  public void tearDown() throws Exception {
    try {
      blockLines.clear();
      // NOTE(review): the IndexInput opened by each MockSTBlockReader is never closed
      // explicitly; closing the ByteBuffersDirectory here is presumably sufficient for this
      // in-memory directory — confirm if MockDirectoryWrapper-style leak checking is added.
      directory.close();
    } finally {
      super.tearDown();
    }
  }

  public void testSeekExactIgnoreFieldF1() throws IOException {
    // when block reader for field 1 -> f1
    MockSTBlockReader blockReader =
        new MockSTBlockReader(
            supplier,
            blockLines,
            directory,
            fieldInfos.fieldInfo("f1"), // last term "arco"
            fieldInfos);

    // when seekCeil beyond f1's last term
    blockReader.seekCeil(new BytesRef("arco2"));
    // then the field-bounded seek exhausts: no current term
    assertNull(blockReader.term());

    // when seekCeilIgnoreField
    blockReader.seekCeilIgnoreField(new BytesRef("arco2"));
    // then it lands on "bloom", a term present only in f2
    assertEquals("bloom", blockReader.term().utf8ToString());
  }

  public void testSeekExactIgnoreFieldF2() throws IOException {
    MockSTBlockReader blockReader =
        new MockSTBlockReader(
            supplier,
            blockLines,
            directory,
            fieldInfos.fieldInfo("f2"), // last term "frien"
            fieldInfos);

    // when seekCeil
    blockReader.seekCeilIgnoreField(new BytesRef("arco2"));
    // then
    assertEquals("bloom", blockReader.term().utf8ToString());
  }

  public void testSeekExactIgnoreFieldF3() throws IOException {
    MockSTBlockReader blockReader =
        new MockSTBlockReader(
            supplier,
            blockLines,
            directory,
            fieldInfos.fieldInfo("f3"), // last term "frienchies"
            fieldInfos);

    // when seekCeilIgnoreField
    blockReader.seekCeilIgnoreField(new BytesRef("arco2"));
    // then it stops at "bloom" even though "bloom" is not in f3
    assertEquals("bloom", blockReader.term().utf8ToString());

    // when seekCeil (field-bounded)
    blockReader.seekCeil(new BytesRef("arco2"));
    // then it skips to the next term actually present in f3
    assertEquals("frienchies", blockReader.term().utf8ToString());
  }

  public void testSeekExactIgnoreFieldF4() throws IOException {
    MockSTBlockReader blockReader =
        new MockSTBlockReader(
            supplier,
            blockLines,
            directory,
            fieldInfos.fieldInfo("f4"), // last term "amigo"
            fieldInfos);

    // when seekCeilIgnoreField on an exact match
    blockReader.seekCeilIgnoreField(new BytesRef("abaco"));
    // then it stops on "abaco" even though "abaco" is not in f4
    assertEquals("abaco", blockReader.term().utf8ToString());

    // when seekCeil (field-bounded)
    blockReader.seekCeil(new BytesRef("abaco"));
    // then it advances to the first f4 term at or after "abaco"
    assertEquals("amiga", blockReader.term().utf8ToString());
  }

  // Four indexed fields f1..f4 with distinct field numbers 0..3.
  private static FieldInfos mockFieldInfos() {
    return new FieldInfos(
        new FieldInfo[] {
          mockFieldInfo("f1", 0),
          mockFieldInfo("f2", 1),
          mockFieldInfo("f3", 2),
          mockFieldInfo("f4", 3),
        });
  }

  // Builds a minimal FieldInfo: full postings (docs/freqs/positions/offsets), no doc values,
  // no vectors, no point values.
  private static FieldInfo mockFieldInfo(String fieldName, int number) {
    return new FieldInfo(
        fieldName,
        number,
        false,
        false,
        true,
        IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
        DocValuesType.NONE,
        -1,
        Collections.emptyMap(),
        0,
        0,
        0,
        0,
        VectorValues.SearchStrategy.NONE,
        false);
  }

  // mdpLength is the term's "max distinguishing prefix" length stored in TermBytes.
  private BlockLineDefinition blockLineDef(int mdpLength, String term, String... fields) {
    return new BlockLineDefinition(
        new TermBytes(mdpLength, new BytesRef(term)), Arrays.asList(fields));
  }

  private static List<MockSTBlockLine> generateBlockLines(
      Iterable<BlockLineDefinition> blockLineDefinitions) {
    List<MockSTBlockLine> lines = new ArrayList<>();
    for (BlockLineDefinition blockLineDefinition : blockLineDefinitions) {
      lines.add(new MockSTBlockLine(blockLineDefinition.termBytes, blockLineDefinition.fields));
    }
    return lines;
  }

  /** Pairs a term with the names of the fields that contain it. */
  private static class BlockLineDefinition {
    final TermBytes termBytes;
    final List<String> fields;

    BlockLineDefinition(TermBytes termBytes, List<String> fields) {
      this.termBytes = termBytes;
      this.fields = fields;
    }
  }

  /**
   * STBlockLine whose per-field term states live in an in-memory map instead of being
   * decoded from the block bytes.
   */
  private static class MockSTBlockLine extends STBlockLine {

    final Map<String, BlockTermState> termStates;

    MockSTBlockLine(TermBytes termBytes, List<String> fields) {
      // The superclass requires a non-empty field/term-state list; its content is unused here.
      super(termBytes, Collections.singletonList(new FieldMetadataTermState(null, null)));
      this.termStates = new HashMap<>();
      for (String field : fields) {
        termStates.put(field, MockTermStateFactory.create());
      }
    }

    Set<String> getFields() {
      return termStates.keySet();
    }
  }

  /**
   * STBlockReader that serves block lines, term states, and headers from the in-memory
   * {@link MockSTBlockLine} list, bypassing all on-disk decoding.
   */
  private static class MockSTBlockReader extends STBlockReader {

    List<MockSTBlockLine> lines;

    MockSTBlockReader(
        IndexDictionary.BrowserSupplier supplier,
        List<MockSTBlockLine> lines,
        Directory directory,
        FieldInfo fieldInfo,
        FieldInfos fieldInfos)
        throws IOException {
      super(
          supplier,
          directory.openInput(MOCK_BLOCK_OUTPUT_NAME, IOContext.DEFAULT),
          getMockPostingReaderBase(),
          mockFieldMetadata(fieldInfo, getLastTermForField(lines, fieldInfo.name)),
          null,
          fieldInfos);
      this.lines = lines;
    }

    // All-no-op postings reader: the tests only exercise term seeking, never postings.
    static PostingsReaderBase getMockPostingReaderBase() {
      return new PostingsReaderBase() {
        @Override
        public void init(IndexInput termsIn, SegmentReadState state) {}

        @Override
        public BlockTermState newTermState() {
          return null;
        }

        @Override
        public void decodeTerm(
            DataInput in, FieldInfo fieldInfo, BlockTermState state, boolean absolute) {}

        @Override
        public PostingsEnum postings(
            FieldInfo fieldInfo, BlockTermState state, PostingsEnum reuse, int flags) {
          return null;
        }

        @Override
        public ImpactsEnum impacts(FieldInfo fieldInfo, BlockTermState state, int flags) {
          return null;
        }

        @Override
        public void checkIntegrity() {}

        @Override
        public void close() {}

        @Override
        public long ramBytesUsed() {
          return 0;
        }
      };
    }

    // The last term recorded in the metadata is what bounds seekCeil for the field.
    static FieldMetadata mockFieldMetadata(FieldInfo fieldInfo, BytesRef lastTerm) {
      FieldMetadata fieldMetadata = new FieldMetadata(fieldInfo, 1);
      fieldMetadata.setLastTerm(lastTerm);
      fieldMetadata.setLastBlockStartFP(1);
      return fieldMetadata;
    }

    // Last (greatest, in vocabulary order) term whose field set contains fieldName.
    static BytesRef getLastTermForField(List<MockSTBlockLine> lines, String fieldName) {
      BytesRef lastTerm = null;
      for (MockSTBlockLine line : lines) {
        if (line.getFields().contains(fieldName)) {
          lastTerm = line.getTermBytes().getTerm();
        }
      }
      return lastTerm;
    }

    @Override
    protected BlockTermState readTermState() {
      // lineIndexInBlock was already advanced past the current line by readLineInBlock,
      // hence the -1. Returns null if the current term is absent from this reader's field.
      return termState =
          lines.get(lineIndexInBlock - 1).termStates.get(fieldMetadata.getFieldInfo().name);
    }

    @Override
    protected int compareToMiddleAndJump(BytesRef searchedTerm) {
      // Mimic the binary-search jump: compare against the middle line; rewind to the block
      // start if the searched term sorts before it, otherwise stay at the middle.
      blockLine = lines.get(lines.size() >> 1);
      lineIndexInBlock = blockHeader.getMiddleLineIndex();
      int compare = searchedTerm.compareTo(term());
      if (compare < 0) {
        lineIndexInBlock = 0;
      }
      return compare;
    }

    @Override
    protected BlockLine readLineInBlock() {
      // Serve lines sequentially from memory; null signals the end of the block.
      if (lineIndexInBlock >= lines.size()) {
        return blockLine = null;
      }
      return blockLine = lines.get(lineIndexInBlock++);
    }

    @Override
    protected void initializeHeader(BytesRef searchedTerm, long startBlockLinePos)
        throws IOException {
      // Force blockStartFP to an impossible value so we never trigger the optimization
      // that keeps the current block with our mock block reader.
      blockStartFP = -1;
      super.initializeHeader(searchedTerm, startBlockLinePos);
    }

    @Override
    protected BlockHeader readHeader() {
      return blockHeader =
          lineIndexInBlock >= lines.size() ? null : new MockBlockHeader(lines.size());
    }
  }

  /** Header for a single in-memory block; only the line count and middle index matter. */
  private static class MockBlockHeader extends BlockHeader {

    MockBlockHeader(int linesCount) {
      super(linesCount, 0, 0, 0, 1, 0);
    }
  }
}