| package org.apache.lucene.codecs.memory; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| import java.util.HashMap; |
| import java.util.Map; |
| import java.util.concurrent.atomic.AtomicLong; |
| |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.DocValuesProducer; |
| import org.apache.lucene.index.BinaryDocValues; |
| import org.apache.lucene.index.CorruptIndexException; |
| import org.apache.lucene.index.DocValues; |
| import org.apache.lucene.index.FieldInfo; |
| import org.apache.lucene.index.IndexFileNames; |
| import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.RandomAccessOrds; |
| import org.apache.lucene.index.SegmentReadState; |
| import org.apache.lucene.index.SortedDocValues; |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.store.ChecksumIndexInput; |
| import org.apache.lucene.store.IndexInput; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.FixedBitSet; |
| import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.RamUsageEstimator; |
| |
| /** |
| * Reader for {@link DirectDocValuesFormat} |
| */ |
| |
| class DirectDocValuesProducer extends DocValuesProducer { |
| // metadata maps (just file pointers and minimal stuff) |
| private final Map<Integer,NumericEntry> numerics = new HashMap<>(); |
| private final Map<Integer,BinaryEntry> binaries = new HashMap<>(); |
| private final Map<Integer,SortedEntry> sorteds = new HashMap<>(); |
| private final Map<Integer,SortedSetEntry> sortedSets = new HashMap<>(); |
| private final IndexInput data; |
| |
| // ram instances we have already loaded |
| private final Map<Integer,NumericDocValues> numericInstances = |
| new HashMap<>(); |
| private final Map<Integer,BinaryDocValues> binaryInstances = |
| new HashMap<>(); |
| private final Map<Integer,SortedDocValues> sortedInstances = |
| new HashMap<>(); |
| private final Map<Integer,SortedSetRawValues> sortedSetInstances = |
| new HashMap<>(); |
| private final Map<Integer,Bits> docsWithFieldInstances = new HashMap<>(); |
| |
| private final int maxDoc; |
| private final AtomicLong ramBytesUsed; |
| private final int version; |
| |
| static final byte NUMBER = 0; |
| static final byte BYTES = 1; |
| static final byte SORTED = 2; |
| static final byte SORTED_SET = 3; |
| |
| static final int VERSION_START = 0; |
| static final int VERSION_CHECKSUM = 1; |
| static final int VERSION_CURRENT = VERSION_CHECKSUM; |
| |
/**
 * Opens the meta and data files of a segment.
 *
 * <p>The meta file is read completely (header, all field entries, then footer or EOF
 * check depending on the version) and closed again. The data file is opened, its header
 * is checked, and it is kept open in {@code data} for later loads.
 *
 * @param state         segment read state supplying the directory, segment name/suffix and IO context
 * @param dataCodec     codec name expected in the data file header
 * @param dataExtension file extension of the data file
 * @param metaCodec     codec name expected in the meta file header
 * @param metaExtension file extension of the meta file
 * @throws IOException if a file cannot be opened or read; a {@link CorruptIndexException}
 *         is thrown when the meta and data files report different format versions
 */
DirectDocValuesProducer(SegmentReadState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException {
  maxDoc = state.segmentInfo.getDocCount();

  // Step 1: parse every field entry out of the metadata file.
  final String metaFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension);
  final ChecksumIndexInput metaIn = state.directory.openChecksumInput(metaFileName, state.context);
  ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOfInstance(getClass()));
  boolean metaOk = false;
  try {
    version = CodecUtil.checkHeader(metaIn, metaCodec, VERSION_START, VERSION_CURRENT);
    readFields(metaIn);

    if (version < VERSION_CHECKSUM) {
      CodecUtil.checkEOF(metaIn);
    } else {
      CodecUtil.checkFooter(metaIn);
    }
    metaOk = true;
  } finally {
    if (metaOk) {
      IOUtils.close(metaIn);
    } else {
      // Already failing: do not let a close() error replace the original exception.
      IOUtils.closeWhileHandlingException(metaIn);
    }
  }

  // Step 2: open the data file and make sure it was written with the same format version.
  final String dataFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
  data = state.directory.openInput(dataFileName, state.context);
  boolean dataOk = false;
  try {
    final int dataVersion = CodecUtil.checkHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
    if (dataVersion != version) {
      throw new CorruptIndexException("Format versions mismatch");
    }
    dataOk = true;
  } finally {
    if (!dataOk) {
      IOUtils.closeWhileHandlingException(this.data);
    }
  }
}
| |
| private NumericEntry readNumericEntry(IndexInput meta) throws IOException { |
| NumericEntry entry = new NumericEntry(); |
| entry.offset = meta.readLong(); |
| entry.count = meta.readInt(); |
| entry.missingOffset = meta.readLong(); |
| if (entry.missingOffset != -1) { |
| entry.missingBytes = meta.readLong(); |
| } else { |
| entry.missingBytes = 0; |
| } |
| entry.byteWidth = meta.readByte(); |
| |
| return entry; |
| } |
| |
| private BinaryEntry readBinaryEntry(IndexInput meta) throws IOException { |
| BinaryEntry entry = new BinaryEntry(); |
| entry.offset = meta.readLong(); |
| entry.numBytes = meta.readInt(); |
| entry.count = meta.readInt(); |
| entry.missingOffset = meta.readLong(); |
| if (entry.missingOffset != -1) { |
| entry.missingBytes = meta.readLong(); |
| } else { |
| entry.missingBytes = 0; |
| } |
| |
| return entry; |
| } |
| |
| private SortedEntry readSortedEntry(IndexInput meta) throws IOException { |
| SortedEntry entry = new SortedEntry(); |
| entry.docToOrd = readNumericEntry(meta); |
| entry.values = readBinaryEntry(meta); |
| return entry; |
| } |
| |
| private SortedSetEntry readSortedSetEntry(IndexInput meta) throws IOException { |
| SortedSetEntry entry = new SortedSetEntry(); |
| entry.docToOrdAddress = readNumericEntry(meta); |
| entry.ords = readNumericEntry(meta); |
| entry.values = readBinaryEntry(meta); |
| return entry; |
| } |
| |
| private void readFields(IndexInput meta) throws IOException { |
| int fieldNumber = meta.readVInt(); |
| while (fieldNumber != -1) { |
| int fieldType = meta.readByte(); |
| if (fieldType == NUMBER) { |
| numerics.put(fieldNumber, readNumericEntry(meta)); |
| } else if (fieldType == BYTES) { |
| binaries.put(fieldNumber, readBinaryEntry(meta)); |
| } else if (fieldType == SORTED) { |
| sorteds.put(fieldNumber, readSortedEntry(meta)); |
| } else if (fieldType == SORTED_SET) { |
| sortedSets.put(fieldNumber, readSortedSetEntry(meta)); |
| } else { |
| throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta); |
| } |
| fieldNumber = meta.readVInt(); |
| } |
| } |
| |
| @Override |
| public long ramBytesUsed() { |
| return ramBytesUsed.get(); |
| } |
| |
| @Override |
| public void checkIntegrity() throws IOException { |
| if (version >= VERSION_CHECKSUM) { |
| CodecUtil.checksumEntireFile(data); |
| } |
| } |
| |
| @Override |
| public synchronized NumericDocValues getNumeric(FieldInfo field) throws IOException { |
| NumericDocValues instance = numericInstances.get(field.number); |
| if (instance == null) { |
| // Lazy load |
| instance = loadNumeric(numerics.get(field.number)); |
| numericInstances.put(field.number, instance); |
| } |
| return instance; |
| } |
| |
| private NumericDocValues loadNumeric(NumericEntry entry) throws IOException { |
| data.seek(entry.offset + entry.missingBytes); |
| switch (entry.byteWidth) { |
| case 1: |
| { |
| final byte[] values = new byte[entry.count]; |
| data.readBytes(values, 0, entry.count); |
| ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(values)); |
| return new NumericDocValues() { |
| @Override |
| public long get(int idx) { |
| return values[idx]; |
| } |
| }; |
| } |
| |
| case 2: |
| { |
| final short[] values = new short[entry.count]; |
| for(int i=0;i<entry.count;i++) { |
| values[i] = data.readShort(); |
| } |
| ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(values)); |
| return new NumericDocValues() { |
| @Override |
| public long get(int idx) { |
| return values[idx]; |
| } |
| }; |
| } |
| |
| case 4: |
| { |
| final int[] values = new int[entry.count]; |
| for(int i=0;i<entry.count;i++) { |
| values[i] = data.readInt(); |
| } |
| ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(values)); |
| return new NumericDocValues() { |
| @Override |
| public long get(int idx) { |
| return values[idx]; |
| } |
| }; |
| } |
| |
| case 8: |
| { |
| final long[] values = new long[entry.count]; |
| for(int i=0;i<entry.count;i++) { |
| values[i] = data.readLong(); |
| } |
| ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(values)); |
| return new NumericDocValues() { |
| @Override |
| public long get(int idx) { |
| return values[idx]; |
| } |
| }; |
| } |
| |
| default: |
| throw new AssertionError(); |
| } |
| } |
| |
| @Override |
| public synchronized BinaryDocValues getBinary(FieldInfo field) throws IOException { |
| BinaryDocValues instance = binaryInstances.get(field.number); |
| if (instance == null) { |
| // Lazy load |
| instance = loadBinary(binaries.get(field.number)); |
| binaryInstances.put(field.number, instance); |
| } |
| return instance; |
| } |
| |
| private BinaryDocValues loadBinary(BinaryEntry entry) throws IOException { |
| data.seek(entry.offset); |
| final byte[] bytes = new byte[entry.numBytes]; |
| data.readBytes(bytes, 0, entry.numBytes); |
| data.seek(entry.offset + entry.numBytes + entry.missingBytes); |
| |
| final int[] address = new int[entry.count+1]; |
| for(int i=0;i<entry.count;i++) { |
| address[i] = data.readInt(); |
| } |
| address[entry.count] = data.readInt(); |
| |
| ramBytesUsed.addAndGet(RamUsageEstimator.sizeOf(bytes) + RamUsageEstimator.sizeOf(address)); |
| |
| return new BinaryDocValues() { |
| @Override |
| public void get(int docID, BytesRef result) { |
| result.bytes = bytes; |
| result.offset = address[docID]; |
| result.length = address[docID+1] - result.offset; |
| }; |
| }; |
| } |
| |
| @Override |
| public synchronized SortedDocValues getSorted(FieldInfo field) throws IOException { |
| SortedDocValues instance = sortedInstances.get(field.number); |
| if (instance == null) { |
| // Lazy load |
| instance = loadSorted(field); |
| sortedInstances.put(field.number, instance); |
| } |
| return instance; |
| } |
| |
| private SortedDocValues loadSorted(FieldInfo field) throws IOException { |
| final SortedEntry entry = sorteds.get(field.number); |
| final NumericDocValues docToOrd = loadNumeric(entry.docToOrd); |
| final BinaryDocValues values = loadBinary(entry.values); |
| |
| return new SortedDocValues() { |
| |
| @Override |
| public int getOrd(int docID) { |
| return (int) docToOrd.get(docID); |
| } |
| |
| @Override |
| public void lookupOrd(int ord, BytesRef result) { |
| values.get(ord, result); |
| } |
| |
| @Override |
| public int getValueCount() { |
| return entry.values.count; |
| } |
| |
| // Leave lookupTerm to super's binary search |
| |
| // Leave termsEnum to super |
| }; |
| } |
| |
| @Override |
| public synchronized SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { |
| SortedSetRawValues instance = sortedSetInstances.get(field.number); |
| final SortedSetEntry entry = sortedSets.get(field.number); |
| if (instance == null) { |
| // Lazy load |
| instance = loadSortedSet(entry); |
| sortedSetInstances.put(field.number, instance); |
| } |
| |
| final NumericDocValues docToOrdAddress = instance.docToOrdAddress; |
| final NumericDocValues ords = instance.ords; |
| final BinaryDocValues values = instance.values; |
| |
| // Must make a new instance since the iterator has state: |
| return new RandomAccessOrds() { |
| int ordStart; |
| int ordUpto; |
| int ordLimit; |
| |
| @Override |
| public long nextOrd() { |
| if (ordUpto == ordLimit) { |
| return NO_MORE_ORDS; |
| } else { |
| return ords.get(ordUpto++); |
| } |
| } |
| |
| @Override |
| public void setDocument(int docID) { |
| ordStart = ordUpto = (int) docToOrdAddress.get(docID); |
| ordLimit = (int) docToOrdAddress.get(docID+1); |
| } |
| |
| @Override |
| public void lookupOrd(long ord, BytesRef result) { |
| values.get((int) ord, result); |
| } |
| |
| @Override |
| public long getValueCount() { |
| return entry.values.count; |
| } |
| |
| @Override |
| public long ordAt(int index) { |
| return ords.get(ordStart + index); |
| } |
| |
| @Override |
| public int cardinality() { |
| return ordLimit - ordStart; |
| } |
| |
| // Leave lookupTerm to super's binary search |
| |
| // Leave termsEnum to super |
| }; |
| } |
| |
| private SortedSetRawValues loadSortedSet(SortedSetEntry entry) throws IOException { |
| SortedSetRawValues instance = new SortedSetRawValues(); |
| instance.docToOrdAddress = loadNumeric(entry.docToOrdAddress); |
| instance.ords = loadNumeric(entry.ords); |
| instance.values = loadBinary(entry.values); |
| return instance; |
| } |
| |
| private Bits getMissingBits(int fieldNumber, final long offset, final long length) throws IOException { |
| if (offset == -1) { |
| return new Bits.MatchAllBits(maxDoc); |
| } else { |
| Bits instance; |
| synchronized(this) { |
| instance = docsWithFieldInstances.get(fieldNumber); |
| if (instance == null) { |
| IndexInput data = this.data.clone(); |
| data.seek(offset); |
| assert length % 8 == 0; |
| long bits[] = new long[(int) length >> 3]; |
| for (int i = 0; i < bits.length; i++) { |
| bits[i] = data.readLong(); |
| } |
| instance = new FixedBitSet(bits, maxDoc); |
| docsWithFieldInstances.put(fieldNumber, instance); |
| } |
| } |
| return instance; |
| } |
| } |
| |
| @Override |
| public Bits getDocsWithField(FieldInfo field) throws IOException { |
| switch(field.getDocValuesType()) { |
| case SORTED_SET: |
| return DocValues.docsWithValue(getSortedSet(field), maxDoc); |
| case SORTED: |
| return DocValues.docsWithValue(getSorted(field), maxDoc); |
| case BINARY: |
| BinaryEntry be = binaries.get(field.number); |
| return getMissingBits(field.number, be.missingOffset, be.missingBytes); |
| case NUMERIC: |
| NumericEntry ne = numerics.get(field.number); |
| return getMissingBits(field.number, ne.missingOffset, ne.missingBytes); |
| default: |
| throw new AssertionError(); |
| } |
| } |
| |
| @Override |
| public void close() throws IOException { |
| data.close(); |
| } |
| |
| static class SortedSetRawValues { |
| NumericDocValues docToOrdAddress; |
| NumericDocValues ords; |
| BinaryDocValues values; |
| } |
| |
| static class NumericEntry { |
| long offset; |
| int count; |
| long missingOffset; |
| long missingBytes; |
| byte byteWidth; |
| int packedIntsVersion; |
| } |
| |
| static class BinaryEntry { |
| long offset; |
| long missingOffset; |
| long missingBytes; |
| int count; |
| int numBytes; |
| int minLength; |
| int maxLength; |
| int packedIntsVersion; |
| int blockSize; |
| } |
| |
| static class SortedEntry { |
| NumericEntry docToOrd; |
| BinaryEntry values; |
| } |
| |
| static class SortedSetEntry { |
| NumericEntry docToOrdAddress; |
| NumericEntry ords; |
| BinaryEntry values; |
| } |
| |
| static class FSTEntry { |
| long offset; |
| long numOrds; |
| } |
| } |