| using J2N.Numerics; |
| using J2N.Threading.Atomic; |
| using Lucene.Net.Index; |
| using Lucene.Net.Util; |
| using System; |
| using System.Collections.Generic; |
| using System.IO; |
| using System.Runtime.CompilerServices; |
| |
| namespace Lucene.Net.Codecs.Lucene45 |
| { |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| using BinaryDocValues = Lucene.Net.Index.BinaryDocValues; |
| using IBits = Lucene.Net.Util.IBits; |
| using BlockPackedReader = Lucene.Net.Util.Packed.BlockPackedReader; |
| using BytesRef = Lucene.Net.Util.BytesRef; |
| using ChecksumIndexInput = Lucene.Net.Store.ChecksumIndexInput; |
| using DocsAndPositionsEnum = Lucene.Net.Index.DocsAndPositionsEnum; |
| using DocsEnum = Lucene.Net.Index.DocsEnum; |
| using DocValues = Lucene.Net.Index.DocValues; |
| using DocValuesType = Lucene.Net.Index.DocValuesType; |
| using FieldInfo = Lucene.Net.Index.FieldInfo; |
| using FieldInfos = Lucene.Net.Index.FieldInfos; |
| using IndexFileNames = Lucene.Net.Index.IndexFileNames; |
| using IndexInput = Lucene.Net.Store.IndexInput; |
| using IOUtils = Lucene.Net.Util.IOUtils; |
| using Int64Values = Lucene.Net.Util.Int64Values; |
| using MonotonicBlockPackedReader = Lucene.Net.Util.Packed.MonotonicBlockPackedReader; |
| using NumericDocValues = Lucene.Net.Index.NumericDocValues; |
| using PackedInt32s = Lucene.Net.Util.Packed.PackedInt32s; |
| using RamUsageEstimator = Lucene.Net.Util.RamUsageEstimator; |
| using RandomAccessOrds = Lucene.Net.Index.RandomAccessOrds; |
| using SegmentReadState = Lucene.Net.Index.SegmentReadState; |
| using SortedDocValues = Lucene.Net.Index.SortedDocValues; |
| using SortedSetDocValues = Lucene.Net.Index.SortedSetDocValues; |
| using TermsEnum = Lucene.Net.Index.TermsEnum; |
| |
| /// <summary> |
| /// Reader for <see cref="Lucene45DocValuesFormat"/>. </summary> |
| public class Lucene45DocValuesProducer : DocValuesProducer // LUCENENET specific - removed IDisposable, it is already implemented in base class |
| { |
// ---- per-field metadata parsed from the meta file, keyed by field number ----

// Entries for plain numeric fields.
private readonly IDictionary<int, NumericEntry> numerics;

// Entries for binary fields; sorted and sorted-set fields store their value bytes here too.
private readonly IDictionary<int, BinaryEntry> binaries;

// Entries describing the on-disk layout chosen for each sorted-set field.
private readonly IDictionary<int, SortedSetEntry> sortedSets;

// Numeric entries holding the ordinals of sorted and sorted-set fields.
private readonly IDictionary<int, NumericEntry> ords;

// Numeric entries holding the ordinal index of sorted-set fields stored with addresses.
private readonly IDictionary<int, NumericEntry> ordIndexes;

// Running RAM estimate; grows whenever one of the cached readers below is created.
private readonly AtomicInt64 ramBytesUsed;

// Data file input; every accessor works on its own clone of it.
private readonly IndexInput data;

// Document count of the segment, taken from the segment info.
private readonly int maxDoc;

// Format version read from the metadata header.
private readonly int version;

// ---- memory-resident structures ----

// Cached address readers per field number; the dictionary itself is used as the lock.
private readonly IDictionary<int, MonotonicBlockPackedReader> addressInstances = new Dictionary<int, MonotonicBlockPackedReader>();

// Cached ordinal-index readers per field number; the dictionary itself is used as the lock.
private readonly IDictionary<int, MonotonicBlockPackedReader> ordIndexInstances = new Dictionary<int, MonotonicBlockPackedReader>();
| |
/// <summary>
/// Expert: instantiates a new reader.
/// <para/>
/// The metadata file is read and verified in full first; afterwards the data file is
/// opened and its header version is compared with the metadata version.
/// </summary>
/// <param name="state">Read state providing the directory, segment name and suffix, document count and IO context.</param>
/// <param name="dataCodec">Codec name expected in the header of the data file.</param>
/// <param name="dataExtension">File extension of the data file.</param>
/// <param name="metaCodec">Codec name expected in the header of the metadata file.</param>
/// <param name="metaExtension">File extension of the metadata file.</param>
protected internal Lucene45DocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
{
    string metaFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
    // read in the entries from the metadata file.
    ChecksumIndexInput metaInput = state.Directory.OpenChecksumInput(metaFileName, state.Context);
    this.maxDoc = state.SegmentInfo.DocCount;

    bool metaRead = false;
    try
    {
        version = CodecUtil.CheckHeader(metaInput, metaCodec, Lucene45DocValuesFormat.VERSION_START, Lucene45DocValuesFormat.VERSION_CURRENT);

        numerics = new Dictionary<int, NumericEntry>();
        binaries = new Dictionary<int, BinaryEntry>();
        sortedSets = new Dictionary<int, SortedSetEntry>();
        ords = new Dictionary<int, NumericEntry>();
        ordIndexes = new Dictionary<int, NumericEntry>();
        ReadFields(metaInput /*, state.FieldInfos // LUCENENET: Not read */);

        // Files written before checksums were introduced only get an end-of-file check.
        if (version < Lucene45DocValuesFormat.VERSION_CHECKSUM)
        {
#pragma warning disable 612, 618
            CodecUtil.CheckEOF(metaInput);
#pragma warning restore 612, 618
        }
        else
        {
            CodecUtil.CheckFooter(metaInput);
        }

        metaRead = true;
    }
    finally
    {
        if (!metaRead)
        {
            // Already failing: close without letting a secondary error hide the first one.
            IOUtils.DisposeWhileHandlingException(metaInput);
        }
        else
        {
            IOUtils.Dispose(metaInput);
        }
    }

    bool dataOpened = false;
    try
    {
        string dataFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
        data = state.Directory.OpenInput(dataFileName, state.Context);
        int dataVersion = CodecUtil.CheckHeader(data, dataCodec, Lucene45DocValuesFormat.VERSION_START, Lucene45DocValuesFormat.VERSION_CURRENT);
        if (dataVersion != version)
        {
            throw new Exception("Format versions mismatch");
        }

        dataOpened = true;
    }
    finally
    {
        if (!dataOpened)
        {
            IOUtils.DisposeWhileHandlingException(this.data);
        }
    }

    ramBytesUsed = new AtomicInt64(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));
}
| |
/// <summary>
/// Reads the two sub-entries of a sorted field (sorted = binary + numeric) and records
/// them in <c>binaries</c> and <c>ords</c> under <paramref name="fieldNumber"/>.
/// Each sub-entry must repeat the field number and carry the expected type byte.
/// </summary>
private void ReadSortedField(int fieldNumber, IndexInput meta /*, FieldInfos infos // LUCENENET: Never read */)
{
    // All four consistency checks report the same error, so build it in one place.
    Exception Corrupt() => new Exception("sorted entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");

    // binary part: the values
    if (meta.ReadVInt32() != fieldNumber || meta.ReadByte() != Lucene45DocValuesFormat.BINARY)
    {
        throw Corrupt();
    }
    binaries[fieldNumber] = ReadBinaryEntry(meta);

    // numeric part: the ordinals
    if (meta.ReadVInt32() != fieldNumber || meta.ReadByte() != Lucene45DocValuesFormat.NUMERIC)
    {
        throw Corrupt();
    }
    ords[fieldNumber] = ReadNumericEntry(meta);
}
| |
/// <summary>
/// Reads the three sub-entries of a sorted-set field stored with addresses
/// (sortedset = binary + numeric (addresses) + ordIndex) and records them in
/// <c>binaries</c>, <c>ords</c> and <c>ordIndexes</c> under <paramref name="fieldNumber"/>.
/// Each sub-entry must repeat the field number and carry the expected type byte.
/// </summary>
private void ReadSortedSetFieldWithAddresses(int fieldNumber, IndexInput meta /*, FieldInfos infos // LUCENENET: Never read */)
{
    // Every consistency check reports the same error, so build it in one place.
    Exception Corrupt() => new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");

    // binary part: the values
    if (meta.ReadVInt32() != fieldNumber || meta.ReadByte() != Lucene45DocValuesFormat.BINARY)
    {
        throw Corrupt();
    }
    binaries[fieldNumber] = ReadBinaryEntry(meta);

    // first numeric part: the ordinals
    if (meta.ReadVInt32() != fieldNumber || meta.ReadByte() != Lucene45DocValuesFormat.NUMERIC)
    {
        throw Corrupt();
    }
    ords[fieldNumber] = ReadNumericEntry(meta);

    // second numeric part: the ordinal index
    if (meta.ReadVInt32() != fieldNumber || meta.ReadByte() != Lucene45DocValuesFormat.NUMERIC)
    {
        throw Corrupt();
    }
    ordIndexes[fieldNumber] = ReadNumericEntry(meta);
}
| |
/// <summary>
/// Reads every field entry from the metadata input until the terminating field
/// number <c>-1</c> is encountered, filling the per-field entry dictionaries.
/// </summary>
/// <param name="meta">Metadata input positioned right after the header.</param>
/// <exception cref="Exception">If a field number, type byte or sorted-set layout is not valid.</exception>
private void ReadFields(IndexInput meta /*, FieldInfos infos // LUCENENET: Not read */)
{
    int fieldNumber = meta.ReadVInt32();
    while (fieldNumber != -1)
    {
        // check should be: infos.fieldInfo(fieldNumber) != null, which incorporates negative check
        // but docvalues updates are currently buggy here (loading extra stuff, etc): LUCENE-5616
        if (fieldNumber < 0)
        {
            // trickier to validate more: because we re-use for norms, because we use multiple entries
            // for "composite" types like sortedset, etc.
            throw new Exception("Invalid field number: " + fieldNumber + " (resource=" + meta + ")");
        }

        byte type = meta.ReadByte();
        if (type == Lucene45DocValuesFormat.NUMERIC)
        {
            numerics[fieldNumber] = ReadNumericEntry(meta);
        }
        else if (type == Lucene45DocValuesFormat.BINARY)
        {
            binaries[fieldNumber] = ReadBinaryEntry(meta);
        }
        else if (type == Lucene45DocValuesFormat.SORTED)
        {
            ReadSortedField(fieldNumber, meta /*, infos // LUCENENET: Never read */);
        }
        else if (type == Lucene45DocValuesFormat.SORTED_SET)
        {
            SortedSetEntry ss = ReadSortedSetEntry(meta);
            sortedSets[fieldNumber] = ss;
            if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES)
            {
                ReadSortedSetFieldWithAddresses(fieldNumber, meta/*, infos // LUCENENET: Never read */);
            }
            else if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED)
            {
                // A single-valued sorted set is stored as a plain sorted field.
                if (meta.ReadVInt32() != fieldNumber)
                {
                    throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
                }
                if (meta.ReadByte() != Lucene45DocValuesFormat.SORTED)
                {
                    throw new Exception("sortedset entry for field: " + fieldNumber + " is corrupt (resource=" + meta + ")");
                }
                ReadSortedField(fieldNumber, meta/*, infos // LUCENENET: Never read */);
            }
            else
            {
                // ReadSortedSetEntry is virtual, so an override may hand back a layout this
                // reader cannot load; say which one instead of throwing an empty exception.
                throw new Exception("Unexpected sortedset format: " + ss.Format + " for field: " + fieldNumber + " (resource=" + meta + ")");
            }
        }
        else
        {
            throw new Exception("invalid type: " + type + ", resource=" + meta);
        }

        fieldNumber = meta.ReadVInt32();
    }
}
| |
/// <summary>
/// Reads one numeric metadata entry: the common header fields followed by the
/// format-specific payload (minimum and GCD, or the lookup table).
/// </summary>
/// <param name="meta">Metadata input positioned at the start of the entry.</param>
/// <returns>The populated entry.</returns>
/// <exception cref="Exception">If the format is unknown or the table description is invalid.</exception>
internal static NumericEntry ReadNumericEntry(IndexInput meta)
{
    NumericEntry entry = new NumericEntry();
    entry.format = meta.ReadVInt32();
    entry.missingOffset = meta.ReadInt64();
    entry.PackedInt32sVersion = meta.ReadVInt32();
    entry.Offset = meta.ReadInt64();
    entry.Count = meta.ReadVInt64();
    entry.BlockSize = meta.ReadVInt32();
    switch (entry.format)
    {
        case Lucene45DocValuesConsumer.GCD_COMPRESSED:
            entry.minValue = meta.ReadInt64();
            entry.gcd = meta.ReadInt64();
            break;

        case Lucene45DocValuesConsumer.TABLE_COMPRESSED:
            if (entry.Count > int.MaxValue)
            {
                throw new Exception("Cannot use TABLE_COMPRESSED with more than MAX_VALUE values, input=" + meta);
            }
            int uniqueValues = meta.ReadVInt32();
            if (uniqueValues < 0)
            {
                // A corrupt VInt can decode to a negative number; reject it here rather than
                // letting the array allocation below fail with an unrelated error.
                throw new Exception("TABLE_COMPRESSED cannot have a negative number of distinct values: " + uniqueValues + ", input=" + meta);
            }
            if (uniqueValues > 256)
            {
                throw new Exception("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + meta);
            }
            entry.table = new long[uniqueValues];
            for (int i = 0; i < uniqueValues; ++i)
            {
                entry.table[i] = meta.ReadInt64();
            }
            break;

        case Lucene45DocValuesConsumer.DELTA_COMPRESSED:
            // no additional metadata
            break;

        default:
            throw new Exception("Unknown format: " + entry.format + ", input=" + meta);
    }
    return entry;
}
| |
/// <summary>
/// Reads one binary metadata entry: the common header fields followed by the
/// addressing information required by the variable-length and prefix-compressed formats.
/// </summary>
/// <param name="meta">Metadata input positioned at the start of the entry.</param>
/// <returns>The populated entry.</returns>
/// <exception cref="Exception">If the format is unknown.</exception>
internal static BinaryEntry ReadBinaryEntry(IndexInput meta)
{
    // Member initializers run top to bottom, which is the order the fields are stored in.
    BinaryEntry result = new BinaryEntry
    {
        format = meta.ReadVInt32(),
        missingOffset = meta.ReadInt64(),
        minLength = meta.ReadVInt32(),
        maxLength = meta.ReadVInt32(),
        Count = meta.ReadVInt64(),
        offset = meta.ReadInt64()
    };

    // Addressing block shared by both formats that keep an address table.
    void ReadAddressing()
    {
        result.AddressesOffset = meta.ReadInt64();
        result.PackedInt32sVersion = meta.ReadVInt32();
        result.BlockSize = meta.ReadVInt32();
    }

    switch (result.format)
    {
        case Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED:
            break;

        case Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED:
            ReadAddressing();
            break;

        case Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED:
            result.AddressInterval = meta.ReadVInt32();
            ReadAddressing();
            break;

        default:
            throw new Exception("Unknown format: " + result.format + ", input=" + meta);
    }
    return result;
}
| |
/// <summary>
/// Reads the sorted-set layout marker. Files older than
/// <c>VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED</c> store no marker and are treated
/// as <c>SORTED_SET_WITH_ADDRESSES</c>.
/// </summary>
/// <param name="meta">Metadata input positioned at the start of the entry.</param>
/// <returns>An entry carrying the layout.</returns>
/// <exception cref="Exception">If the stored layout is not one of the two known values.</exception>
internal virtual SortedSetEntry ReadSortedSetEntry(IndexInput meta)
{
    int format = version >= Lucene45DocValuesFormat.VERSION_SORTED_SET_SINGLE_VALUE_OPTIMIZED
        ? meta.ReadVInt32()
        : Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES;

    bool known = format == Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED
        || format == Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES;
    if (!known)
    {
        throw new Exception("Unknown format: " + format + ", input=" + meta);
    }

    return new SortedSetEntry { Format = format };
}
| |
/// <summary>
/// Returns the numeric doc values of <paramref name="field"/>, looked up by field number.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public override NumericDocValues GetNumeric(FieldInfo field) => GetNumeric(numerics[field.Number]);
| |
/// <summary>
/// Returns the current value of the RAM usage counter.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public override long RamBytesUsed()
{
    return ramBytesUsed;
}
| |
/// <summary>
/// Verifies the checksum of the whole data file. Does nothing for files whose
/// version is older than <c>VERSION_CHECKSUM</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public override void CheckIntegrity()
{
    if (version < Lucene45DocValuesFormat.VERSION_CHECKSUM)
    {
        return;
    }

    CodecUtil.ChecksumEntireFile(data);
}
| |
/// <summary>
/// Creates a reader over the numeric values described by <paramref name="entry"/>,
/// using a private clone of the data input positioned at the entry's offset.
/// </summary>
/// <param name="entry">Metadata of the numeric values to read.</param>
/// <returns>A reader that decodes according to the entry's format.</returns>
/// <exception cref="InvalidOperationException">If the entry carries a format this reader does not know.</exception>
internal virtual Int64Values GetNumeric(NumericEntry entry)
{
    IndexInput input = (IndexInput)this.data.Clone();
    input.Seek(entry.Offset);

    switch (entry.format)
    {
        case Lucene45DocValuesConsumer.DELTA_COMPRESSED:
            return new BlockPackedReader(input, entry.PackedInt32sVersion, entry.BlockSize, entry.Count, true);

        case Lucene45DocValuesConsumer.GCD_COMPRESSED:
        {
            // Stored values are quotients; the helper returns min + gcd * quotient.
            long min = entry.minValue;
            long mult = entry.gcd;
            BlockPackedReader quotientReader = new BlockPackedReader(input, entry.PackedInt32sVersion, entry.BlockSize, entry.Count, true);
            return new Int64ValuesAnonymousInnerClassHelper(min, mult, quotientReader);
        }

        case Lucene45DocValuesConsumer.TABLE_COMPRESSED:
        {
            // Stored values are indexes into the lookup table.
            long[] table = entry.table;
            int bitsRequired = PackedInt32s.BitsRequired(table.Length - 1);
            PackedInt32s.Reader tableOrds = PackedInt32s.GetDirectReaderNoHeader(input, PackedInt32s.Format.PACKED, entry.PackedInt32sVersion, (int)entry.Count, bitsRequired);
            return new Int64ValuesAnonymousInnerClassHelper2(table, tableOrds);
        }

        default:
            // Name the offending format instead of throwing an exception with no message.
            throw new InvalidOperationException("Unknown numeric format: " + entry.format);
    }
}
| |
/// <summary>
/// Values computed as <c>min + mult * quotient</c>, with the quotient read from a
/// block packed reader.
/// </summary>
private class Int64ValuesAnonymousInnerClassHelper : Int64Values
{
    private readonly long minimum;
    private readonly long multiplier;
    private readonly BlockPackedReader quotients;

    public Int64ValuesAnonymousInnerClassHelper(long min, long mult, BlockPackedReader quotientReader)
    {
        minimum = min;
        multiplier = mult;
        quotients = quotientReader;
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override long Get(long id) => minimum + multiplier * quotients.Get(id);
}
| |
/// <summary>
/// Values resolved through a lookup table: the packed reader yields the table index
/// for each id.
/// </summary>
private class Int64ValuesAnonymousInnerClassHelper2 : Int64Values
{
    private readonly long[] lookup;
    private readonly PackedInt32s.Reader indexes;

    public Int64ValuesAnonymousInnerClassHelper2(long[] table, PackedInt32s.Reader ords)
    {
        lookup = table;
        indexes = ords;
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override long Get(long id)
    {
        int slot = (int)indexes.Get((int)id);
        return lookup[slot];
    }
}
| |
/// <summary>
/// Returns the binary doc values of <paramref name="field"/>, choosing the reader
/// that matches the format recorded in the field's metadata entry.
/// </summary>
/// <param name="field">Field whose binary values are requested.</param>
/// <exception cref="InvalidOperationException">If the entry carries a format this reader does not know.</exception>
public override BinaryDocValues GetBinary(FieldInfo field)
{
    BinaryEntry bytes = binaries[field.Number];
    switch (bytes.format)
    {
        case Lucene45DocValuesConsumer.BINARY_FIXED_UNCOMPRESSED:
            return GetFixedBinary(/*field, LUCENENET: Never read */ bytes);

        case Lucene45DocValuesConsumer.BINARY_VARIABLE_UNCOMPRESSED:
            return GetVariableBinary(field, bytes);

        case Lucene45DocValuesConsumer.BINARY_PREFIX_COMPRESSED:
            return GetCompressedBinary(field, bytes);

        default:
            // Name the offending format instead of throwing an exception with no message.
            throw new InvalidOperationException("Unknown binary format: " + bytes.format + " for field number: " + field.Number);
    }
}
| |
/// <summary>
/// Creates a reader for fixed-length binary values over a private clone of the data input.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private BinaryDocValues GetFixedBinary(/* FieldInfo field, // LUCENENET: Never read */ BinaryEntry bytes)
    => new Int64BinaryDocValuesAnonymousInnerClassHelper(bytes, (IndexInput)this.data.Clone());
| |
/// <summary>
/// Binary values of fixed length: value <c>id</c> starts at
/// <c>offset + id * maxLength</c> and is <c>maxLength</c> bytes long.
/// </summary>
private class Int64BinaryDocValuesAnonymousInnerClassHelper : Int64BinaryDocValues
{
    private readonly Lucene45DocValuesProducer.BinaryEntry entry;
    private readonly IndexInput input;

    public Int64BinaryDocValuesAnonymousInnerClassHelper(Lucene45DocValuesProducer.BinaryEntry bytes, IndexInput data)
    {
        entry = bytes;
        input = data;
    }

    public override void Get(long id, BytesRef result)
    {
        long start = entry.offset + id * entry.maxLength;
        try
        {
            input.Seek(start);
            // NOTE: we could have one buffer, but various consumers (e.g. FieldComparerSource)
            // assume "they" own the bytes after calling this!
            byte[] copy = new byte[entry.maxLength];
            input.ReadBytes(copy, 0, copy.Length);

            result.Bytes = copy;
            result.Offset = 0;
            result.Length = copy.Length;
        }
        catch (IOException ioe)
        {
            throw new Exception(ioe.ToString(), ioe);
        }
    }
}
| |
/// <summary>
/// Returns an address instance for variable-length binary values.
/// The reader is created on first use for a field, cached by field number, and its
/// size is added to the RAM usage counter.
/// <para/>
/// @lucene.internal
/// </summary>
protected virtual MonotonicBlockPackedReader GetAddressInstance(IndexInput data, FieldInfo field, BinaryEntry bytes)
{
    lock (addressInstances)
    {
        bool found = addressInstances.TryGetValue(field.Number, out MonotonicBlockPackedReader cached);
        if (found && cached != null)
        {
            return cached;
        }

        // Not cached yet: load the addresses and remember them.
        data.Seek(bytes.AddressesOffset);
        MonotonicBlockPackedReader loaded = new MonotonicBlockPackedReader(data, bytes.PackedInt32sVersion, bytes.BlockSize, bytes.Count, false);
        addressInstances[field.Number] = loaded;
        ramBytesUsed.AddAndGet(loaded.RamBytesUsed() + RamUsageEstimator.NUM_BYTES_INT32);
        return loaded;
    }
}
| |
/// <summary>
/// Creates a reader for variable-length binary values. The same clone of the data
/// input is used both to obtain the address reader and to read the values.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private BinaryDocValues GetVariableBinary(FieldInfo field, BinaryEntry bytes)
{
    IndexInput clone = (IndexInput)this.data.Clone();
    return new Int64BinaryDocValuesAnonymousInnerClassHelper2(bytes, clone, GetAddressInstance(clone, field, bytes));
}
| |
/// <summary>
/// Binary values of variable length: the address reader supplies the end position of
/// each value relative to the entry offset, and the first value starts at the offset itself.
/// </summary>
private class Int64BinaryDocValuesAnonymousInnerClassHelper2 : Int64BinaryDocValues
{
    private readonly Lucene45DocValuesProducer.BinaryEntry entry;
    private readonly IndexInput input;
    private readonly MonotonicBlockPackedReader endAddresses;

    public Int64BinaryDocValuesAnonymousInnerClassHelper2(Lucene45DocValuesProducer.BinaryEntry bytes, IndexInput data, MonotonicBlockPackedReader addresses)
    {
        entry = bytes;
        input = data;
        endAddresses = addresses;
    }

    public override void Get(long id, BytesRef result)
    {
        long relativeStart = id == 0 ? 0 : endAddresses.Get(id - 1);
        long from = entry.offset + relativeStart;
        long to = entry.offset + endAddresses.Get(id);
        int size = (int)(to - from);
        try
        {
            input.Seek(from);
            // NOTE: we could have one buffer, but various consumers (e.g. FieldComparerSource)
            // assume "they" own the bytes after calling this!
            byte[] copy = new byte[size];
            input.ReadBytes(copy, 0, copy.Length);

            result.Bytes = copy;
            result.Offset = 0;
            result.Length = size;
        }
        catch (IOException ioe)
        {
            throw new Exception(ioe.ToString(), ioe);
        }
    }
}
| |
/// <summary>
/// Returns an address instance for prefix-compressed binary values.
/// The reader holds one address per <c>AddressInterval</c> values (rounded up); it is
/// created on first use for a field, cached by field number, and its size is added to
/// the RAM usage counter.
/// <para/>
/// @lucene.internal
/// </summary>
protected virtual MonotonicBlockPackedReader GetIntervalInstance(IndexInput data, FieldInfo field, BinaryEntry bytes)
{
    long interval = bytes.AddressInterval;
    lock (addressInstances)
    {
        if (addressInstances.TryGetValue(field.Number, out MonotonicBlockPackedReader cached))
        {
            return cached;
        }

        data.Seek(bytes.AddressesOffset);
        // ceil(Count / interval)
        long size = bytes.Count % interval == 0
            ? bytes.Count / interval
            : 1L + bytes.Count / interval;

        MonotonicBlockPackedReader loaded = new MonotonicBlockPackedReader(data, bytes.PackedInt32sVersion, bytes.BlockSize, size, false);
        addressInstances[field.Number] = loaded;
        ramBytesUsed.AddAndGet(loaded.RamBytesUsed() + RamUsageEstimator.NUM_BYTES_INT32);
        return loaded;
    }
}
| |
/// <summary>
/// Creates a reader for prefix-compressed binary values. The same clone of the data
/// input is used both to obtain the interval address reader and to read the values.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private BinaryDocValues GetCompressedBinary(FieldInfo field, BinaryEntry bytes)
{
    IndexInput clone = (IndexInput)this.data.Clone();
    MonotonicBlockPackedReader intervalAddresses = GetIntervalInstance(clone, field, bytes);
    return new CompressedBinaryDocValues(bytes, intervalAddresses, clone);
}
| |
/// <summary>
/// Returns the sorted doc values of <paramref name="field"/>, composed of the field's
/// binary values and a block packed reader over its ordinals.
/// </summary>
public override SortedDocValues GetSorted(FieldInfo field)
{
    int fieldNumber = field.Number;

    int valueCount = (int)binaries[fieldNumber].Count;
    BinaryDocValues values = GetBinary(field);
    NumericEntry ordEntry = ords[field.Number];

    // The ordinals are read through their own clone of the data input.
    IndexInput clone = (IndexInput)this.data.Clone();
    clone.Seek(ordEntry.Offset);
    BlockPackedReader ordinals = new BlockPackedReader(clone, ordEntry.PackedInt32sVersion, ordEntry.BlockSize, ordEntry.Count, true);

    return new SortedDocValuesAnonymousInnerClassHelper(valueCount, values, ordinals);
}
| |
/// <summary>
/// Sorted doc values built from a binary values reader and a per-document ordinal
/// reader. Term lookup and enumeration are delegated to the binary values when they
/// are <see cref="CompressedBinaryDocValues"/>.
/// </summary>
private class SortedDocValuesAnonymousInnerClassHelper : SortedDocValues
{
    private readonly int count;
    private readonly BinaryDocValues values;
    private readonly BlockPackedReader docToOrd;

    public SortedDocValuesAnonymousInnerClassHelper(int valueCount, BinaryDocValues binary, BlockPackedReader ordinals)
    {
        count = valueCount;
        values = binary;
        docToOrd = ordinals;
    }

    public override int ValueCount => count;

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override int GetOrd(int docID) => (int)docToOrd.Get(docID);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override void LookupOrd(int ord, BytesRef result) => values.Get(ord, result);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override int LookupTerm(BytesRef key)
    {
        if (values is CompressedBinaryDocValues compressed)
        {
            return (int)compressed.LookupTerm(key);
        }

        return base.LookupTerm(key);
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override TermsEnum GetTermsEnum()
    {
        if (values is CompressedBinaryDocValues compressed)
        {
            return compressed.GetTermsEnum();
        }

        return base.GetTermsEnum();
    }
}
| |
/// <summary>
/// Returns an address instance for sortedset ordinal lists.
/// The reader is created on first use for a field, cached by field number, and its
/// size is added to the RAM usage counter.
/// <para/>
/// @lucene.internal
/// </summary>
protected virtual MonotonicBlockPackedReader GetOrdIndexInstance(IndexInput data, FieldInfo field, NumericEntry entry)
{
    lock (ordIndexInstances)
    {
        if (ordIndexInstances.TryGetValue(field.Number, out MonotonicBlockPackedReader cached))
        {
            return cached;
        }

        data.Seek(entry.Offset);
        MonotonicBlockPackedReader loaded = new MonotonicBlockPackedReader(data, entry.PackedInt32sVersion, entry.BlockSize, entry.Count, false);
        ordIndexInstances[field.Number] = loaded;
        ramBytesUsed.AddAndGet(loaded.RamBytesUsed() + RamUsageEstimator.NUM_BYTES_INT32);
        return loaded;
    }
}
| |
/// <summary>
/// Returns the sorted-set doc values of <paramref name="field"/>. A field stored as
/// <c>SORTED_SET_SINGLE_VALUED_SORTED</c> is exposed by wrapping its sorted doc values;
/// a field stored as <c>SORTED_SET_WITH_ADDRESSES</c> is composed of its binary values,
/// its ordinal stream and its ordinal index.
/// </summary>
/// <param name="field">Field whose sorted-set values are requested.</param>
/// <exception cref="InvalidOperationException">If the field's entry carries a layout this reader does not know.</exception>
public override SortedSetDocValues GetSortedSet(FieldInfo field)
{
    SortedSetEntry ss = sortedSets[field.Number];
    if (ss.Format == Lucene45DocValuesConsumer.SORTED_SET_SINGLE_VALUED_SORTED)
    {
        SortedDocValues values = GetSorted(field);
        return DocValues.Singleton(values);
    }
    else if (ss.Format != Lucene45DocValuesConsumer.SORTED_SET_WITH_ADDRESSES)
    {
        // Name the offending layout instead of throwing an exception with no message.
        throw new InvalidOperationException("Unknown sorted-set format: " + ss.Format + " for field number: " + field.Number);
    }

    IndexInput data = (IndexInput)this.data.Clone();
    long valueCount = binaries[field.Number].Count;
    // we keep the byte[]s and list of ords on disk, these could be large
    Int64BinaryDocValues binary = (Int64BinaryDocValues)GetBinary(field);
    Int64Values ordinals = GetNumeric(ords[field.Number]);
    // but the addresses to the ord stream are in RAM
    MonotonicBlockPackedReader ordIndex = GetOrdIndexInstance(data, field, ordIndexes[field.Number]);

    return new RandomAccessOrdsAnonymousInnerClassHelper(valueCount, binary, ordinals, ordIndex);
}
| |
/// <summary>
/// Sorted-set doc values with random access to a document's ordinals. The ordinal
/// index gives, per document, the end position of its ordinals in the ordinal stream;
/// the ordinals of document 0 start at position 0.
/// </summary>
private class RandomAccessOrdsAnonymousInnerClassHelper : RandomAccessOrds
{
    private readonly long count;
    private readonly Lucene45DocValuesProducer.Int64BinaryDocValues values;
    private readonly Int64Values ordStream;
    private readonly MonotonicBlockPackedReader ordEnds;

    // Window into the ordinal stream for the current document: [startOffset, endOffset),
    // with offset being the position of the next ordinal to return.
    internal long startOffset;
    internal long offset;
    internal long endOffset;

    public RandomAccessOrdsAnonymousInnerClassHelper(long valueCount, Lucene45DocValuesProducer.Int64BinaryDocValues binary, Int64Values ordinals, MonotonicBlockPackedReader ordIndex)
    {
        count = valueCount;
        values = binary;
        ordStream = ordinals;
        ordEnds = ordIndex;
    }

    public override long ValueCount => count;

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override void SetDocument(int docID)
    {
        long begin = docID == 0 ? 0 : ordEnds.Get(docID - 1);
        offset = begin;
        startOffset = begin;
        endOffset = ordEnds.Get(docID);
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override long NextOrd()
    {
        if (offset == endOffset)
        {
            return NO_MORE_ORDS;
        }

        // Advance only after the read succeeded.
        long current = ordStream.Get(offset);
        offset += 1;
        return current;
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override long OrdAt(int index) => ordStream.Get(startOffset + index);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override int Cardinality() => (int)(endOffset - startOffset);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override void LookupOrd(long ord, BytesRef result) => values.Get(ord, result);

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override long LookupTerm(BytesRef key)
    {
        if (values is CompressedBinaryDocValues compressed)
        {
            return compressed.LookupTerm(key);
        }

        return base.LookupTerm(key);
    }

    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public override TermsEnum GetTermsEnum()
    {
        if (values is CompressedBinaryDocValues compressed)
        {
            return compressed.GetTermsEnum();
        }

        return base.GetTermsEnum();
    }
}
| |
/// <summary>
/// Returns the docs-with-field bits stored at <paramref name="offset"/> in the data file,
/// or bits matching all <c>maxDoc</c> documents when the offset is <c>-1</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private IBits GetMissingBits(long offset)
{
    if (offset != -1)
    {
        // The bitset is read lazily through its own clone of the data input.
        return new BitsAnonymousInnerClassHelper(this, offset, (IndexInput)data.Clone());
    }

    return new Bits.MatchAllBits(maxDoc);
}
| |
/// <summary>
/// Bits backed by a bitset in the data file: bit <c>index</c> is bit
/// <c>index &amp; 7</c> of the byte at <c>offset + (index &gt;&gt; 3)</c>.
/// </summary>
private class BitsAnonymousInnerClassHelper : IBits
{
    private readonly Lucene45DocValuesProducer outerInstance;
    private readonly long start;
    private readonly IndexInput input;

    public BitsAnonymousInnerClassHelper(Lucene45DocValuesProducer outerInstance, long offset, IndexInput @in)
    {
        this.outerInstance = outerInstance;
        start = offset;
        input = @in;
    }

    public virtual int Length => outerInstance.maxDoc;

    public virtual bool Get(int index)
    {
        int mask = 1 << (index & 7);
        try
        {
            input.Seek(start + (index >> 3));
            return (input.ReadByte() & mask) != 0;
        }
        catch (IOException ioe)
        {
            throw new Exception(ioe.ToString(), ioe);
        }
    }
}
| |
/// <summary>
/// Returns the bits marking which documents have a value for <paramref name="field"/>.
/// Numeric and binary fields use the bitset referenced by their entry's missing offset;
/// sorted and sorted-set fields derive the bits from their doc values.
/// </summary>
/// <exception cref="InvalidOperationException">If the field's doc values type is none of the four handled types.</exception>
public override IBits GetDocsWithField(FieldInfo field)
{
    switch (field.DocValuesType)
    {
        case DocValuesType.NUMERIC:
            return GetMissingBits(numerics[field.Number].missingOffset);

        case DocValuesType.BINARY:
            return GetMissingBits(binaries[field.Number].missingOffset);

        case DocValuesType.SORTED:
            return DocValues.DocsWithValue(GetSorted(field), maxDoc);

        case DocValuesType.SORTED_SET:
            return DocValues.DocsWithValue(GetSortedSet(field), maxDoc);

        default:
            throw new InvalidOperationException();
    }
}
| |
/// <summary>
/// Disposes the data input when <paramref name="disposing"/> is <c>true</c>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
protected override void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    data.Dispose();
}
| |
/// <summary>
/// Metadata entry for a numeric docvalues field. </summary>
protected internal class NumericEntry
{
    internal NumericEntry()
    {
    }

    // ---- fields present for every format ----

    /// <summary>
    /// Compression format of the values; selects how they are decoded. </summary>
    internal int format;

    /// <summary>
    /// Offset to the bitset representing docsWithField, or -1 if no documents have missing values. </summary>
    internal long missingOffset;

    /// <summary>
    /// Packed <see cref="int"/>s version used to encode these numerics.
    /// <para/>
    /// NOTE: This was packedIntsVersion (field) in Lucene
    /// </summary>
    public int PackedInt32sVersion { get; set; }

    /// <summary>
    /// Offset to the actual numeric values. </summary>
    public long Offset { get; set; }

    /// <summary>
    /// Count of values written. </summary>
    public long Count { get; set; }

    /// <summary>
    /// Packed <see cref="int"/>s blocksize. </summary>
    public int BlockSize { get; set; }

    // ---- format-specific fields ----

    /// <summary>
    /// Minimum value; only read from the metadata for the GCD-compressed format. </summary>
    internal long minValue;

    /// <summary>
    /// Common divisor; only read from the metadata for the GCD-compressed format. </summary>
    internal long gcd;

    /// <summary>
    /// Lookup table of distinct values; only read from the metadata for the table-compressed format. </summary>
    internal long[] table;
}
| |
/// <summary>
/// Metadata entry for a binary docvalues field. </summary>
protected internal class BinaryEntry
{
    internal BinaryEntry()
    {
    }

    // ---- fields present for every format ----

    /// <summary>
    /// Storage format of the values; selects which reader is used. </summary>
    internal int format;

    /// <summary>
    /// Offset to the bitset representing docsWithField, or -1 if no documents have missing values. </summary>
    internal long missingOffset;

    /// <summary>
    /// Minimum value length as stored in the metadata. </summary>
    internal int minLength;

    /// <summary>
    /// Maximum value length as stored in the metadata; the fixed-length reader uses it as the size of every value. </summary>
    internal int maxLength;

    /// <summary>
    /// Count of values written. </summary>
    public long Count { get; set; }

    /// <summary>
    /// Offset to the actual binary values. </summary>
    internal long offset;

    // ---- addressing, only read from the metadata for formats that keep an address table ----

    /// <summary>
    /// Interval of shared prefix chunks (when using prefix-compressed binary). </summary>
    public long AddressInterval { get; set; }

    /// <summary>
    /// Offset to the addressing data that maps a value to its slice of the <see cref="T:byte[]"/>. </summary>
    public long AddressesOffset { get; set; }

    /// <summary>
    /// Packed ints version used to encode addressing information.
    /// <para/>
    /// NOTE: This was packedIntsVersion (field) in Lucene.
    /// </summary>
    public int PackedInt32sVersion { get; set; }

    /// <summary>
    /// Packed ints blocksize. </summary>
    public int BlockSize { get; set; }
}
| |
| /// <summary> |
| /// Describes the metadata of a single sorted-set docvalues field. |
| /// </summary> |
| protected internal class SortedSetEntry |
| { |
| // NOTE(review): presumably identifies how the sorted-set field is encoded; the accepted values are not visible in this class - confirm against the metadata reader. |
| internal int Format { get; set; } |
| |
| // Construction is restricted to code inside this assembly. |
| internal SortedSetEntry() |
| { |
| } |
| } |
| |
| // internally we compose complex dv (sorted/sortedset) from other ones |
| /// <summary> |
| /// <see cref="BinaryDocValues"/> that is addressed by a 64-bit id; the 32-bit docID overload |
| /// simply widens its argument and forwards to the 64-bit one. |
| /// <para/> |
| /// NOTE: This was LongBinaryDocValues in Lucene. |
| /// </summary> |
| internal abstract class Int64BinaryDocValues : BinaryDocValues |
| { |
| /// <summary> |
| /// Fills <paramref name="result"/> with the value stored for <paramref name="id"/>. |
| /// </summary> |
| public abstract void Get(long id, BytesRef result); |
| |
| /// <summary> |
| /// Widens <paramref name="docID"/> to <see cref="long"/> and delegates to <see cref="Get(long, BytesRef)"/>. |
| /// </summary> |
| public sealed override void Get(int docID, BytesRef result) => Get((long)docID, result); |
| } |
| |
| // in the compressed case, we add a few additional operations for |
| // more efficient reverse lookup and enumeration |
| /// <summary> |
| /// Binary doc values whose terms are stored prefix-compressed: each entry is the number of leading |
| /// bytes shared with the previous term, the suffix length, and the suffix bytes. Terms are grouped |
| /// into blocks of <c>interval</c> entries; <c>addresses</c> gives the start of each block relative |
| /// to <c>bytes.offset</c>, so a lookup seeks to the containing block and decodes forward. |
| /// </summary> |
| internal class CompressedBinaryDocValues : Int64BinaryDocValues |
| { |
| internal readonly BinaryEntry bytes; |
| // Number of terms per addressed block (taken from bytes.AddressInterval). |
| internal readonly long interval; |
| // Total number of terms (taken from bytes.Count). |
| internal readonly long numValues; |
| // Number of addressed blocks (taken from addresses.Count). |
| internal readonly long numIndexValues; |
| internal readonly MonotonicBlockPackedReader addresses; |
| internal readonly IndexInput data; |
| // Enum used by Get and LookupTerm; it reads from 'data' itself, not from a clone. |
| internal readonly TermsEnum termsEnum; |
| |
| /// <summary> |
| /// Captures the field metadata, the block address table and the data input, and creates the |
| /// enum that <see cref="Get(long, BytesRef)"/> and <see cref="LookupTerm(BytesRef)"/> operate on. |
| /// </summary> |
| /// <param name="bytes">Metadata of the field (offset, count, address interval, max length).</param> |
| /// <param name="addresses">Start address of every block, relative to <c>bytes.offset</c>.</param> |
| /// <param name="data">Input holding the compressed terms; it is seeked to <c>bytes.offset</c> here.</param> |
| public CompressedBinaryDocValues(BinaryEntry bytes, MonotonicBlockPackedReader addresses, IndexInput data) |
| { |
| this.bytes = bytes; |
| this.interval = bytes.AddressInterval; |
| this.addresses = addresses; |
| this.data = data; |
| this.numValues = bytes.Count; |
| this.numIndexValues = addresses.Count; |
| // NOTE(review): virtual call from a constructor - an override runs before the subclass constructor body. |
| this.termsEnum = GetTermsEnum(data); |
| } |
| |
| /// <summary> |
| /// Positions the shared enum on ordinal <paramref name="id"/> and points <paramref name="result"/> |
| /// at that term's bytes. |
| /// </summary> |
| /// <param name="id">Ordinal of the term to fetch.</param> |
| /// <param name="result">Receives the bytes, offset and length of the term.</param> |
| /// <exception cref="Exception">Wraps any <see cref="IOException"/> raised while reading.</exception> |
| public override void Get(long id, BytesRef result) |
| { |
| try |
| { |
| termsEnum.SeekExact(id); |
| BytesRef term = termsEnum.Term; |
| // The enum publishes every term in a freshly allocated array, so sharing it with the caller is safe. |
| result.Bytes = term.Bytes; |
| result.Offset = term.Offset; |
| result.Length = term.Length; |
| } |
| catch (IOException e) |
| { |
| throw new Exception(e.ToString(), e); |
| } |
| } |
| |
| /// <summary> |
| /// Looks up the ordinal of <paramref name="key"/>. |
| /// </summary> |
| /// <param name="key">Term to search for.</param> |
| /// <returns> |
| /// The ordinal when the term exists; otherwise <c>-(insertionPoint) - 1</c>, where the insertion |
| /// point is the ordinal of the first term greater than <paramref name="key"/>, or <c>numValues</c> |
| /// when no such term exists. |
| /// </returns> |
| /// <exception cref="Exception">Wraps any <see cref="IOException"/> raised while reading.</exception> |
| internal virtual long LookupTerm(BytesRef key) |
| { |
| try |
| { |
| switch (termsEnum.SeekCeil(key)) |
| { |
| case TermsEnum.SeekStatus.END: |
| return -numValues - 1; |
| case TermsEnum.SeekStatus.FOUND: |
| return termsEnum.Ord; |
| default: |
| return -termsEnum.Ord - 1; |
| } |
| } |
| catch (IOException bogus) |
| { |
| throw new Exception(bogus.ToString(), bogus); |
| } |
| } |
| |
| /// <summary> |
| /// Creates a new enum over a clone of the data input, independent of the shared enum. |
| /// </summary> |
| /// <exception cref="Exception">Wraps any <see cref="IOException"/> raised while cloning or seeking.</exception> |
| [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| internal virtual TermsEnum GetTermsEnum() |
| { |
| try |
| { |
| return GetTermsEnum((IndexInput)data.Clone()); |
| } |
| catch (IOException e) |
| { |
| throw new Exception(e.ToString(), e); |
| } |
| } |
| |
| /// <summary> |
| /// Seeks <paramref name="input"/> to the start of the terms and wraps it in an enum positioned |
| /// before the first term. |
| /// </summary> |
| [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| internal virtual TermsEnum GetTermsEnum(IndexInput input) |
| { |
| input.Seek(bytes.offset); |
| |
| return new TermsEnumAnonymousInnerClassHelper(this, input); |
| } |
| |
| /// <summary> |
| /// Enum over the prefix-compressed terms. <c>termBuffer</c> is the decoding scratch buffer; |
| /// <c>term</c> is the copy handed out to callers and is only refreshed by <see cref="SetTerm"/>. |
| /// </summary> |
| private class TermsEnumAnonymousInnerClassHelper : TermsEnum |
| { |
| private readonly CompressedBinaryDocValues outerInstance; |
| |
| private readonly IndexInput input; |
| |
| // Ordinal of the term currently held in termBuffer; -1 before the first term. |
| private long currentOrd; |
| |
| // TODO: maxLength is negative when all terms are merged away... |
| private readonly BytesRef termBuffer; |
| |
| private readonly BytesRef term; |
| |
| public TermsEnumAnonymousInnerClassHelper(CompressedBinaryDocValues outerInstance, IndexInput input) |
| { |
| this.outerInstance = outerInstance; |
| this.input = input; |
| currentOrd = -1; |
| termBuffer = new BytesRef(outerInstance.bytes.maxLength < 0 ? 0 : outerInstance.bytes.maxLength); |
| term = new BytesRef(); |
| } |
| |
| /// <summary> |
| /// Decodes the next entry into <c>termBuffer</c> without publishing it to <c>term</c>. |
| /// Internal scans use this so that skipped terms do not each allocate a copy. |
| /// </summary> |
| /// <returns><c>false</c> once the ordinal has moved past the last term.</returns> |
| private bool DoNext() |
| { |
| if (++currentOrd >= outerInstance.numValues) |
| { |
| return false; |
| } |
| |
| // The first 'start' bytes are kept from the previous term; only the suffix is read. |
| int start = input.ReadVInt32(); |
| int suffix = input.ReadVInt32(); |
| input.ReadBytes(termBuffer.Bytes, start, suffix); |
| termBuffer.Length = start + suffix; |
| return true; |
| } |
| |
| // LUCENENET specific - Next() was made into MoveNext(); DoNext() only decodes, MoveNext() also publishes the term |
| public override bool MoveNext() |
| { |
| if (!DoNext()) |
| { |
| return false; |
| } |
| |
| SetTerm(); |
| return true; |
| } |
| |
| [Obsolete("Use MoveNext() and Term instead. This method will be removed in 4.8.0 release candidate."), System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] |
| public override BytesRef Next() |
| { |
| if (MoveNext()) |
| return term; |
| return null; |
| } |
| |
| /// <summary> |
| /// Positions the enum on the smallest term that is greater than or equal to <paramref name="text"/>. |
| /// </summary> |
| /// <returns> |
| /// <see cref="TermsEnum.SeekStatus.FOUND"/> on an exact match, <see cref="TermsEnum.SeekStatus.NOT_FOUND"/> |
| /// when positioned on a greater term, or <see cref="TermsEnum.SeekStatus.END"/> when no such term |
| /// exists (in which case <see cref="Term"/> is not updated). |
| /// </returns> |
| public override TermsEnum.SeekStatus SeekCeil(BytesRef text) |
| { |
| // binary-search just the index values to find the block, |
| // then scan within the block |
| long low = 0; |
| long high = outerInstance.numIndexValues - 1; |
| |
| while (low <= high) |
| { |
| long mid = (low + high).TripleShift(1); |
| DoSeek(mid * outerInstance.interval); |
| int cmp = termBuffer.CompareTo(text); |
| |
| if (cmp < 0) |
| { |
| low = mid + 1; |
| } |
| else if (cmp > 0) |
| { |
| high = mid - 1; |
| } |
| else |
| { |
| // we got lucky, found an indexed term |
| SetTerm(); |
| return TermsEnum.SeekStatus.FOUND; |
| } |
| } |
| |
| if (outerInstance.numIndexValues == 0) |
| { |
| return TermsEnum.SeekStatus.END; |
| } |
| |
| // block before insertion point |
| long block = low - 1; |
| DoSeek(block < 0 ? -1 : block * outerInstance.interval); |
| |
| while (DoNext()) |
| { |
| int cmp = termBuffer.CompareTo(text); |
| if (cmp == 0) |
| { |
| SetTerm(); |
| return TermsEnum.SeekStatus.FOUND; |
| } |
| else if (cmp > 0) |
| { |
| SetTerm(); |
| return TermsEnum.SeekStatus.NOT_FOUND; |
| } |
| } |
| |
| return TermsEnum.SeekStatus.END; |
| } |
| |
| /// <summary> |
| /// Positions the enum on ordinal <paramref name="ord"/> and publishes that term. |
| /// </summary> |
| [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| public override void SeekExact(long ord) |
| { |
| DoSeek(ord); |
| SetTerm(); |
| } |
| |
| /// <summary> |
| /// Decodes forward until <c>termBuffer</c> holds the term with ordinal <paramref name="ord"/>, |
| /// first jumping to the start of its block unless the target lies ahead in the current block. |
| /// Does not update <c>term</c>. |
| /// </summary> |
| private void DoSeek(long ord) |
| { |
| long block = ord / outerInstance.interval; |
| bool aheadInCurrentBlock = ord >= currentOrd && block == currentOrd / outerInstance.interval; |
| |
| if (!aheadInCurrentBlock) |
| { |
| // position before start of block |
| currentOrd = ord - ord % outerInstance.interval - 1; |
| input.Seek(outerInstance.bytes.offset + outerInstance.addresses.Get(block)); |
| } |
| |
| while (currentOrd < ord) |
| { |
| DoNext(); |
| } |
| } |
| |
| /// <summary> |
| /// Publishes the decoded term: copies <c>termBuffer</c> into a newly allocated array so that |
| /// arrays already handed to callers are never overwritten by later decoding. |
| /// </summary> |
| [MethodImpl(MethodImplOptions.AggressiveInlining)] |
| private void SetTerm() |
| { |
| // TODO: is there a cleaner way |
| term.Bytes = new byte[termBuffer.Length]; |
| term.Offset = 0; |
| term.CopyBytes(termBuffer); |
| } |
| |
| public override BytesRef Term => term; |
| |
| public override long Ord => currentOrd; |
| |
| public override IComparer<BytesRef> Comparer => BytesRef.UTF8SortedAsUnicodeComparer; |
| |
| public override int DocFreq => throw new NotSupportedException(); |
| |
| public override long TotalTermFreq => -1; |
| |
| public override DocsEnum Docs(IBits liveDocs, DocsEnum reuse, DocsFlags flags) |
| { |
| throw new NotSupportedException(); |
| } |
| |
| public override DocsAndPositionsEnum DocsAndPositions(IBits liveDocs, DocsAndPositionsEnum reuse, DocsAndPositionsFlags flags) |
| { |
| throw new NotSupportedException(); |
| } |
| } |
| } |
| } |
| } |