// blob: 41eaef90fe185457cc461214264f5842671129c4 [file] [log] [blame]
using J2N.Threading.Atomic;
using Lucene.Net.Diagnostics;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using System.Diagnostics;
namespace Lucene.Net.Codecs.Memory
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// Reader for <see cref="DirectDocValuesFormat"/>.
/// </summary>
internal class DirectDocValuesProducer : DocValuesProducer
{
// metadata maps (just file pointers and minimal stuff);
// filled by ReadFields and keyed by field number
private readonly IDictionary<int?, NumericEntry> numerics = new Dictionary<int?, NumericEntry>();
private readonly IDictionary<int?, BinaryEntry> binaries = new Dictionary<int?, BinaryEntry>();
private readonly IDictionary<int?, SortedEntry> sorteds = new Dictionary<int?, SortedEntry>();
private readonly IDictionary<int?, SortedSetEntry> sortedSets = new Dictionary<int?, SortedSetEntry>();
// data file input; opened by the constructor and disposed in Dispose(bool)
private readonly IndexInput data;
// ram instances we have already loaded (created lazily on first request, keyed by field number)
private readonly IDictionary<int?, NumericDocValues> numericInstances = new Dictionary<int?, NumericDocValues>();
private readonly IDictionary<int?, BinaryDocValues> binaryInstances = new Dictionary<int?, BinaryDocValues>();
private readonly IDictionary<int?, SortedDocValues> sortedInstances = new Dictionary<int?, SortedDocValues>();
private readonly IDictionary<int?, SortedSetRawValues> sortedSetInstances = new Dictionary<int?, SortedSetRawValues>();
// bit sets built by GetMissingBits, cached per field number
private readonly IDictionary<int?, IBits> docsWithFieldInstances = new Dictionary<int?, IBits>();
// document count of the segment (state.SegmentInfo.DocCount)
private readonly int maxDoc;
// RAM estimate: starts at the shallow size of this instance and is increased
// by LoadNumeric / LoadBinary for every array they allocate
private readonly AtomicInt64 ramBytesUsed;
// format version read from the metadata file header
private readonly int version;
// entry type tags; ReadFields compares the type byte of each metadata entry against these
internal const sbyte NUMBER = 0;
internal const sbyte BYTES = 1;
internal const sbyte SORTED = 2;
internal const sbyte SORTED_SET = 3;
// accepted header versions; from VERSION_CHECKSUM on, the constructor verifies
// a footer on the metadata file and CheckIntegrity checksums the data file
internal const int VERSION_START = 0;
internal const int VERSION_CHECKSUM = 1;
internal const int VERSION_CURRENT = VERSION_CHECKSUM;
/// <summary>
/// Opens the metadata and data files of a segment. The metadata file is read
/// completely (header, field entries, then footer or EOF check) and disposed
/// again; the data file is kept open once its header version has been found
/// to match the metadata version.
/// </summary>
/// <param name="state">Read state supplying the directory, segment info, segment suffix and IO context.</param>
/// <param name="dataCodec">Codec name expected in the data file header.</param>
/// <param name="dataExtension">Extension used to build the data file name.</param>
/// <param name="metaCodec">Codec name expected in the metadata file header.</param>
/// <param name="metaExtension">Extension used to build the metadata file name.</param>
/// <exception cref="CorruptIndexException">The data file header version differs from the metadata header version.</exception>
internal DirectDocValuesProducer(SegmentReadState state, string dataCodec, string dataExtension, string metaCodec, string metaExtension)
{
    maxDoc = state.SegmentInfo.DocCount;

    // read in the entries from the metadata file.
    string metaFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension);
    ChecksumIndexInput metaInput = state.Directory.OpenChecksumInput(metaFileName, state.Context);
    ramBytesUsed = new AtomicInt64(RamUsageEstimator.ShallowSizeOfInstance(this.GetType()));

    bool metaRead = false;
    try
    {
        version = CodecUtil.CheckHeader(metaInput, metaCodec, VERSION_START, VERSION_CURRENT);
        ReadFields(metaInput);
        if (version < VERSION_CHECKSUM)
        {
            // pre-checksum format: there is no footer, only verify we consumed the whole file
#pragma warning disable 612, 618
            CodecUtil.CheckEOF(metaInput);
#pragma warning restore 612, 618
        }
        else
        {
            CodecUtil.CheckFooter(metaInput);
        }
        metaRead = true;
    }
    finally
    {
        if (!metaRead)
        {
            IOUtils.DisposeWhileHandlingException(metaInput);
        }
        else
        {
            IOUtils.Dispose(metaInput);
        }
    }

    bool dataOpened = false;
    try
    {
        string dataFileName = IndexFileNames.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension);
        data = state.Directory.OpenInput(dataFileName, state.Context);
        int dataVersion = CodecUtil.CheckHeader(data, dataCodec, VERSION_START, VERSION_CURRENT);
        if (dataVersion != version)
        {
            throw new CorruptIndexException("Format versions mismatch");
        }
        dataOpened = true;
    }
    finally
    {
        // the data input stays open on success; it is released in Dispose(bool)
        if (!dataOpened)
        {
            IOUtils.DisposeWhileHandlingException(this.data);
        }
    }
}
/// <summary>
/// Reads one numeric entry from the metadata stream, in this order: offset (long),
/// count (int), missingOffset (long), missingBytes (long, only present when
/// missingOffset is not -1) and byteWidth (byte).
/// </summary>
/// <param name="meta">Metadata input positioned at the start of the entry.</param>
/// <returns>The populated entry; missingBytes is 0 when missingOffset is -1.</returns>
private static NumericEntry ReadNumericEntry(IndexInput meta) // LUCENENET: CA1822: Mark members as static
{
    var result = new NumericEntry();
    result.offset = meta.ReadInt64();
    result.count = meta.ReadInt32();
    result.missingOffset = meta.ReadInt64();
    // the length field is only stored when a missing-bits block exists
    result.missingBytes = result.missingOffset != -1 ? meta.ReadInt64() : 0;
    result.byteWidth = meta.ReadByte();
    return result;
}
/// <summary>
/// Reads one binary entry from the metadata stream, in this order: offset (long),
/// numBytes (int), count (int), missingOffset (long) and missingBytes (long, only
/// present when missingOffset is not -1).
/// </summary>
/// <param name="meta">Metadata input positioned at the start of the entry.</param>
/// <returns>The populated entry; missingBytes is 0 when missingOffset is -1.</returns>
private static BinaryEntry ReadBinaryEntry(IndexInput meta) // LUCENENET: CA1822: Mark members as static
{
    // member initializers run in textual order, which matches the on-disk field order
    var result = new BinaryEntry
    {
        offset = meta.ReadInt64(),
        numBytes = meta.ReadInt32(),
        count = meta.ReadInt32(),
        missingOffset = meta.ReadInt64()
    };
    result.missingBytes = result.missingOffset != -1 ? meta.ReadInt64() : 0;
    return result;
}
/// <summary>
/// Reads a sorted entry: a numeric entry (document to ordinal) followed by a
/// binary entry (the values).
/// </summary>
/// <param name="meta">Metadata input positioned at the start of the entry.</param>
/// <returns>The populated entry.</returns>
private static SortedEntry ReadSortedEntry(IndexInput meta) // LUCENENET: CA1822: Mark members as static
{
    // initializers run top to bottom, so the numeric part is read before the binary part
    return new SortedEntry
    {
        docToOrd = ReadNumericEntry(meta),
        values = ReadBinaryEntry(meta)
    };
}
/// <summary>
/// Reads a sorted-set entry: two numeric entries (ordinal addresses per document,
/// then the ordinals) followed by a binary entry (the values).
/// </summary>
/// <param name="meta">Metadata input positioned at the start of the entry.</param>
/// <returns>The populated entry.</returns>
private static SortedSetEntry ReadSortedSetEntry(IndexInput meta) // LUCENENET: CA1822: Mark members as static
{
    // initializers run top to bottom, preserving the on-disk order of the three parts
    return new SortedSetEntry
    {
        docToOrdAddress = ReadNumericEntry(meta),
        ords = ReadNumericEntry(meta),
        values = ReadBinaryEntry(meta)
    };
}
/// <summary>
/// Reads (field number, type byte, entry) records from the metadata stream into
/// the per-type entry maps until a field number of -1 is read.
/// </summary>
/// <param name="meta">Metadata input positioned just after the header.</param>
/// <exception cref="CorruptIndexException">A type byte is none of NUMBER, BYTES, SORTED or SORTED_SET.</exception>
private void ReadFields(IndexInput meta)
{
    for (int fieldNumber = meta.ReadVInt32(); fieldNumber != -1; fieldNumber = meta.ReadVInt32())
    {
        int fieldType = meta.ReadByte();
        switch (fieldType)
        {
            case NUMBER:
                numerics[fieldNumber] = ReadNumericEntry(meta);
                break;
            case BYTES:
                binaries[fieldNumber] = ReadBinaryEntry(meta);
                break;
            case SORTED:
                sorteds[fieldNumber] = ReadSortedEntry(meta);
                break;
            case SORTED_SET:
                sortedSets[fieldNumber] = ReadSortedSetEntry(meta);
                break;
            default:
                throw new CorruptIndexException("invalid entry type: " + fieldType + ", input=" + meta);
        }
    }
}
/// <summary>
/// Returns the current value of the RAM counter maintained by this producer.
/// </summary>
public override long RamBytesUsed()
{
    return ramBytesUsed;
}
/// <summary>
/// Checksums the whole data file. Does nothing for format versions older than
/// <see cref="VERSION_CHECKSUM"/>.
/// </summary>
public override void CheckIntegrity()
{
    if (version < VERSION_CHECKSUM)
    {
        return;
    }

    CodecUtil.ChecksumEntireFile(data);
}
/// <summary>
/// Returns the numeric doc values of <paramref name="field"/>, loading them from
/// the data file on first request and serving the cached instance afterwards.
/// </summary>
/// <param name="field">Field whose number selects the metadata entry.</param>
/// <returns>The cached or freshly loaded values.</returns>
public override NumericDocValues GetNumeric(FieldInfo field)
{
    lock (this)
    {
        if (numericInstances.TryGetValue(field.Number, out NumericDocValues cached))
        {
            return cached;
        }

        // Lazy load
        NumericDocValues loaded = LoadNumeric(numerics[field.Number]);
        numericInstances[field.Number] = loaded;
        return loaded;
    }
}
/// <summary>
/// Reads all values of a numeric entry from the data file into an in-memory
/// array whose element type is selected by <c>entry.byteWidth</c> (1, 2, 4 or 8),
/// adds the array size to the RAM counter and wraps the array in a
/// <see cref="NumericDocValues"/>.
/// </summary>
/// <remarks>
/// This repositions the shared data input; the callers in this class invoke it
/// while holding <c>lock (this)</c>.
/// </remarks>
/// <param name="entry">Metadata entry describing where the values are and how wide they are.</param>
/// <returns>Values backed by the loaded array.</returns>
/// <exception cref="InvalidOperationException"><c>entry.byteWidth</c> is not 1, 2, 4 or 8.</exception>
private NumericDocValues LoadNumeric(NumericEntry entry)
{
    // the values start missingBytes after the entry's base offset
    data.Seek(entry.offset + entry.missingBytes);
    int count = entry.count;
    switch (entry.byteWidth)
    {
        case 1:
            {
                var values = new byte[count];
                data.ReadBytes(values, 0, count);
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
                // LUCENENET: IMPORTANT - some bytes are negative here, so we need to pass as sbyte
                return new NumericDocValuesAnonymousClass((sbyte[])(Array)values);
            }
        case 2:
            {
                var values = new short[count];
                for (int i = 0; i < count; i++)
                {
                    values[i] = data.ReadInt16();
                }
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
                return new NumericDocValuesAnonymousClass2(values);
            }
        case 4:
            {
                var values = new int[count];
                for (int i = 0; i < count; i++)
                {
                    values[i] = data.ReadInt32();
                }
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
                return new NumericDocValuesAnonymousClass3(values);
            }
        case 8:
            {
                var values = new long[count];
                for (int i = 0; i < count; i++)
                {
                    values[i] = data.ReadInt64();
                }
                ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(values));
                return new NumericDocValuesAnonymousClass4(values);
            }
        default:
            // name the offending width so a corrupt or unsupported entry can be diagnosed
            throw new InvalidOperationException("unsupported byteWidth: " + entry.byteWidth);
    }
}
/// <summary>
/// <see cref="NumericDocValues"/> backed by an in-memory <see cref="sbyte"/> array;
/// the value at an index is the array element at that index.
/// </summary>
private class NumericDocValuesAnonymousClass : NumericDocValues
{
    private readonly sbyte[] byteValues;

    public NumericDocValuesAnonymousClass(sbyte[] values) => byteValues = values;

    // the signed byte is widened to long by the return conversion
    public override long Get(int idx) => byteValues[idx];
}
/// <summary>
/// <see cref="NumericDocValues"/> backed by an in-memory <see cref="short"/> array;
/// the value at an index is the array element at that index.
/// </summary>
private class NumericDocValuesAnonymousClass2 : NumericDocValues
{
    private readonly short[] shortValues;

    public NumericDocValuesAnonymousClass2(short[] values) => shortValues = values;

    public override long Get(int idx) => shortValues[idx];
}
/// <summary>
/// <see cref="NumericDocValues"/> backed by an in-memory <see cref="int"/> array;
/// the value at an index is the array element at that index.
/// </summary>
private class NumericDocValuesAnonymousClass3 : NumericDocValues
{
    private readonly int[] intValues;

    public NumericDocValuesAnonymousClass3(int[] values) => intValues = values;

    public override long Get(int idx) => intValues[idx];
}
/// <summary>
/// <see cref="NumericDocValues"/> backed by an in-memory <see cref="long"/> array;
/// the value at an index is the array element at that index.
/// </summary>
private class NumericDocValuesAnonymousClass4 : NumericDocValues
{
    private readonly long[] longValues;

    public NumericDocValuesAnonymousClass4(long[] values) => longValues = values;

    public override long Get(int idx) => longValues[idx];
}
/// <summary>
/// Returns the binary doc values of <paramref name="field"/>, loading them from
/// the data file on first request and serving the cached instance afterwards.
/// </summary>
/// <param name="field">Field whose number selects the metadata entry.</param>
/// <returns>The cached or freshly loaded values.</returns>
public override BinaryDocValues GetBinary(FieldInfo field)
{
    lock (this)
    {
        if (binaryInstances.TryGetValue(field.Number, out BinaryDocValues cached))
        {
            return cached;
        }

        // Lazy load
        BinaryDocValues loaded = LoadBinary(binaries[field.Number]);
        binaryInstances[field.Number] = loaded;
        return loaded;
    }
}
/// <summary>
/// Loads a binary entry into RAM: <c>entry.numBytes</c> value bytes starting at
/// <c>entry.offset</c>, then <c>entry.count + 1</c> int addresses starting at
/// <c>entry.offset + entry.numBytes + entry.missingBytes</c>. The sizes of both
/// arrays are added to the RAM counter.
/// </summary>
/// <param name="entry">Metadata entry describing the value bytes and addresses.</param>
/// <returns>Values that slice the loaded bytes using the loaded addresses.</returns>
private BinaryDocValues LoadBinary(BinaryEntry entry)
{
    int blobLength = entry.numBytes;
    int valueCount = entry.count;

    data.Seek(entry.offset);
    var blob = new byte[blobLength];
    data.ReadBytes(blob, 0, blobLength);

    data.Seek(entry.offset + blobLength + entry.missingBytes);
    var offsets = new int[valueCount + 1];
    for (int slot = 0; slot < valueCount; slot++)
    {
        offsets[slot] = data.ReadInt32();
    }
    // one extra address closes the last value
    offsets[valueCount] = data.ReadInt32();

    ramBytesUsed.AddAndGet(RamUsageEstimator.SizeOf(blob) + RamUsageEstimator.SizeOf(offsets));
    return new BinaryDocValuesAnonymousClass(blob, offsets);
}
/// <summary>
/// <see cref="BinaryDocValues"/> over one shared byte array; value <c>i</c> spans
/// from <c>address[i]</c> up to (not including) <c>address[i + 1]</c>.
/// </summary>
private class BinaryDocValuesAnonymousClass : BinaryDocValues
{
    private readonly byte[] blob;
    private readonly int[] offsets;

    public BinaryDocValuesAnonymousClass(byte[] bytes, int[] address)
    {
        blob = bytes;
        offsets = address;
    }

    /// <summary>
    /// Points <paramref name="result"/> at the slice of the shared array that
    /// holds the value for <paramref name="docID"/>; no bytes are copied.
    /// </summary>
    public override void Get(int docID, BytesRef result)
    {
        result.Bytes = blob;
        int start = offsets[docID];
        result.Offset = start;
        result.Length = offsets[docID + 1] - start;
    }
}
/// <summary>
/// Returns the sorted doc values of <paramref name="field"/>, loading them from
/// the data file on first request and serving the cached instance afterwards.
/// </summary>
/// <param name="field">Field whose number selects the metadata entry.</param>
/// <returns>The cached or freshly loaded values.</returns>
public override SortedDocValues GetSorted(FieldInfo field)
{
    lock (this)
    {
        if (sortedInstances.TryGetValue(field.Number, out SortedDocValues cached))
        {
            return cached;
        }

        // Lazy load
        SortedDocValues loaded = LoadSorted(field);
        sortedInstances[field.Number] = loaded;
        return loaded;
    }
}
/// <summary>
/// Loads both parts of a sorted field (the document-to-ordinal numbers, then the
/// value bytes) and combines them into a <see cref="SortedDocValues"/>.
/// </summary>
/// <param name="field">Field whose number selects the sorted metadata entry.</param>
/// <returns>Sorted values backed by the loaded arrays.</returns>
private SortedDocValues LoadSorted(FieldInfo field)
{
    SortedEntry sortedEntry = sorteds[field.Number];
    // arguments are evaluated left to right: numeric part first, binary part second
    return new SortedDocValuesAnonymousClass(
        sortedEntry,
        LoadNumeric(sortedEntry.docToOrd),
        LoadBinary(sortedEntry.values));
}
/// <summary>
/// <see cref="SortedDocValues"/> that resolves a document to its ordinal through
/// a numeric lookup and an ordinal to its bytes through a binary lookup.
/// </summary>
private class SortedDocValuesAnonymousClass : SortedDocValues
{
    private readonly SortedEntry sortedEntry;
    private readonly NumericDocValues ordByDoc;
    private readonly BinaryDocValues termBytes;

    public SortedDocValuesAnonymousClass(SortedEntry entry, NumericDocValues docToOrd, BinaryDocValues values)
    {
        sortedEntry = entry;
        ordByDoc = docToOrd;
        termBytes = values;
    }

    public override int GetOrd(int docID) => (int)ordByDoc.Get(docID);

    public override void LookupOrd(int ord, BytesRef result) => termBytes.Get(ord, result);

    // number of values, taken from the binary part of the entry
    public override int ValueCount
    {
        get { return sortedEntry.values.count; }
    }

    // Leave lookupTerm to super's binary search
    // Leave termsEnum to super
}
/// <summary>
/// Returns sorted-set doc values for <paramref name="field"/>. The underlying
/// arrays are loaded once and cached; the returned wrapper is created anew on
/// every call.
/// </summary>
/// <param name="field">Field whose number selects the metadata entry.</param>
/// <returns>A new wrapper over the cached raw values.</returns>
public override SortedSetDocValues GetSortedSet(FieldInfo field)
{
    lock (this)
    {
        int number = field.Number;
        SortedSetEntry setEntry = sortedSets[number];
        if (!sortedSetInstances.TryGetValue(number, out SortedSetRawValues raw))
        {
            // Lazy load
            raw = LoadSortedSet(setEntry);
            sortedSetInstances[number] = raw;
        }

        // Must make a new instance since the iterator has state:
        return new RandomAccessOrdsAnonymousClass(setEntry, raw.docToOrdAddress, raw.ords, raw.values);
    }
}
/// <summary>
/// <see cref="RandomAccessOrds"/> over loaded sorted-set data. The ordinals of
/// document <c>d</c> are the entries of <c>ords</c> from index
/// <c>docToOrdAddress.Get(d)</c> up to (not including) <c>docToOrdAddress.Get(d + 1)</c>.
/// </summary>
private class RandomAccessOrdsAnonymousClass : RandomAccessOrds
{
    private readonly SortedSetEntry entry;
    private readonly NumericDocValues docToOrdAddress;
    private readonly NumericDocValues ords;
    private readonly BinaryDocValues values;

    // window into ords for the current document: [ordStart, ordLimit), with ordUpto as the cursor
    private int ordStart;
    private int ordUpto;
    private int ordLimit;

    public RandomAccessOrdsAnonymousClass(SortedSetEntry entry, NumericDocValues docToOrdAddress, NumericDocValues ords, BinaryDocValues values)
    {
        this.entry = entry;
        this.docToOrdAddress = docToOrdAddress;
        this.ords = ords;
        this.values = values;
    }

    public override long NextOrd()
    {
        if (ordUpto != ordLimit)
        {
            return ords.Get(ordUpto++);
        }
        return NO_MORE_ORDS;
    }

    public override void SetDocument(int docID)
    {
        int start = (int)docToOrdAddress.Get(docID);
        ordUpto = start;
        ordStart = start;
        ordLimit = (int)docToOrdAddress.Get(docID + 1);
    }

    public override void LookupOrd(long ord, BytesRef result) => values.Get((int)ord, result);

    public override long ValueCount
    {
        get { return entry.values.count; }
    }

    public override long OrdAt(int index) => ords.Get(ordStart + index);

    public override int Cardinality() => ordLimit - ordStart;

    // Leave lookupTerm to super's binary search
    // Leave termsEnum to super
}
/// <summary>
/// Loads the three parts of a sorted-set entry (ordinal addresses, ordinals,
/// value bytes), in that order, into a <see cref="SortedSetRawValues"/>.
/// </summary>
/// <param name="entry">Metadata entry describing the three parts.</param>
/// <returns>The loaded raw values.</returns>
private SortedSetRawValues LoadSortedSet(SortedSetEntry entry)
{
    // initializers run top to bottom, so the load order of the original is kept
    return new SortedSetRawValues
    {
        docToOrdAddress = LoadNumeric(entry.docToOrdAddress),
        ords = LoadNumeric(entry.ords),
        values = LoadBinary(entry.values)
    };
}
/// <summary>
/// Returns the docs-with-field bits stored for a field. An
/// <paramref name="offset"/> of -1 yields a match-all bit set of size maxDoc;
/// otherwise the bits are read once from a clone of the data input and cached
/// per field number.
/// </summary>
/// <param name="fieldNumber">Cache key for the loaded bits.</param>
/// <param name="offset">Position of the bits in the data file, or -1.</param>
/// <param name="length">Length of the bits in bytes; asserted to be a multiple of 8.</param>
/// <returns>The match-all, cached or freshly loaded bits.</returns>
private IBits GetMissingBits(int fieldNumber, long offset, long length)
{
    if (offset == -1)
    {
        return new Bits.MatchAllBits(maxDoc);
    }

    lock (this)
    {
        if (docsWithFieldInstances.TryGetValue(fieldNumber, out IBits cached))
        {
            return cached;
        }

        // read through a clone so the position of the shared input is left alone
        var input = (IndexInput)this.data.Clone();
        input.Seek(offset);
        if (Debugging.AssertsEnabled) Debugging.Assert(length % 8 == 0);

        // 8 bytes per long word
        int wordCount = (int)length >> 3;
        var words = new long[wordCount];
        for (int w = 0; w < words.Length; w++)
        {
            words[w] = input.ReadInt64();
        }

        IBits loaded = new FixedBitSet(words, maxDoc);
        docsWithFieldInstances[fieldNumber] = loaded;
        return loaded;
    }
}
/// <summary>
/// Returns the bits telling which documents have a value for
/// <paramref name="field"/>. For SORTED_SET and SORTED fields they are derived
/// from the doc values themselves via <c>DocValues.DocsWithValue</c>; for BINARY
/// and NUMERIC fields they come from the missing-bits information of the
/// field's metadata entry.
/// </summary>
/// <param name="field">Field to look up; its doc values type selects the strategy.</param>
/// <returns>The docs-with-field bits.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// The field's doc values type is none of SORTED_SET, SORTED, BINARY or NUMERIC.
/// </exception>
public override IBits GetDocsWithField(FieldInfo field)
{
    switch (field.DocValuesType)
    {
        case DocValuesType.SORTED_SET:
            return DocValues.DocsWithValue(GetSortedSet(field), maxDoc);
        case DocValuesType.SORTED:
            return DocValues.DocsWithValue(GetSorted(field), maxDoc);
        case DocValuesType.BINARY:
            BinaryEntry be = binaries[field.Number];
            return GetMissingBits(field.Number, be.missingOffset, be.missingBytes);
        case DocValuesType.NUMERIC:
            NumericEntry ne = numerics[field.Number];
            return GetMissingBits(field.Number, ne.missingOffset, ne.missingBytes);
        default:
            // report which argument and which value were rejected instead of a bare exception
            throw new ArgumentOutOfRangeException(nameof(field), field.DocValuesType, "unsupported DocValuesType");
    }
}
/// <summary>
/// Disposes the data input when called with <paramref name="disposing"/> set to
/// <c>true</c>; does nothing otherwise.
/// </summary>
/// <param name="disposing">Whether the data input should be disposed.</param>
protected override void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    data.Dispose();
}
/// <summary>
/// Loaded parts of a sorted-set field. Instances are cached in
/// sortedSetInstances, and GetSortedSet wraps them in a new
/// RandomAccessOrds object on every call.
/// </summary>
internal class SortedSetRawValues
{
// start index into ords for each document; the entry at docID + 1 is used as the end index
internal NumericDocValues docToOrdAddress;
// ordinals of all documents, addressed through docToOrdAddress
internal NumericDocValues ords;
// value bytes, looked up by ordinal
internal BinaryDocValues values;
}
/// <summary>
/// Metadata of one numeric value block, as read by ReadNumericEntry.
/// </summary>
internal class NumericEntry
{
// base position in the data file; LoadNumeric seeks to offset + missingBytes before reading values
internal long offset;
// number of values LoadNumeric reads
internal int count;
// position of the missing bits handed to GetMissingBits; -1 makes it return a match-all bit set
internal long missingOffset;
// length in bytes of the missing bits; set to 0 when missingOffset is -1
internal long missingBytes;
// bytes per value; LoadNumeric supports 1, 2, 4 and 8
internal byte byteWidth;
#pragma warning disable 649 // LUCENENET NOTE: Never assigned
internal int packedIntsVersion;
#pragma warning restore 649
}
/// <summary>
/// Metadata of one binary value block, as read by ReadBinaryEntry.
/// </summary>
internal class BinaryEntry
{
// position in the data file where the value bytes start
internal long offset;
// position of the missing bits handed to GetMissingBits; -1 makes it return a match-all bit set
internal long missingOffset;
// length in bytes of the missing bits; set to 0 when missingOffset is -1
internal long missingBytes;
// number of values; LoadBinary reads count + 1 addresses
internal int count;
// total number of value bytes LoadBinary reads starting at offset
internal int numBytes;
#pragma warning disable 649 // LUCENENET NOTE: Never assigned
internal int minLength;
internal int maxLength;
internal int packedIntsVersion;
internal int blockSize;
#pragma warning restore 649
}
/// <summary>
/// Metadata of one sorted field, as read by ReadSortedEntry.
/// </summary>
internal class SortedEntry
{
// numeric block that LoadSorted loads as the document-to-ordinal lookup
internal NumericEntry docToOrd;
// binary block holding the values; its count is reported as ValueCount
internal BinaryEntry values;
}
/// <summary>
/// Metadata of one sorted-set field, as read by ReadSortedSetEntry.
/// </summary>
internal class SortedSetEntry
{
// numeric block loaded as the per-document start addresses into ords
internal NumericEntry docToOrdAddress;
// numeric block loaded as the ordinals
internal NumericEntry ords;
// binary block holding the values; its count is reported as ValueCount
internal BinaryEntry values;
}
/// <summary>
/// Holder for an offset and an ordinal count. Neither field is ever assigned,
/// and the type is not referenced by the code visible in this file.
/// </summary>
internal class FSTEntry
{
#pragma warning disable 649 // LUCENENET NOTE: Never assigned
internal long offset;
internal long numOrds;
#pragma warning restore 649
}
}
}