blob: 3c525bf689b0284b1f5c05c35729a4c871f7762c [file] [log] [blame]
using J2N.Numerics;
using Lucene.Net.Diagnostics;
using Lucene.Net.Index;
using Lucene.Net.Store;
using Lucene.Net.Util;
using System.Diagnostics;
namespace Lucene.Net.Codecs.Sep
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// TODO: -- should we switch "hasProx" higher up? and
// create two separate docs readers, one that also reads
// prox and one that doesn't?
/// <summary>
/// Concrete class that reads the current doc/freq/skip
/// postings format.
/// <para/>
/// @lucene.experimental
/// </summary>
public class SepPostingsReader : PostingsReaderBase
{
#pragma warning disable CA2213 // Disposable fields should be disposed
private readonly Int32IndexInput freqIn;
private readonly Int32IndexInput docIn;
private readonly Int32IndexInput posIn;
private readonly IndexInput payloadIn;
private readonly IndexInput skipIn;
#pragma warning restore CA2213 // Disposable fields should be disposed
private int skipInterval;
private int maxSkipLevels;
private int skipMinimum;
public SepPostingsReader(Directory dir, FieldInfos fieldInfos, SegmentInfo segmentInfo, IOContext context,
Int32StreamFactory intFactory, string segmentSuffix)
{
bool success = false;
try
{
string docFileName = IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.DOC_EXTENSION);
docIn = intFactory.OpenInput(dir, docFileName, context);
skipIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.SKIP_EXTENSION), context);
if (fieldInfos.HasFreq)
{
freqIn = intFactory.OpenInput(dir, IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.FREQ_EXTENSION), context);
}
else
{
freqIn = null;
}
if (fieldInfos.HasProx)
{
posIn = intFactory.OpenInput(dir, IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.POS_EXTENSION), context);
payloadIn = dir.OpenInput(IndexFileNames.SegmentFileName(segmentInfo.Name, segmentSuffix, SepPostingsWriter.PAYLOAD_EXTENSION), context);
}
else
{
posIn = null;
payloadIn = null;
}
success = true;
}
finally
{
if (!success)
{
Dispose();
}
}
}
public override void Init(IndexInput termsIn)
{
// Make sure we are talking to the matching past writer
CodecUtil.CheckHeader(termsIn, SepPostingsWriter.CODEC,
SepPostingsWriter.VERSION_START, SepPostingsWriter.VERSION_START);
skipInterval = termsIn.ReadInt32();
maxSkipLevels = termsIn.ReadInt32();
skipMinimum = termsIn.ReadInt32();
}
protected override void Dispose(bool disposing)
{
if (disposing)
{
IOUtils.Dispose(freqIn, docIn, skipIn, posIn, payloadIn);
}
}
internal sealed class SepTermState : BlockTermState
{
// We store only the seek point to the docs file because
// the rest of the info (freqIndex, posIndex, etc.) is
// stored in the docs file:
internal Int32IndexInput.Index docIndex;
internal Int32IndexInput.Index posIndex;
internal Int32IndexInput.Index freqIndex;
internal long payloadFP;
internal long skipFP;
public override object Clone()
{
var other = new SepTermState();
other.CopyFrom(this);
return other;
}
public override void CopyFrom(TermState other)
{
base.CopyFrom(other);
SepTermState other_ = (SepTermState)other;
if (docIndex == null)
{
docIndex = (Int32IndexInput.Index)other_.docIndex.Clone();
}
else
{
docIndex.CopyFrom(other_.docIndex);
}
if (other_.freqIndex != null)
{
if (freqIndex == null)
{
freqIndex = (Int32IndexInput.Index)other_.freqIndex.Clone();
}
else
{
freqIndex.CopyFrom(other_.freqIndex);
}
}
else
{
freqIndex = null;
}
if (other_.posIndex != null)
{
if (posIndex == null)
{
posIndex = (Int32IndexInput.Index)other_.posIndex.Clone();
}
else
{
posIndex.CopyFrom(other_.posIndex);
}
}
else
{
posIndex = null;
}
payloadFP = other_.payloadFP;
skipFP = other_.skipFP;
}
public override string ToString()
{
return base.ToString() + " docIndex=" + docIndex + " freqIndex=" + freqIndex + " posIndex=" + posIndex +
" payloadFP=" + payloadFP + " skipFP=" + skipFP;
}
}
public override BlockTermState NewTermState()
{
SepTermState state = new SepTermState();
state.docIndex = docIn.GetIndex();
if (freqIn != null)
{
state.freqIndex = freqIn.GetIndex();
}
if (posIn != null)
{
state.posIndex = posIn.GetIndex();
}
return state;
}
public override void DecodeTerm(long[] empty, DataInput input, FieldInfo fieldInfo, BlockTermState termState,
bool absolute)
{
SepTermState termState_ = (SepTermState)termState;
termState_.docIndex.Read(input, absolute);
if (fieldInfo.IndexOptions != IndexOptions.DOCS_ONLY)
{
termState_.freqIndex.Read(input, absolute);
if (fieldInfo.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
{
//System.out.println(" freqIndex=" + termState.freqIndex);
termState_.posIndex.Read(input, absolute);
//System.out.println(" posIndex=" + termState.posIndex);
if (fieldInfo.HasPayloads)
{
if (absolute)
{
termState_.payloadFP = input.ReadVInt64();
}
else
{
termState_.payloadFP += input.ReadVInt64();
}
//System.out.println(" payloadFP=" + termState.payloadFP);
}
}
}
if (termState_.DocFreq >= skipMinimum)
{
//System.out.println(" readSkip @ " + in.getPosition());
if (absolute)
{
termState_.skipFP = input.ReadVInt64();
}
else
{
termState_.skipFP += input.ReadVInt64();
}
//System.out.println(" skipFP=" + termState.skipFP);
}
else if (absolute)
{
termState_.skipFP = 0;
}
}
public override DocsEnum Docs(FieldInfo fieldInfo, BlockTermState termState, IBits liveDocs, DocsEnum reuse,
DocsFlags flags)
{
SepTermState termState_ = (SepTermState)termState;
// If you are using ParellelReader, and pass in a
// reused DocsAndPositionsEnum, it could have come
// from another reader also using sep codec
if (reuse is null || !(reuse is SepDocsEnum docsEnum) || docsEnum.startDocIn != docIn)
docsEnum = new SepDocsEnum(this);
return docsEnum.Init(fieldInfo, termState_, liveDocs);
}
public override DocsAndPositionsEnum DocsAndPositions(FieldInfo fieldInfo, BlockTermState termState,
IBits liveDocs, DocsAndPositionsEnum reuse, DocsAndPositionsFlags flags)
{
if (Debugging.AssertsEnabled) Debugging.Assert(fieldInfo.IndexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
SepTermState termState_ = (SepTermState)termState;
// If you are using ParellelReader, and pass in a
// reused DocsAndPositionsEnum, it could have come
// from another reader also using sep codec
if (reuse is null || !(reuse is SepDocsAndPositionsEnum postingsEnum) || postingsEnum.startDocIn != docIn)
postingsEnum = new SepDocsAndPositionsEnum(this);
return postingsEnum.Init(fieldInfo, termState_, liveDocs);
}
internal class SepDocsEnum : DocsEnum
{
private readonly SepPostingsReader outerInstance;
private int docFreq;
private int doc = -1;
private int accum;
private int count;
private int freq;
//private long freqStart; // LUCENENET: Not used
// TODO: -- should we do omitTF with 2 different enum classes?
private bool omitTF;
private IndexOptions indexOptions;
private bool storePayloads;
private IBits liveDocs;
private readonly Int32IndexInput.Reader docReader;
private readonly Int32IndexInput.Reader freqReader;
private long skipFP;
private readonly Int32IndexInput.Index docIndex;
private readonly Int32IndexInput.Index freqIndex;
private readonly Int32IndexInput.Index posIndex;
internal readonly Int32IndexInput startDocIn;
// TODO: -- should we do hasProx with 2 different enum classes?
private bool skipped;
private SepSkipListReader skipper;
internal SepDocsEnum(SepPostingsReader outerInstance)
{
this.outerInstance = outerInstance;
startDocIn = outerInstance.docIn;
docReader = outerInstance.docIn.GetReader();
docIndex = outerInstance.docIn.GetIndex();
if (outerInstance.freqIn != null)
{
freqReader = outerInstance.freqIn.GetReader();
freqIndex = outerInstance.freqIn.GetIndex();
}
else
{
freqReader = null;
freqIndex = null;
}
if (outerInstance.posIn != null)
{
posIndex = outerInstance.posIn.GetIndex(); // only init this so skipper can read it
}
else
{
posIndex = null;
}
}
internal virtual SepDocsEnum Init(FieldInfo fieldInfo, SepTermState termState, IBits liveDocs)
{
this.liveDocs = liveDocs;
this.indexOptions = fieldInfo.IndexOptions;
omitTF = indexOptions == IndexOptions.DOCS_ONLY;
storePayloads = fieldInfo.HasPayloads;
// TODO: can't we only do this if consumer
// skipped consuming the previous docs?
docIndex.CopyFrom(termState.docIndex);
docIndex.Seek(docReader);
if (!omitTF)
{
freqIndex.CopyFrom(termState.freqIndex);
freqIndex.Seek(freqReader);
}
docFreq = termState.DocFreq;
// NOTE: unused if docFreq < skipMinimum:
skipFP = termState.skipFP;
count = 0;
doc = -1;
accum = 0;
freq = 1;
skipped = false;
return this;
}
public override int NextDoc()
{
while (true)
{
if (count == docFreq)
{
return doc = NO_MORE_DOCS;
}
count++;
// Decode next doc
//System.out.println("decode docDelta:");
accum += docReader.Next();
if (!omitTF)
{
//System.out.println("decode freq:");
freq = freqReader.Next();
}
if (liveDocs == null || liveDocs.Get(accum))
{
break;
}
}
return (doc = accum);
}
public override int Freq => freq;
public override int DocID => doc;
public override int Advance(int target)
{
if ((target - outerInstance.skipInterval) >= doc && docFreq >= outerInstance.skipMinimum)
{
// There are enough docs in the posting to have
// skip data, and its not too close
if (skipper == null)
{
// This DocsEnum has never done any skipping
skipper = new SepSkipListReader((IndexInput)outerInstance.skipIn.Clone(),
outerInstance.freqIn,
outerInstance.docIn,
outerInstance.posIn,
outerInstance.maxSkipLevels, outerInstance.skipInterval);
}
if (!skipped)
{
// We haven't yet skipped for this posting
skipper.Init(skipFP,
docIndex,
freqIndex,
posIndex,
0,
docFreq,
storePayloads);
skipper.SetIndexOptions(indexOptions);
skipped = true;
}
int newCount = skipper.SkipTo(target);
if (newCount > count)
{
// Skipper did move
if (!omitTF)
{
skipper.FreqIndex.Seek(freqReader);
}
skipper.DocIndex.Seek(docReader);
count = newCount;
doc = accum = skipper.Doc;
}
}
// Now, linear scan for the rest:
do
{
if (NextDoc() == NO_MORE_DOCS)
{
return NO_MORE_DOCS;
}
} while (target > doc);
return doc;
}
public override long GetCost()
{
return docFreq;
}
}
internal class SepDocsAndPositionsEnum : DocsAndPositionsEnum
{
private readonly SepPostingsReader outerInstance;
private int docFreq;
private int doc = -1;
private int accum;
private int count;
private int freq;
//private long freqStart; // LUCENENET: Not used
private bool storePayloads;
private IBits liveDocs;
private readonly Int32IndexInput.Reader docReader;
private readonly Int32IndexInput.Reader freqReader;
private readonly Int32IndexInput.Reader posReader;
private readonly IndexInput payloadIn;
private long skipFP;
private readonly Int32IndexInput.Index docIndex;
private readonly Int32IndexInput.Index freqIndex;
private readonly Int32IndexInput.Index posIndex;
internal readonly Int32IndexInput startDocIn;
private long payloadFP;
private int pendingPosCount;
private int position;
private int payloadLength;
private long pendingPayloadBytes;
private bool skipped;
private SepSkipListReader skipper;
private bool payloadPending;
private bool posSeekPending;
internal SepDocsAndPositionsEnum(SepPostingsReader outerInstance)
{
this.outerInstance = outerInstance;
startDocIn = outerInstance.docIn;
docReader = outerInstance.docIn.GetReader();
docIndex = outerInstance.docIn.GetIndex();
freqReader = outerInstance.freqIn.GetReader();
freqIndex = outerInstance.freqIn.GetIndex();
posReader = outerInstance.posIn.GetReader();
posIndex = outerInstance.posIn.GetIndex();
payloadIn = (IndexInput)outerInstance.payloadIn.Clone();
}
internal virtual SepDocsAndPositionsEnum Init(FieldInfo fieldInfo, SepTermState termState, IBits liveDocs)
{
this.liveDocs = liveDocs;
storePayloads = fieldInfo.HasPayloads;
//System.out.println("Sep D&P init");
// TODO: can't we only do this if consumer
// skipped consuming the previous docs?
docIndex.CopyFrom(termState.docIndex);
docIndex.Seek(docReader);
//System.out.println(" docIndex=" + docIndex);
freqIndex.CopyFrom(termState.freqIndex);
freqIndex.Seek(freqReader);
//System.out.println(" freqIndex=" + freqIndex);
posIndex.CopyFrom(termState.posIndex);
//System.out.println(" posIndex=" + posIndex);
posSeekPending = true;
payloadPending = false;
payloadFP = termState.payloadFP;
skipFP = termState.skipFP;
//System.out.println(" skipFP=" + skipFP);
docFreq = termState.DocFreq;
count = 0;
doc = -1;
accum = 0;
pendingPosCount = 0;
pendingPayloadBytes = 0;
skipped = false;
return this;
}
public override int NextDoc()
{
while (true)
{
if (count == docFreq)
{
return doc = NO_MORE_DOCS;
}
count++;
// TODO: maybe we should do the 1-bit trick for encoding
// freq=1 case?
// Decode next doc
//System.out.println(" sep d&p read doc");
accum += docReader.Next();
//System.out.println(" sep d&p read freq");
freq = freqReader.Next();
pendingPosCount += freq;
if (liveDocs == null || liveDocs.Get(accum))
{
break;
}
}
position = 0;
return (doc = accum);
}
public override int Freq => freq;
public override int DocID => doc;
public override int Advance(int target)
{
//System.out.println("SepD&P advance target=" + target + " vs current=" + doc + " this=" + this);
if ((target - outerInstance.skipInterval) >= doc && docFreq >= outerInstance.skipMinimum)
{
// There are enough docs in the posting to have
// skip data, and its not too close
if (skipper == null)
{
//System.out.println(" create skipper");
// This DocsEnum has never done any skipping
skipper = new SepSkipListReader((IndexInput)outerInstance.skipIn.Clone(),
outerInstance.freqIn,
outerInstance.docIn,
outerInstance.posIn,
outerInstance.maxSkipLevels, outerInstance.skipInterval);
}
if (!skipped)
{
//System.out.println(" init skip data skipFP=" + skipFP);
// We haven't yet skipped for this posting
skipper.Init(skipFP,
docIndex,
freqIndex,
posIndex,
payloadFP,
docFreq,
storePayloads);
skipper.SetIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
skipped = true;
}
int newCount = skipper.SkipTo(target);
//System.out.println(" skip newCount=" + newCount + " vs " + count);
if (newCount > count)
{
// Skipper did move
skipper.FreqIndex.Seek(freqReader);
skipper.DocIndex.Seek(docReader);
//System.out.println(" doc seek'd to " + skipper.getDocIndex());
// NOTE: don't seek pos here; do it lazily
// instead. Eg a PhraseQuery may skip to many
// docs before finally asking for positions...
posIndex.CopyFrom(skipper.PosIndex);
posSeekPending = true;
count = newCount;
doc = accum = skipper.Doc;
//System.out.println(" moved to doc=" + doc);
//payloadIn.seek(skipper.getPayloadPointer());
payloadFP = skipper.PayloadPointer;
pendingPosCount = 0;
pendingPayloadBytes = 0;
payloadPending = false;
payloadLength = skipper.PayloadLength;
//System.out.println(" move payloadLen=" + payloadLength);
}
}
// Now, linear scan for the rest:
do
{
if (NextDoc() == NO_MORE_DOCS)
{
//System.out.println(" advance nextDoc=END");
return NO_MORE_DOCS;
}
//System.out.println(" advance nextDoc=" + doc);
} while (target > doc);
//System.out.println(" return doc=" + doc);
return doc;
}
public override int NextPosition()
{
if (posSeekPending)
{
posIndex.Seek(posReader);
payloadIn.Seek(payloadFP);
posSeekPending = false;
}
// scan over any docs that were iterated without their
// positions
while (pendingPosCount > freq)
{
int code2 = posReader.Next();
if (storePayloads && (code2 & 1) != 0)
{
// Payload length has changed
payloadLength = posReader.Next();
if (Debugging.AssertsEnabled) Debugging.Assert(payloadLength >= 0);
}
pendingPosCount--;
position = 0;
pendingPayloadBytes += payloadLength;
}
int code = posReader.Next();
if (storePayloads)
{
if ((code & 1) != 0)
{
// Payload length has changed
payloadLength = posReader.Next();
if (Debugging.AssertsEnabled) Debugging.Assert(payloadLength >= 0);
}
position += code.TripleShift(1);
pendingPayloadBytes += payloadLength;
payloadPending = payloadLength > 0;
}
else
{
position += code;
}
pendingPosCount--;
if (Debugging.AssertsEnabled) Debugging.Assert(pendingPosCount >= 0);
return position;
}
public override int StartOffset => -1;
public override int EndOffset => -1;
private BytesRef payload;
public override BytesRef GetPayload()
{
if (!payloadPending)
{
return null;
}
if (pendingPayloadBytes == 0)
{
return payload;
}
if (Debugging.AssertsEnabled) Debugging.Assert(pendingPayloadBytes >= payloadLength);
if (pendingPayloadBytes > payloadLength)
{
payloadIn.Seek(payloadIn.GetFilePointer() + (pendingPayloadBytes - payloadLength));
}
if (payload == null)
{
payload = new BytesRef();
payload.Bytes = new byte[payloadLength];
}
else if (payload.Bytes.Length < payloadLength)
{
payload.Grow(payloadLength);
}
payloadIn.ReadBytes(payload.Bytes, 0, payloadLength);
payload.Length = payloadLength;
pendingPayloadBytes = 0;
return payload;
}
public override long GetCost()
{
return docFreq;
}
}
public override long RamBytesUsed()
{
return 0;
}
public override void CheckIntegrity()
{
// TODO: remove sep layout, its fallen behind on features...
}
}
}