blob: eaa0a5845c4b58752b89305f80c874fae9fe3bfe [file]
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
namespace Lucene.Net.Codecs.ramonly
{
using Lucene.Net.Support;
using Bits = Lucene.Net.Util.Bits;
using BytesRef = Lucene.Net.Util.BytesRef;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using DocsAndPositionsEnum = Lucene.Net.Index.DocsAndPositionsEnum;
using DocsEnum = Lucene.Net.Index.DocsEnum;
using FieldInfo = Lucene.Net.Index.FieldInfo;
using IndexFileNames = Lucene.Net.Index.IndexFileNames;
using IndexInput = Lucene.Net.Store.IndexInput;
using IndexOutput = Lucene.Net.Store.IndexOutput;
using IOUtils = Lucene.Net.Util.IOUtils;
using RamUsageEstimator = Lucene.Net.Util.RamUsageEstimator;
using SegmentReadState = Lucene.Net.Index.SegmentReadState;
using SegmentWriteState = Lucene.Net.Index.SegmentWriteState;
using Terms = Lucene.Net.Index.Terms;
using TermsEnum = Lucene.Net.Index.TermsEnum;
/// <summary>
/// Stores all postings data in RAM, but writes a small
/// token (header + single int) to identify which "slot" the
/// index is using in RAM HashMap.
///
/// NOTE: this codec sorts terms by reverse-unicode-order!
/// </summary>
public sealed class RAMOnlyPostingsFormat : PostingsFormat
{
// For fun, test that we can override how terms are
// sorted, and basic things still work -- this comparator
// sorts in reversed unicode code point order:
private static readonly IComparer<BytesRef> reverseUnicodeComparator = new ComparatorAnonymousInnerClassHelper();
private class ComparatorAnonymousInnerClassHelper : IComparer<BytesRef>
{
public ComparatorAnonymousInnerClassHelper()
{
}
public virtual int Compare(BytesRef t1, BytesRef t2)
{
var b1 = t1.Bytes;
var b2 = t2.Bytes;
int b1Stop;
int b1Upto = t1.Offset;
int b2Upto = t2.Offset;
if (t1.Length < t2.Length)
{
b1Stop = t1.Offset + t1.Length;
}
else
{
b1Stop = t1.Offset + t2.Length;
}
while (b1Upto < b1Stop)
{
int bb1 = b1[b1Upto++] & 0xff;
int bb2 = b2[b2Upto++] & 0xff;
if (bb1 != bb2)
{
//System.out.println("cmp 1=" + t1 + " 2=" + t2 + " return " + (bb2-bb1));
return bb2 - bb1;
}
}
// One is prefix of another, or they are equal
return t2.Length - t1.Length;
}
public override bool Equals(object other)
{
return this == other;
}
}
public RAMOnlyPostingsFormat()
: base("RAMOnly")
{
}
// Postings state:
internal class RAMPostings : FieldsProducer
{
internal readonly IDictionary<string, RAMField> FieldToTerms = new SortedDictionary<string, RAMField>();
public override Terms Terms(string field)
{
return FieldToTerms[field];
}
public override int Size
{
get { return FieldToTerms.Count; }
}
public override IEnumerator<string> GetEnumerator()
{
return FieldToTerms.Keys.GetEnumerator();
}
public override void Dispose()
{
}
public override long RamBytesUsed()
{
long sizeInBytes = 0;
foreach (RAMField field in FieldToTerms.Values)
{
sizeInBytes += field.RamBytesUsed();
}
return sizeInBytes;
}
public override void CheckIntegrity()
{
}
}
internal class RAMField : Terms
{
internal readonly string Field;
internal readonly SortedDictionary<string, RAMTerm> TermToDocs = new SortedDictionary<string, RAMTerm>();
internal long SumTotalTermFreq_Renamed;
internal long SumDocFreq_Renamed;
internal int DocCount_Renamed;
internal readonly FieldInfo Info;
internal RAMField(string field, FieldInfo info)
{
this.Field = field;
this.Info = info;
}
/// <summary>
/// Returns approximate RAM bytes used </summary>
public virtual long RamBytesUsed()
{
long sizeInBytes = 0;
foreach (RAMTerm term in TermToDocs.Values)
{
sizeInBytes += term.RamBytesUsed();
}
return sizeInBytes;
}
public override long Size()
{
return TermToDocs.Count;
}
public override long SumTotalTermFreq
{
get
{
return SumTotalTermFreq_Renamed;
}
}
public override long SumDocFreq
{
get
{
return SumDocFreq_Renamed;
}
}
public override int DocCount
{
get
{
return DocCount_Renamed;
}
}
public override TermsEnum Iterator(TermsEnum reuse)
{
return new RAMTermsEnum(this);
}
public override IComparer<BytesRef> Comparator
{
get
{
return reverseUnicodeComparator;
}
}
public override bool HasFreqs()
{
return Info.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS;
}
public override bool HasOffsets()
{
return Info.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
}
public override bool HasPositions()
{
return Info.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
public override bool HasPayloads()
{
return Info.HasPayloads();
}
}
internal class RAMTerm
{
internal readonly string Term;
internal long TotalTermFreq;
internal readonly IList<RAMDoc> Docs = new List<RAMDoc>();
public RAMTerm(string term)
{
this.Term = term;
}
/// <summary>
/// Returns approximate RAM bytes used </summary>
public virtual long RamBytesUsed()
{
long sizeInBytes = 0;
foreach (RAMDoc rDoc in Docs)
{
sizeInBytes += rDoc.RamBytesUsed();
}
return sizeInBytes;
}
}
internal class RAMDoc
{
internal readonly int DocID;
internal readonly int[] Positions;
internal byte[][] Payloads;
public RAMDoc(int docID, int freq)
{
this.DocID = docID;
Positions = new int[freq];
}
/// <summary>
/// Returns approximate RAM bytes used </summary>
public virtual long RamBytesUsed()
{
long sizeInBytes = 0;
sizeInBytes += (Positions != null) ? RamUsageEstimator.SizeOf(Positions) : 0;
if (Payloads != null)
{
foreach (var payload in Payloads)
{
sizeInBytes += (payload != null) ? RamUsageEstimator.SizeOf(payload) : 0;
}
}
return sizeInBytes;
}
}
// Classes for writing to the postings state
private class RAMFieldsConsumer : FieldsConsumer
{
internal readonly RAMPostings Postings;
internal readonly RAMTermsConsumer TermsConsumer = new RAMTermsConsumer();
public RAMFieldsConsumer(RAMPostings postings)
{
this.Postings = postings;
}
public override TermsConsumer AddField(FieldInfo field)
{
if (field.FieldIndexOptions >= FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)
{
throw new System.NotSupportedException("this codec cannot index offsets");
}
RAMField ramField = new RAMField(field.Name, field);
Postings.FieldToTerms[field.Name] = ramField;
TermsConsumer.Reset(ramField);
return TermsConsumer;
}
public override void Dispose()
{
// TODO: finalize stuff
}
}
private class RAMTermsConsumer : TermsConsumer
{
internal RAMField Field;
internal readonly RAMPostingsWriterImpl PostingsWriter = new RAMPostingsWriterImpl();
internal RAMTerm Current;
internal virtual void Reset(RAMField field)
{
this.Field = field;
}
public override PostingsConsumer StartTerm(BytesRef text)
{
string term = text.Utf8ToString();
Current = new RAMTerm(term);
PostingsWriter.Reset(Current);
return PostingsWriter;
}
public override IComparer<BytesRef> Comparator
{
get
{
return BytesRef.UTF8SortedAsUnicodeComparer;
}
}
public override void FinishTerm(BytesRef text, TermStats stats)
{
Debug.Assert(stats.DocFreq > 0);
Debug.Assert(stats.DocFreq == Current.Docs.Count);
Current.TotalTermFreq = stats.TotalTermFreq;
Field.TermToDocs[Current.Term] = Current;
}
public override void Finish(long sumTotalTermFreq, long sumDocFreq, int docCount)
{
Field.SumTotalTermFreq_Renamed = sumTotalTermFreq;
Field.SumDocFreq_Renamed = sumDocFreq;
Field.DocCount_Renamed = docCount;
}
}
internal class RAMPostingsWriterImpl : PostingsConsumer
{
internal RAMTerm Term;
internal RAMDoc Current;
internal int PosUpto = 0;
public virtual void Reset(RAMTerm term)
{
this.Term = term;
}
public override void StartDoc(int docID, int freq)
{
Current = new RAMDoc(docID, freq);
Term.Docs.Add(Current);
PosUpto = 0;
}
public override void AddPosition(int position, BytesRef payload, int startOffset, int endOffset)
{
Debug.Assert(startOffset == -1);
Debug.Assert(endOffset == -1);
Current.Positions[PosUpto] = position;
if (payload != null && payload.Length > 0)
{
if (Current.Payloads == null)
{
Current.Payloads = new byte[Current.Positions.Length][];
}
var bytes = Current.Payloads[PosUpto] = new byte[payload.Length];
Array.Copy(payload.Bytes, payload.Offset, bytes, 0, payload.Length);
}
PosUpto++;
}
public override void FinishDoc()
{
Debug.Assert(PosUpto == Current.Positions.Length);
}
}
internal class RAMTermsEnum : TermsEnum
{
internal IEnumerator<string> It;
internal string Current;
internal readonly RAMField RamField;
public RAMTermsEnum(RAMField field)
{
this.RamField = field;
}
public override IComparer<BytesRef> Comparator
{
get
{
return BytesRef.UTF8SortedAsUnicodeComparer;
}
}
public override BytesRef Next()
{
if (It == null)
{
if (Current == null)
{
It = RamField.TermToDocs.Keys.GetEnumerator();
}
else
{
//It = RamField.TermToDocs.tailMap(Current).Keys.GetEnumerator();
It = RamField.TermToDocs.Where(kvpair => String.Compare(kvpair.Key, Current) >= 0).ToDictionary(kvpair => kvpair.Key, kvpair => kvpair.Value).Keys.GetEnumerator();
}
}
if (It.MoveNext())
{
Current = It.Current;
return new BytesRef(Current);
}
else
{
return null;
}
}
public override SeekStatus SeekCeil(BytesRef term)
{
Current = term.Utf8ToString();
It = null;
if (RamField.TermToDocs.ContainsKey(Current))
{
return SeekStatus.FOUND;
}
else
{
if (Current.CompareTo(RamField.TermToDocs.Last().Key) > 0)
{
return SeekStatus.END;
}
else
{
return SeekStatus.NOT_FOUND;
}
}
}
public override void SeekExact(long ord)
{
throw new System.NotSupportedException();
}
public override long Ord()
{
throw new System.NotSupportedException();
}
public override BytesRef Term()
{
// TODO: reuse BytesRef
return new BytesRef(Current);
}
public override int DocFreq()
{
return RamField.TermToDocs[Current].Docs.Count;
}
public override long TotalTermFreq()
{
return RamField.TermToDocs[Current].TotalTermFreq;
}
public override DocsEnum Docs(Bits liveDocs, DocsEnum reuse, int flags)
{
return new RAMDocsEnum(RamField.TermToDocs[Current], liveDocs);
}
public override DocsAndPositionsEnum DocsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags)
{
return new RAMDocsAndPositionsEnum(RamField.TermToDocs[Current], liveDocs);
}
}
private class RAMDocsEnum : DocsEnum
{
private readonly RAMTerm RamTerm;
private readonly Bits LiveDocs;
private RAMDoc Current;
private int Upto = -1;
private int PosUpto = 0;
public RAMDocsEnum(RAMTerm ramTerm, Bits liveDocs)
{
this.RamTerm = ramTerm;
this.LiveDocs = liveDocs;
}
public override int Advance(int targetDocID)
{
return SlowAdvance(targetDocID);
}
// TODO: override bulk read, for better perf
public override int NextDoc()
{
while (true)
{
Upto++;
if (Upto < RamTerm.Docs.Count)
{
Current = RamTerm.Docs[Upto];
if (LiveDocs == null || LiveDocs.Get(Current.DocID))
{
PosUpto = 0;
return Current.DocID;
}
}
else
{
return NO_MORE_DOCS;
}
}
}
public override int Freq()
{
return Current.Positions.Length;
}
public override int DocID()
{
return Current.DocID;
}
public override long Cost()
{
return RamTerm.Docs.Count;
}
}
private class RAMDocsAndPositionsEnum : DocsAndPositionsEnum
{
private readonly RAMTerm RamTerm;
private readonly Bits LiveDocs;
private RAMDoc Current;
private int Upto = -1;
private int PosUpto = 0;
public RAMDocsAndPositionsEnum(RAMTerm ramTerm, Bits liveDocs)
{
this.RamTerm = ramTerm;
this.LiveDocs = liveDocs;
}
public override int Advance(int targetDocID)
{
return SlowAdvance(targetDocID);
}
// TODO: override bulk read, for better perf
public override int NextDoc()
{
while (true)
{
Upto++;
if (Upto < RamTerm.Docs.Count)
{
Current = RamTerm.Docs[Upto];
if (LiveDocs == null || LiveDocs.Get(Current.DocID))
{
PosUpto = 0;
return Current.DocID;
}
}
else
{
return NO_MORE_DOCS;
}
}
}
public override int Freq()
{
return Current.Positions.Length;
}
public override int DocID()
{
return Current.DocID;
}
public override int NextPosition()
{
return Current.Positions[PosUpto++];
}
public override int StartOffset()
{
return -1;
}
public override int EndOffset()
{
return -1;
}
public override BytesRef Payload
{
get
{
if (Current.Payloads != null && Current.Payloads[PosUpto - 1] != null)
{
return new BytesRef(Current.Payloads[PosUpto - 1]);
}
else
{
return null;
}
}
}
public override long Cost()
{
return RamTerm.Docs.Count;
}
}
// Holds all indexes created, keyed by the ID assigned in fieldsConsumer
private readonly IDictionary<int?, RAMPostings> State = new Dictionary<int?, RAMPostings>();
private readonly AtomicLong NextID = new AtomicLong();
private readonly string RAM_ONLY_NAME = "RAMOnly";
private const int VERSION_START = 0;
private const int VERSION_LATEST = VERSION_START;
private const string ID_EXTENSION = "id";
public override FieldsConsumer FieldsConsumer(SegmentWriteState writeState)
{
int id = (int)NextID.IncrementAndGet();
// TODO -- ok to do this up front instead of
// on close....? should be ok?
// Write our ID:
string idFileName = IndexFileNames.SegmentFileName(writeState.SegmentInfo.Name, writeState.SegmentSuffix, ID_EXTENSION);
IndexOutput @out = writeState.Directory.CreateOutput(idFileName, writeState.Context);
bool success = false;
try
{
CodecUtil.WriteHeader(@out, RAM_ONLY_NAME, VERSION_LATEST);
@out.WriteVInt(id);
success = true;
}
finally
{
if (!success)
{
IOUtils.CloseWhileHandlingException(@out);
}
else
{
IOUtils.Close(@out);
}
}
RAMPostings postings = new RAMPostings();
RAMFieldsConsumer consumer = new RAMFieldsConsumer(postings);
lock (State)
{
State[id] = postings;
}
return consumer;
}
public override FieldsProducer FieldsProducer(SegmentReadState readState)
{
// Load our ID:
string idFileName = IndexFileNames.SegmentFileName(readState.SegmentInfo.Name, readState.SegmentSuffix, ID_EXTENSION);
IndexInput @in = readState.Directory.OpenInput(idFileName, readState.Context);
bool success = false;
int id;
try
{
CodecUtil.CheckHeader(@in, RAM_ONLY_NAME, VERSION_START, VERSION_LATEST);
id = @in.ReadVInt();
success = true;
}
finally
{
if (!success)
{
IOUtils.CloseWhileHandlingException(@in);
}
else
{
IOUtils.Close(@in);
}
}
lock (State)
{
return State[id];
}
}
}
}