using Lucene.Net.Diagnostics;
using Lucene.Net.Support;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.IO;
namespace Lucene.Net.Index
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using AppendingPackedInt64Buffer = Lucene.Net.Util.Packed.AppendingPackedInt64Buffer;
using BytesRef = Lucene.Net.Util.BytesRef;
using IBits = Lucene.Net.Util.IBits;
using MonotonicAppendingInt64Buffer = Lucene.Net.Util.Packed.MonotonicAppendingInt64Buffer;
using PackedInt32s = Lucene.Net.Util.Packed.PackedInt32s;
using TermsEnumIndex = Lucene.Net.Index.MultiTermsEnum.TermsEnumIndex;
using TermsEnumWithSlice = Lucene.Net.Index.MultiTermsEnum.TermsEnumWithSlice;
/// <summary>
/// A wrapper for <see cref="CompositeReader"/> providing access to <see cref="DocValues"/>.
///
/// <para/><b>NOTE</b>: for multi readers, you'll get better
/// performance by gathering the sub readers using
/// <see cref="IndexReader.Context"/> to get the
/// atomic leaves and then operate per-AtomicReader,
/// instead of using this class.
///
/// <para/><b>NOTE</b>: this is very costly.
///
/// <para/>
/// @lucene.experimental
/// @lucene.internal
/// </summary>
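/// <example>
/// A minimal sketch of the two access patterns described above; <c>reader</c>, <c>docID</c>,
/// and <c>"myField"</c> are placeholder names:
/// <code>
/// // preferred: gather the atomic leaves and work per segment with leaf-local doc ids
/// foreach (AtomicReaderContext leaf in reader.Leaves)
/// {
///     NumericDocValues perLeaf = leaf.AtomicReader.GetNormValues("myField"); // may be null
///     // use perLeaf with doc ids relative to leaf.DocBase
/// }
///
/// // slower convenience: a merged, top-level view over the whole composite reader
/// NumericDocValues merged = MultiDocValues.GetNormValues(reader, "myField");
/// long norm = merged == null ? 0 : merged.Get(docID); // docID is a top-level doc id
/// </code>
/// </example>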
public static class MultiDocValues // LUCENENET specific: CA1052 Static holder types should be Static or NotInheritable
{
/// <summary>
/// Returns a <see cref="NumericDocValues"/> for a reader's norms (potentially merging on-the-fly).
/// <para>
/// This is a slow way to access normalization values. Instead, access them per-segment
/// with <see cref="AtomicReader.GetNormValues(string)"/>
/// </para>
/// </summary>
public static NumericDocValues GetNormValues(IndexReader r, string field)
{
IList<AtomicReaderContext> leaves = r.Leaves;
int size = leaves.Count;
if (size == 0)
{
return null;
}
else if (size == 1)
{
return leaves[0].AtomicReader.GetNormValues(field);
}
FieldInfo fi = MultiFields.GetMergedFieldInfos(r).FieldInfo(field);
if (fi == null || fi.HasNorms == false)
{
return null;
}
bool anyReal = false;
NumericDocValues[] values = new NumericDocValues[size];
int[] starts = new int[size + 1];
for (int i = 0; i < size; i++)
{
AtomicReaderContext context = leaves[i];
NumericDocValues v = context.AtomicReader.GetNormValues(field);
if (v == null)
{
v = DocValues.EMPTY_NUMERIC;
}
else
{
anyReal = true;
}
values[i] = v;
starts[i] = context.DocBase;
}
starts[size] = r.MaxDoc;
if (Debugging.AssertsEnabled) Debugging.Assert(anyReal);
return new NumericDocValuesAnonymousClass(values, starts);
}
private class NumericDocValuesAnonymousClass : NumericDocValues
{
private readonly NumericDocValues[] values;
private readonly int[] starts;
public NumericDocValuesAnonymousClass(NumericDocValues[] values, int[] starts)
{
this.values = values;
this.starts = starts;
}
public override long Get(int docID)
{
int subIndex = ReaderUtil.SubIndex(docID, starts);
return values[subIndex].Get(docID - starts[subIndex]);
}
}
/// <summary>
/// Returns a <see cref="NumericDocValues"/> for a reader's docvalues (potentially merging on-the-fly)
/// <para>
/// This is a slow way to access numeric values. Instead, access them per-segment
/// with <see cref="AtomicReader.GetNumericDocValues(string)"/>
/// </para>
/// </summary>
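/// <example>
/// A brief usage sketch; <c>reader</c>, <c>docID</c>, and <c>"myField"</c> are placeholder names:
/// <code>
/// NumericDocValues values = MultiDocValues.GetNumericValues(reader, "myField");
/// if (values != null)
/// {
///     long value = values.Get(docID); // docID is a top-level (composite) doc id
/// }
/// </code>
/// </example>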
public static NumericDocValues GetNumericValues(IndexReader r, string field)
{
IList<AtomicReaderContext> leaves = r.Leaves;
int size = leaves.Count;
if (size == 0)
{
return null;
}
else if (size == 1)
{
return leaves[0].AtomicReader.GetNumericDocValues(field);
}
bool anyReal = false;
NumericDocValues[] values = new NumericDocValues[size];
int[] starts = new int[size + 1];
for (int i = 0; i < size; i++)
{
AtomicReaderContext context = leaves[i];
NumericDocValues v = context.AtomicReader.GetNumericDocValues(field);
if (v == null)
{
v = DocValues.EMPTY_NUMERIC;
}
else
{
anyReal = true;
}
values[i] = v;
starts[i] = context.DocBase;
}
starts[size] = r.MaxDoc;
if (!anyReal)
{
return null;
}
else
{
return new NumericDocValuesAnonymousClass2(values, starts);
}
}
private class NumericDocValuesAnonymousClass2 : NumericDocValues
{
private readonly NumericDocValues[] values;
private readonly int[] starts;
public NumericDocValuesAnonymousClass2(NumericDocValues[] values, int[] starts)
{
this.values = values;
this.starts = starts;
}
public override long Get(int docID)
{
int subIndex = ReaderUtil.SubIndex(docID, starts);
return values[subIndex].Get(docID - starts[subIndex]);
}
}
/// <summary>
/// Returns a <see cref="IBits"/> for a reader's docsWithField (potentially merging on-the-fly)
/// <para>
/// This is a slow way to access this bitset. Instead, access them per-segment
/// with <see cref="AtomicReader.GetDocsWithField(string)"/>
/// </para>
/// </summary>
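/// <example>
/// A brief usage sketch; <c>reader</c>, <c>docID</c>, and <c>"myField"</c> are placeholder names:
/// <code>
/// IBits docsWithField = MultiDocValues.GetDocsWithField(reader, "myField");
/// if (docsWithField != null)
/// {
///     bool hasValue = docsWithField.Get(docID); // docID is a top-level doc id
/// }
/// </code>
/// </example>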
public static IBits GetDocsWithField(IndexReader r, string field)
{
IList<AtomicReaderContext> leaves = r.Leaves;
int size = leaves.Count;
if (size == 0)
{
return null;
}
else if (size == 1)
{
return leaves[0].AtomicReader.GetDocsWithField(field);
}
bool anyReal = false;
bool anyMissing = false;
IBits[] values = new IBits[size];
int[] starts = new int[size + 1];
for (int i = 0; i < size; i++)
{
AtomicReaderContext context = leaves[i];
IBits v = context.AtomicReader.GetDocsWithField(field);
if (v == null)
{
v = new Lucene.Net.Util.Bits.MatchNoBits(context.Reader.MaxDoc);
anyMissing = true;
}
else
{
anyReal = true;
if (v is Lucene.Net.Util.Bits.MatchAllBits == false)
{
anyMissing = true;
}
}
values[i] = v;
starts[i] = context.DocBase;
}
starts[size] = r.MaxDoc;
if (!anyReal)
{
return null;
}
else if (!anyMissing)
{
return new Lucene.Net.Util.Bits.MatchAllBits(r.MaxDoc);
}
else
{
return new MultiBits(values, starts, false);
}
}
/// <summary>
/// Returns a <see cref="BinaryDocValues"/> for a reader's docvalues (potentially merging on-the-fly)
/// <para>
/// This is a slow way to access binary values. Instead, access them per-segment
/// with <see cref="AtomicReader.GetBinaryDocValues(string)"/>
/// </para>
/// </summary>
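/// <example>
/// A brief usage sketch; <c>reader</c>, <c>docID</c>, and <c>"myField"</c> are placeholder names:
/// <code>
/// BinaryDocValues values = MultiDocValues.GetBinaryValues(reader, "myField");
/// if (values != null)
/// {
///     var scratch = new BytesRef();
///     values.Get(docID, scratch); // scratch now holds the bytes for docID
/// }
/// </code>
/// </example>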
public static BinaryDocValues GetBinaryValues(IndexReader r, string field)
{
IList<AtomicReaderContext> leaves = r.Leaves;
int size = leaves.Count;
if (size == 0)
{
return null;
}
else if (size == 1)
{
return leaves[0].AtomicReader.GetBinaryDocValues(field);
}
bool anyReal = false;
BinaryDocValues[] values = new BinaryDocValues[size];
int[] starts = new int[size + 1];
for (int i = 0; i < size; i++)
{
AtomicReaderContext context = leaves[i];
BinaryDocValues v = context.AtomicReader.GetBinaryDocValues(field);
if (v == null)
{
v = DocValues.EMPTY_BINARY;
}
else
{
anyReal = true;
}
values[i] = v;
starts[i] = context.DocBase;
}
starts[size] = r.MaxDoc;
if (!anyReal)
{
return null;
}
else
{
return new BinaryDocValuesAnonymousClass(values, starts);
}
}
private class BinaryDocValuesAnonymousClass : BinaryDocValues
{
private readonly BinaryDocValues[] values;
private readonly int[] starts;
public BinaryDocValuesAnonymousClass(BinaryDocValues[] values, int[] starts)
{
this.values = values;
this.starts = starts;
}
public override void Get(int docID, BytesRef result)
{
int subIndex = ReaderUtil.SubIndex(docID, starts);
values[subIndex].Get(docID - starts[subIndex], result);
}
}
/// <summary>
/// Returns a <see cref="SortedDocValues"/> for a reader's docvalues (potentially doing extremely slow things).
/// <para>
/// This is an extremely slow way to access sorted values. Instead, access them per-segment
/// with <see cref="AtomicReader.GetSortedDocValues(string)"/>
/// </para>
/// </summary>
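/// <example>
/// A brief usage sketch; <c>reader</c>, <c>docID</c>, and <c>"myField"</c> are placeholder names:
/// <code>
/// SortedDocValues sorted = MultiDocValues.GetSortedValues(reader, "myField");
/// if (sorted != null)
/// {
///     int ord = sorted.GetOrd(docID); // global ord, or -1 if the doc has no value
///     if (ord != -1)
///     {
///         var term = new BytesRef();
///         sorted.LookupOrd(ord, term); // the term value for this global ord
///     }
/// }
/// </code>
/// </example>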
public static SortedDocValues GetSortedValues(IndexReader r, string field)
{
IList<AtomicReaderContext> leaves = r.Leaves;
int size = leaves.Count;
if (size == 0)
{
return null;
}
else if (size == 1)
{
return leaves[0].AtomicReader.GetSortedDocValues(field);
}
bool anyReal = false;
var values = new SortedDocValues[size];
int[] starts = new int[size + 1];
for (int i = 0; i < size; i++)
{
AtomicReaderContext context = leaves[i];
SortedDocValues v = context.AtomicReader.GetSortedDocValues(field);
if (v == null)
{
v = DocValues.EMPTY_SORTED;
}
else
{
anyReal = true;
}
values[i] = v;
starts[i] = context.DocBase;
}
starts[size] = r.MaxDoc;
if (!anyReal)
{
return null;
}
else
{
TermsEnum[] enums = new TermsEnum[values.Length];
for (int i = 0; i < values.Length; i++)
{
enums[i] = values[i].GetTermsEnum();
}
OrdinalMap mapping = new OrdinalMap(r.CoreCacheKey, enums);
return new MultiSortedDocValues(values, starts, mapping);
}
}
/// <summary>
/// Returns a <see cref="SortedSetDocValues"/> for a reader's docvalues (potentially doing extremely slow things).
/// <para>
/// This is an extremely slow way to access sorted values. Instead, access them per-segment
/// with <see cref="AtomicReader.GetSortedSetDocValues(string)"/>
/// </para>
/// </summary>
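/// <example>
/// A brief usage sketch; <c>reader</c>, <c>docID</c>, and <c>"myField"</c> are placeholder names:
/// <code>
/// SortedSetDocValues sorted = MultiDocValues.GetSortedSetValues(reader, "myField");
/// if (sorted != null)
/// {
///     sorted.SetDocument(docID); // docID is a top-level doc id
///     var term = new BytesRef();
///     long ord;
///     while ((ord = sorted.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
///     {
///         sorted.LookupOrd(ord, term); // each global ord for this document, in order
///     }
/// }
/// </code>
/// </example>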
public static SortedSetDocValues GetSortedSetValues(IndexReader r, string field)
{
IList<AtomicReaderContext> leaves = r.Leaves;
int size = leaves.Count;
if (size == 0)
{
return null;
}
else if (size == 1)
{
return leaves[0].AtomicReader.GetSortedSetDocValues(field);
}
bool anyReal = false;
SortedSetDocValues[] values = new SortedSetDocValues[size];
int[] starts = new int[size + 1];
for (int i = 0; i < size; i++)
{
AtomicReaderContext context = leaves[i];
SortedSetDocValues v = context.AtomicReader.GetSortedSetDocValues(field);
if (v == null)
{
v = DocValues.EMPTY_SORTED_SET;
}
else
{
anyReal = true;
}
values[i] = v;
starts[i] = context.DocBase;
}
starts[size] = r.MaxDoc;
if (!anyReal)
{
return null;
}
else
{
TermsEnum[] enums = new TermsEnum[values.Length];
for (int i = 0; i < values.Length; i++)
{
enums[i] = values[i].GetTermsEnum();
}
OrdinalMap mapping = new OrdinalMap(r.CoreCacheKey, enums);
return new MultiSortedSetDocValues(values, starts, mapping);
}
}
/// <summary>
/// maps per-segment ordinals to/from global ordinal space </summary>
// TODO: use more efficient packed ints structures?
// TODO: pull this out? its pretty generic (maps between N ord()-enabled TermsEnums)
public class OrdinalMap
{
// cache key of whoever asked for this awful thing
internal readonly object owner;
// globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the ordinal in the first segment that contains this term
internal readonly MonotonicAppendingInt64Buffer globalOrdDeltas;
// globalOrd -> first segment container
internal readonly AppendingPackedInt64Buffer firstSegments;
// for every segment, segmentOrd -> (globalOrd - segmentOrd)
internal readonly MonotonicAppendingInt64Buffer[] ordDeltas;
/// <summary>
/// Creates an ordinal map that allows mapping ords to/from a merged
/// space from <c>subs</c>. </summary>
/// <param name="owner"> a cache key </param>
/// <param name="subs"> <see cref="TermsEnum"/>s that support <see cref="TermsEnum.Ord"/>. They need
/// not be dense (e.g. can be FilteredTermsEnums). </param>
/// <exception cref="IOException"> if an I/O error occurred. </exception>
public OrdinalMap(object owner, TermsEnum[] subs)
{
// create the ordinal mappings by pulling a termsenum over each sub's
// unique terms, and walking a multitermsenum over those
this.owner = owner;
globalOrdDeltas = new MonotonicAppendingInt64Buffer(PackedInt32s.COMPACT);
firstSegments = new AppendingPackedInt64Buffer(PackedInt32s.COMPACT);
ordDeltas = new MonotonicAppendingInt64Buffer[subs.Length];
for (int i = 0; i < ordDeltas.Length; i++)
{
ordDeltas[i] = new MonotonicAppendingInt64Buffer();
}
long[] segmentOrds = new long[subs.Length];
ReaderSlice[] slices = new ReaderSlice[subs.Length];
TermsEnumIndex[] indexes = new TermsEnumIndex[slices.Length];
for (int i = 0; i < slices.Length; i++)
{
slices[i] = new ReaderSlice(0, 0, i);
indexes[i] = new TermsEnumIndex(subs[i], i);
}
MultiTermsEnum mte = new MultiTermsEnum(slices);
mte.Reset(indexes);
long globalOrd = 0;
while (mte.MoveNext())
{
TermsEnumWithSlice[] matches = mte.MatchArray;
for (int i = 0; i < mte.MatchCount; i++)
{
int segmentIndex = matches[i].Index;
long segmentOrd = matches[i].Terms.Ord;
long delta = globalOrd - segmentOrd;
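// delta is what must be added to a segment-local ord to recover the global ord
// (and subtracted from a global ord to recover the first-segment ord); storing
// deltas rather than absolute ords keeps the packed buffers small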
// for each unique term, just mark the first segment index/delta where it occurs
if (i == 0)
{
firstSegments.Add(segmentIndex);
globalOrdDeltas.Add(delta);
}
// for each per-segment ord, map it back to the global term.
while (segmentOrds[segmentIndex] <= segmentOrd)
{
ordDeltas[segmentIndex].Add(delta);
segmentOrds[segmentIndex]++;
}
}
globalOrd++;
}
firstSegments.Freeze();
globalOrdDeltas.Freeze();
for (int i = 0; i < ordDeltas.Length; ++i)
{
ordDeltas[i].Freeze();
}
}
/// <summary>
/// Given a segment number and segment ordinal, returns
/// the corresponding global ordinal.
/// </summary>
public virtual long GetGlobalOrd(int segmentIndex, long segmentOrd)
{
return segmentOrd + ordDeltas[segmentIndex].Get(segmentOrd);
}
/// <summary>
/// Given global ordinal, returns the ordinal of the first segment which contains
/// this ordinal (corresponding to the segment returned by <see cref="GetFirstSegmentNumber(long)"/>).
/// </summary>
public virtual long GetFirstSegmentOrd(long globalOrd)
{
return globalOrd - globalOrdDeltas.Get(globalOrd);
}
/// <summary>
/// Given a global ordinal, returns the index of the first
/// segment that contains this term.
/// </summary>
public virtual int GetFirstSegmentNumber(long globalOrd)
{
return (int)firstSegments.Get(globalOrd);
}
/// <summary>
/// Returns the total number of unique terms in global ord space.
/// </summary>
public virtual long ValueCount => globalOrdDeltas.Count;
/// <summary>
/// Returns total byte size used by this ordinal map.
/// </summary>
public virtual long RamBytesUsed()
{
long size = globalOrdDeltas.RamBytesUsed() + firstSegments.RamBytesUsed();
for (int i = 0; i < ordDeltas.Length; i++)
{
size += ordDeltas[i].RamBytesUsed();
}
return size;
}
}
/// <summary>
/// Implements <see cref="SortedDocValues"/> over n subs, using an <see cref="OrdinalMap"/>
/// <para/>
/// @lucene.internal
/// </summary>
public class MultiSortedDocValues : SortedDocValues
{
/// <summary>
/// docbase for each leaf: parallel with <see cref="Values"/> </summary>
[WritableArray]
[SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
public int[] DocStarts => docStarts;
private readonly int[] docStarts;
/// <summary>
/// leaf values </summary>
[WritableArray]
[SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
public SortedDocValues[] Values => values;
private readonly SortedDocValues[] values;
/// <summary>
/// ordinal map mapping ords from <c>values</c> to global ord space </summary>
public OrdinalMap Mapping => mapping;
private readonly OrdinalMap mapping;
/// <summary>
/// Creates a new <see cref="MultiSortedDocValues"/> over <paramref name="values"/> </summary>
internal MultiSortedDocValues(SortedDocValues[] values, int[] docStarts, OrdinalMap mapping)
{
if (Debugging.AssertsEnabled)
{
Debugging.Assert(values.Length == mapping.ordDeltas.Length);
Debugging.Assert(docStarts.Length == values.Length + 1);
}
this.values = values;
this.docStarts = docStarts;
this.mapping = mapping;
}
public override int GetOrd(int docID)
{
int subIndex = ReaderUtil.SubIndex(docID, docStarts);
int segmentOrd = values[subIndex].GetOrd(docID - docStarts[subIndex]);
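// -1 means the document has no value for this field; any real ord is remapped into the global ord space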
return segmentOrd == -1 ? segmentOrd : (int)mapping.GetGlobalOrd(subIndex, segmentOrd);
}
public override void LookupOrd(int ord, BytesRef result)
{
int subIndex = mapping.GetFirstSegmentNumber(ord);
int segmentOrd = (int)mapping.GetFirstSegmentOrd(ord);
values[subIndex].LookupOrd(segmentOrd, result);
}
public override int ValueCount => (int)mapping.ValueCount;
}
/// <summary>
/// Implements <see cref="MultiSortedSetDocValues"/> over n subs, using an <see cref="OrdinalMap"/>
/// <para/>
/// @lucene.internal
/// </summary>
public class MultiSortedSetDocValues : SortedSetDocValues
{
/// <summary>
/// docbase for each leaf: parallel with <see cref="Values"/> </summary>
[WritableArray]
[SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
public int[] DocStarts => docStarts;
private readonly int[] docStarts;
/// <summary>
/// leaf values </summary>
[WritableArray]
[SuppressMessage("Microsoft.Performance", "CA1819", Justification = "Lucene's design requires some writable array properties")]
public SortedSetDocValues[] Values => values;
private readonly SortedSetDocValues[] values;
/// <summary>
/// ordinal map mapping ords from <c>values</c> to global ord space </summary>
public OrdinalMap Mapping => mapping;
private readonly OrdinalMap mapping;
internal int currentSubIndex;
/// <summary>
/// Creates a new <see cref="MultiSortedSetDocValues"/> over <paramref name="values"/> </summary>
internal MultiSortedSetDocValues(SortedSetDocValues[] values, int[] docStarts, OrdinalMap mapping)
{
if (Debugging.AssertsEnabled)
{
Debugging.Assert(values.Length == mapping.ordDeltas.Length);
Debugging.Assert(docStarts.Length == values.Length + 1);
}
this.values = values;
this.docStarts = docStarts;
this.mapping = mapping;
}
public override long NextOrd()
{
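// currentSubIndex was positioned by SetDocument; ords from that sub are remapped into
// the global ord space, and the NO_MORE_ORDS sentinel passes through unchanged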
long segmentOrd = values[currentSubIndex].NextOrd();
if (segmentOrd == NO_MORE_ORDS)
{
return segmentOrd;
}
else
{
return mapping.GetGlobalOrd(currentSubIndex, segmentOrd);
}
}
public override void SetDocument(int docID)
{
currentSubIndex = ReaderUtil.SubIndex(docID, docStarts);
values[currentSubIndex].SetDocument(docID - docStarts[currentSubIndex]);
}
public override void LookupOrd(long ord, BytesRef result)
{
int subIndex = mapping.GetFirstSegmentNumber(ord);
long segmentOrd = mapping.GetFirstSegmentOrd(ord);
values[subIndex].LookupOrd(segmentOrd, result);
}
public override long ValueCount => mapping.ValueCount;
}
}
}