// blob: 72d17dfc48fc61dad46ba8223d0d21410852d393 [file] [log] [blame]
using Lucene.Net.Util;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Index
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using IBits = Lucene.Net.Util.IBits;
using BytesRef = Lucene.Net.Util.BytesRef;
/// <summary>
/// Exposes flex API, merged from flex API of sub-segments.
/// This is useful when you're interacting with an
/// <see cref="IndexReader"/> implementation that consists of sequential
/// sub-readers (eg <see cref="DirectoryReader"/> or
/// <see cref="MultiReader"/>).
///
/// <para/><b>NOTE</b>: for composite readers, you'll get better
/// performance by gathering the sub-readers using
/// <see cref="IndexReader.Context"/> to get the
/// atomic leaves and then operating per-AtomicReader,
/// instead of using this class.
/// <para/>
/// @lucene.experimental
/// </summary>
public sealed class MultiFields : Fields
{
// The per-sub-reader Fields instances that this object merges.
private readonly Fields[] subs;
// Slice descriptors parallel to subs: subSlices[i] is paired with subs[i] in GetTerms.
private readonly ReaderSlice[] subSlices;
// Cache of merged Terms keyed by field name, filled lazily by GetTerms(string).
// Only fields that exist in at least one sub are stored; misses are never cached.
private readonly IDictionary<string, Terms> terms = new ConcurrentDictionary<string, Terms>();
/// <summary>
/// Produces one <see cref="Fields"/> view covering every leaf of
/// <paramref name="reader"/>, merging fields/terms/docs/positions on the fly.
///
/// <para/><b>NOTE</b>: this is a slow way to access postings.
/// Prefer walking the sub-readers yourself.
/// </summary>
/// <param name="reader">The reader whose leaves are inspected.</param>
/// <returns>
/// <c>null</c> when the reader has no leaves or no leaf exposes a
/// <see cref="Fields"/>; the leaf's own <see cref="Fields"/> when exactly one
/// is available; otherwise a new <see cref="MultiFields"/> over all of them.
/// </returns>
public static Fields GetFields(IndexReader reader)
{
    var leaves = reader.Leaves;
    int leafCount = leaves.Count;

    if (leafCount == 0)
    {
        // Nothing to merge.
        return null;
    }

    if (leafCount == 1)
    {
        // Single leaf: hand back its Fields directly, no wrapper needed.
        return leaves[0].AtomicReader.Fields;
    }

    var leafFields = new List<Fields>();
    var leafSlices = new List<ReaderSlice>();
    foreach (AtomicReaderContext leaf in leaves)
    {
        AtomicReader atomicReader = leaf.AtomicReader;
        Fields current = atomicReader.Fields;
        if (current == null)
        {
            // Leaves without postings are skipped entirely.
            continue;
        }

        leafFields.Add(current);
        // The slice's third argument is the index of the entry just added to leafFields.
        leafSlices.Add(new ReaderSlice(leaf.DocBase, atomicReader.MaxDoc, leafFields.Count - 1));
    }

    switch (leafFields.Count)
    {
        case 0:
            return null;
        case 1:
            return leafFields[0];
        default:
            return new MultiFields(leafFields.ToArray(), leafSlices.ToArray());
    }
}
/// <summary>
/// Produces one <see cref="IBits"/> of live documents for
/// <paramref name="reader"/>, merged across its leaves on the fly.
///
/// <para/><b>NOTE</b>: this is a very slow way to access live docs.
/// For example, each <see cref="IBits"/> access will require a binary search.
/// Prefer walking the sub-readers yourself.
/// </summary>
/// <param name="reader">The reader whose live docs are requested.</param>
/// <returns>
/// <c>null</c> when the reader reports no deletions; the single leaf's
/// live docs when there is exactly one leaf; otherwise a
/// <see cref="MultiBits"/> over every leaf's live docs.
/// </returns>
public static IBits GetLiveDocs(IndexReader reader)
{
    if (!reader.HasDeletions)
    {
        return null;
    }

    IList<AtomicReaderContext> leaves = reader.Leaves;
    int leafCount = leaves.Count;
    Debug.Assert(leafCount > 0, "A reader with deletions must have at least one leave");

    if (leafCount == 1)
    {
        return leaves[0].AtomicReader.LiveDocs;
    }

    IBits[] perLeafLiveDocs = new IBits[leafCount];
    // One extra slot: the last entry holds the reader's MaxDoc as the end boundary.
    int[] docStarts = new int[leafCount + 1];
    for (int leafIndex = 0; leafIndex < leafCount; leafIndex++)
    {
        AtomicReaderContext leaf = leaves[leafIndex];
        // Every leaf is recorded, including those whose LiveDocs is null.
        perLeafLiveDocs[leafIndex] = leaf.AtomicReader.LiveDocs;
        docStarts[leafIndex] = leaf.DocBase;
    }
    docStarts[leafCount] = reader.MaxDoc;

    // NOTE(review): the trailing 'true' is presumably the value reported for
    // leaves whose LiveDocs is null -- confirm against MultiBits.
    return new MultiBits(perLeafLiveDocs, docStarts, true);
}
/// <summary>
/// Looks up the <see cref="Terms"/> of <paramref name="field"/> through the
/// merged view returned by <see cref="GetFields(IndexReader)"/>.
/// </summary>
/// <param name="r">The reader to search.</param>
/// <param name="field">The field name.</param>
/// <returns>
/// <c>null</c> when <see cref="GetFields(IndexReader)"/> returns <c>null</c>;
/// otherwise whatever <see cref="Fields.GetTerms(string)"/> returns for the
/// field, which may itself be <c>null</c> if the field does not exist.
/// </returns>
public static Terms GetTerms(IndexReader r, string field)
{
    Fields mergedFields = GetFields(r);
    return mergedFields == null ? null : mergedFields.GetTerms(field);
}
/// <summary>
/// Returns the <see cref="DocsEnum"/> for the given field &amp; term,
/// requesting <see cref="DocsFlags.FREQS"/>. Returns <c>null</c> if the
/// field or term does not exist.
/// </summary>
/// <param name="r">The reader to search.</param>
/// <param name="liveDocs">Passed through to the underlying <see cref="TermsEnum"/>.</param>
/// <param name="field">The field name.</param>
/// <param name="term">The term to seek to.</param>
public static DocsEnum GetTermDocsEnum(IndexReader r, IBits liveDocs, string field, BytesRef term)
{
    // Convenience overload: frequencies are requested by default.
    DocsFlags defaultFlags = DocsFlags.FREQS;
    return GetTermDocsEnum(r, liveDocs, field, term, defaultFlags);
}
/// <summary>
/// Returns the <see cref="DocsEnum"/> for the given field &amp; term, letting
/// the caller choose whether freqs are required. Some codecs may be able to
/// optimize their implementation when freqs are not required. See
/// <see cref="TermsEnum.Docs(IBits, DocsEnum, DocsFlags)"/>.
/// </summary>
/// <param name="r">The reader to search.</param>
/// <param name="liveDocs">Passed through to <see cref="TermsEnum.Docs(IBits, DocsEnum, DocsFlags)"/>.</param>
/// <param name="field">The field name; checked for <c>null</c> by a debug-only assertion.</param>
/// <param name="term">The term to seek to; checked for <c>null</c> by a debug-only assertion.</param>
/// <param name="flags">Passed through to <see cref="TermsEnum.Docs(IBits, DocsEnum, DocsFlags)"/>.</param>
/// <returns><c>null</c> if the field has no terms or the exact term is not found.</returns>
public static DocsEnum GetTermDocsEnum(IndexReader r, IBits liveDocs, string field, BytesRef term, DocsFlags flags)
{
    Debug.Assert(field != null);
    Debug.Assert(term != null);

    Terms fieldTerms = GetTerms(r, field);
    if (fieldTerms == null)
    {
        return null;
    }

    // No enumerator to reuse, hence the null arguments.
    TermsEnum cursor = fieldTerms.GetIterator(null);
    return cursor.SeekExact(term) ? cursor.Docs(liveDocs, null, flags) : null;
}
/// <summary>
/// Returns the <see cref="DocsAndPositionsEnum"/> for the given field &amp;
/// term, requesting both offsets and payloads. Returns <c>null</c> if the
/// field or term does not exist or positions were not indexed.
/// </summary>
/// <param name="r">The reader to search.</param>
/// <param name="liveDocs">Passed through to the underlying <see cref="TermsEnum"/>.</param>
/// <param name="field">The field name.</param>
/// <param name="term">The term to seek to.</param>
/// <seealso cref="GetTermPositionsEnum(IndexReader, IBits, string, BytesRef, DocsAndPositionsFlags)"/>
public static DocsAndPositionsEnum GetTermPositionsEnum(IndexReader r, IBits liveDocs, string field, BytesRef term)
{
    // Convenience overload: ask for everything (offsets and payloads).
    DocsAndPositionsFlags everything = DocsAndPositionsFlags.OFFSETS | DocsAndPositionsFlags.PAYLOADS;
    return GetTermPositionsEnum(r, liveDocs, field, term, everything);
}
/// <summary>
/// Returns the <see cref="DocsAndPositionsEnum"/> for the given field &amp;
/// term, letting the caller choose whether offsets and payloads are required.
/// Some codecs may be able to optimize their implementation when offsets
/// and/or payloads are not required. See
/// <see cref="TermsEnum.DocsAndPositions(IBits, DocsAndPositionsEnum, DocsAndPositionsFlags)"/>.
/// </summary>
/// <param name="r">The reader to search.</param>
/// <param name="liveDocs">Passed through to the underlying <see cref="TermsEnum"/>.</param>
/// <param name="field">The field name; checked for <c>null</c> by a debug-only assertion.</param>
/// <param name="term">The term to seek to; checked for <c>null</c> by a debug-only assertion.</param>
/// <param name="flags">Passed through to the underlying <see cref="TermsEnum"/>.</param>
/// <returns>
/// <c>null</c> if the field has no terms or the exact term is not found;
/// otherwise the result of the underlying <see cref="TermsEnum"/> call, which
/// per the original documentation is <c>null</c> when positions were not indexed.
/// </returns>
public static DocsAndPositionsEnum GetTermPositionsEnum(IndexReader r, IBits liveDocs, string field, BytesRef term, DocsAndPositionsFlags flags)
{
    Debug.Assert(field != null);
    Debug.Assert(term != null);

    Terms fieldTerms = GetTerms(r, field);
    if (fieldTerms == null)
    {
        return null;
    }

    // No enumerator to reuse, hence the null arguments.
    TermsEnum cursor = fieldTerms.GetIterator(null);
    if (!cursor.SeekExact(term))
    {
        return null;
    }

    return cursor.DocsAndPositions(liveDocs, null, flags);
}
/// <summary>
/// Expert: builds a <see cref="MultiFields"/> directly from already-gathered
/// per-sub-reader <see cref="Fields"/> and their slices. The arrays are stored
/// as given, without copying.
/// <para/>
/// @lucene.internal
/// </summary>
/// <param name="subs">The <see cref="Fields"/> instances to merge.</param>
/// <param name="subSlices">Slices parallel to <paramref name="subs"/>: entry <c>i</c> is paired with <c>subs[i]</c>.</param>
// TODO: why is this public?
public MultiFields(Fields[] subs, ReaderSlice[] subSlices)
{
    this.subSlices = subSlices;
    this.subs = subs;
}
/// <summary>
/// Enumerates field names by combining the enumerators of every sub
/// <see cref="Fields"/> through a <see cref="MergedIterator{T}"/>.
/// </summary>
/// <returns>An enumerator over the merged field names.</returns>
public override IEnumerator<string> GetEnumerator()
{
    // NOTE(review): MergedIterator presumably yields each distinct name once,
    // in sorted order -- confirm against its implementation.
    var perSubEnumerators = new IEnumerator<string>[subs.Length];
    int slot = 0;
    foreach (Fields sub in subs)
    {
        perSubEnumerators[slot] = sub.GetEnumerator();
        slot++;
    }
    return new MergedIterator<string>(perSubEnumerators);
}
/// <summary>
/// Returns the merged <see cref="Terms"/> for <paramref name="field"/>,
/// building it on first request and caching it for later calls.
/// </summary>
/// <param name="field">The field name.</param>
/// <returns>
/// A <see cref="MultiTerms"/> over every sub that has the field, or
/// <c>null</c> when no sub has it.
/// </returns>
public override Terms GetTerms(string field)
{
    Terms cached;
    if (terms.TryGetValue(field, out cached) && cached != null)
    {
        return cached;
    }

    // First request for this field: collect every sub that has it,
    // together with the slice that belongs to that sub.
    var matchingTerms = new List<Terms>();
    var matchingSlices = new List<ReaderSlice>();
    for (int subIndex = 0; subIndex < subs.Length; subIndex++)
    {
        Terms subTerms = subs[subIndex].GetTerms(field);
        if (subTerms == null)
        {
            continue;
        }

        matchingTerms.Add(subTerms);
        matchingSlices.Add(subSlices[subIndex]);
    }

    if (matchingTerms.Count == 0)
    {
        // Deliberately not cached: the number of fields that don't exist
        // is unbounded, and so would be the cache.
        return null;
    }

    Terms merged = new MultiTerms(matchingTerms.ToArray(), matchingSlices.ToArray());
    terms[field] = merged;
    return merged;
}
/// <summary>
/// Always <c>-1</c>: this merged view does not compute the number of
/// distinct fields across its subs.
/// </summary>
public override int Count
{
    get
    {
        return -1;
    }
}
/// <summary>
/// Builds the merged <see cref="FieldInfos"/> of a composite reader by
/// adding each leaf's <see cref="FieldInfos"/> to a single builder.
/// <para/>
/// NOTE: the returned field numbers will likely not
/// correspond to the actual field numbers in the underlying
/// readers, and codec metadata (<see cref="FieldInfo.GetAttribute(string)"/>)
/// will be unavailable.
/// </summary>
/// <param name="reader">The reader whose leaves are merged.</param>
/// <returns>The result of finishing the builder after all leaves were added.</returns>
public static FieldInfos GetMergedFieldInfos(IndexReader reader)
{
    FieldInfos.Builder merged = new FieldInfos.Builder();
    var leaves = reader.Leaves;
    for (int leafIndex = 0; leafIndex < leaves.Count; leafIndex++)
    {
        AtomicReaderContext leaf = leaves[leafIndex];
        merged.Add(leaf.AtomicReader.FieldInfos);
    }
    return merged.Finish();
}
/// <summary>
/// Collects the names of the <b>indexed</b> fields of a composite reader.
/// The names are taken from the merged <see cref="FieldInfos"/> returned by
/// <see cref="GetMergedFieldInfos(IndexReader)"/>, keeping only entries whose
/// <see cref="FieldInfo.IsIndexed"/> is <c>true</c>.
/// </summary>
/// <param name="reader">The reader whose fields are inspected.</param>
/// <returns>
/// A new set of field names; empty when no field is indexed.
/// </returns>
public static ICollection<string> GetIndexedFields(IndexReader reader)
{
    ICollection<string> indexedNames = new JCG.HashSet<string>();
    foreach (FieldInfo info in GetMergedFieldInfos(reader))
    {
        if (!info.IsIndexed)
        {
            continue;
        }

        indexedNames.Add(info.Name);
    }
    return indexedNames;
}
}
}