// src/Lucene.Net/Index/MultiFields.cs - lucenenet - Git at Google

 using Lucene.Net.Util;
 using System.Collections.Concurrent;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Linq;
 using JCG = J2N.Collections.Generic;

 namespace Lucene.Net.Index
 {
     /*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
      * The ASF licenses this file to You under the Apache License, Version 2.0
      * (the "License"); you may not use this file except in compliance with
      * the License.  You may obtain a copy of the License at
      *
      *     http://www.apache.org/licenses/LICENSE-2.0
      *
      * Unless required by applicable law or agreed to in writing, software
      * distributed under the License is distributed on an "AS IS" BASIS,
      * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      * See the License for the specific language governing permissions and
      * limitations under the License.
      */

     using IBits = Lucene.Net.Util.IBits;
     using BytesRef = Lucene.Net.Util.BytesRef;

     /// <summary>
     /// Exposes flex API, merged from flex API of sub-segments.
     /// This is useful when you're interacting with an
     /// <see cref="IndexReader"/> implementation that consists of sequential
     /// sub-readers (eg <see cref="DirectoryReader"/> or
     /// <see cref="MultiReader"/>).
     ///
     /// <para/><b>NOTE</b>: for composite readers, you'll get better
     /// performance by gathering the sub readers using
     /// <see cref="IndexReader.Context"/> to get the
     /// atomic leaves and then operate per-AtomicReader,
     /// instead of using this class.
     /// <para/>
     /// @lucene.experimental
     /// </summary>

     public sealed class MultiFields : Fields
     {
         // Per-sub-reader Fields being merged; subs[i] pairs with subSlices[i].
         private readonly Fields[] subs;
         private readonly ReaderSlice[] subSlices;

         // Cache of merged Terms keyed by field name, filled lazily by GetTerms(string).
         // Only fields found in at least one sub are ever stored here.
         private readonly IDictionary<string, Terms> terms = new ConcurrentDictionary<string, Terms>();

         /// <summary>
         /// Returns a single <see cref="Fields"/> instance for this
         /// reader, merging fields/terms/docs/positions on the
         /// fly.  This method will return <c>null</c> if the reader
         /// has no postings.
         ///
         /// <para/><b>NOTE</b>: this is a slow way to access postings.
         /// It's better to get the sub-readers and iterate through them
         /// yourself.
         /// </summary>
         /// <param name="reader">Reader whose leaves are inspected.</param>
         /// <returns>
         /// <c>null</c> when no leaf exposes a <see cref="Fields"/>; the leaf's own
         /// <see cref="Fields"/> when exactly one leaf has one; otherwise a new
         /// <see cref="MultiFields"/> over all leaves that have one.
         /// </returns>
         public static Fields GetFields(IndexReader reader)
         {
             var leaves = reader.Leaves;
             switch (leaves.Count)
             {
                 case 0:
                     // no fields
                     return null;

                 case 1:
                     // already an atomic reader / reader with a single leaf
                     return leaves[0].AtomicReader.Fields;

                 default:
                     // Concrete List<T> (rather than IList<T>) so ToArray() binds to
                     // List<T>.ToArray() instead of the LINQ extension method.
                     var fields = new List<Fields>(leaves.Count);
                     var slices = new List<ReaderSlice>(leaves.Count);
                     foreach (AtomicReaderContext ctx in leaves)
                     {
                         AtomicReader r = ctx.AtomicReader;
                         Fields f = r.Fields;
                         if (f != null)
                         {
                             fields.Add(f);
                             // The slice's third argument is the index of f within 'fields',
                             // i.e. leaves without Fields are not counted.
                             slices.Add(new ReaderSlice(ctx.DocBase, r.MaxDoc, fields.Count - 1));
                         }
                     }

                     if (fields.Count == 0)
                     {
                         return null;
                     }
                     if (fields.Count == 1)
                     {
                         return fields[0];
                     }
                     return new MultiFields(fields.ToArray(), slices.ToArray());
             }
         }

         /// <summary>
         /// Returns a single <see cref="IBits"/> instance for this
         /// reader, merging live Documents on the
         /// fly.  This method will return <c>null</c> if the reader
         /// has no deletions.
         ///
         /// <para/><b>NOTE</b>: this is a very slow way to access live docs.
         /// For example, each <see cref="IBits"/> access will require a binary search.
         /// It's better to get the sub-readers and iterate through them
         /// yourself.
         /// </summary>
         /// <param name="reader">Reader whose leaves are inspected.</param>
         /// <returns>
         /// <c>null</c> when <see cref="IndexReader.HasDeletions"/> is <c>false</c>; the single
         /// leaf's live docs when there is exactly one leaf; otherwise a
         /// <see cref="MultiBits"/> over every leaf's live docs.
         /// </returns>
         public static IBits GetLiveDocs(IndexReader reader)
         {
             if (!reader.HasDeletions)
             {
                 return null;
             }

             IList<AtomicReaderContext> leaves = reader.Leaves;
             int size = leaves.Count;
             Debug.Assert(size > 0, "A reader with deletions must have at least one leave");
             if (size == 1)
             {
                 return leaves[0].AtomicReader.LiveDocs;
             }

             var liveDocs = new IBits[size];
             // One extra slot: starts[size] holds the reader's MaxDoc as the end boundary.
             int[] starts = new int[size + 1];
             for (int i = 0; i < size; i++)
             {
                 // record all liveDocs, even if they are null
                 AtomicReaderContext ctx = leaves[i];
                 liveDocs[i] = ctx.AtomicReader.LiveDocs;
                 starts[i] = ctx.DocBase;
             }
             starts[size] = reader.MaxDoc;
             return new MultiBits(liveDocs, starts, true);
         }

         /// <summary>
         /// Returns the merged <see cref="Terms"/> of <paramref name="field"/> for the reader.
         /// This method may return <c>null</c> if the field does not exist
         /// (or if <see cref="GetFields(IndexReader)"/> returns <c>null</c>).
         /// </summary>
         public static Terms GetTerms(IndexReader r, string field)
         {
             Fields fields = GetFields(r);
             return fields == null ? null : fields.GetTerms(field);
         }

         /// <summary>
         /// Returns <see cref="DocsEnum"/> for the specified field &amp;
         /// term.  This will return <c>null</c> if the field or term does
         /// not exist.  Delegates to the five-argument overload with
         /// <see cref="DocsFlags.FREQS"/>.
         /// </summary>
         public static DocsEnum GetTermDocsEnum(IndexReader r, IBits liveDocs, string field, BytesRef term)
         {
             return GetTermDocsEnum(r, liveDocs, field, term, DocsFlags.FREQS);
         }

         /// <summary>
         /// Returns <see cref="DocsEnum"/> for the specified field &amp;
         /// term, with control over whether freqs are required.
         /// Some codecs may be able to optimize their
         /// implementation when freqs are not required.  This will
         /// return <c>null</c> if the field or term does not exist.  See
         /// <see cref="TermsEnum.Docs(IBits, DocsEnum, DocsFlags)"/>.
         /// </summary>
         public static DocsEnum GetTermDocsEnum(IndexReader r, IBits liveDocs, string field, BytesRef term, DocsFlags flags)
         {
             TermsEnum termsEnum = SeekExactOrNull(r, field, term);
             return termsEnum == null ? null : termsEnum.Docs(liveDocs, null, flags);
         }

         /// <summary>
         /// Returns <see cref="DocsAndPositionsEnum"/> for the specified
         /// field &amp; term.  This will return <c>null</c> if the field or
         /// term does not exist or positions were not indexed.  Delegates to the
         /// five-argument overload requesting both offsets and payloads. </summary>
         /// <seealso cref="GetTermPositionsEnum(IndexReader, IBits, string, BytesRef, DocsAndPositionsFlags)"/>
         public static DocsAndPositionsEnum GetTermPositionsEnum(IndexReader r, IBits liveDocs, string field, BytesRef term)
         {
             return GetTermPositionsEnum(r, liveDocs, field, term, DocsAndPositionsFlags.OFFSETS | DocsAndPositionsFlags.PAYLOADS);
         }

         /// <summary>
         /// Returns <see cref="DocsAndPositionsEnum"/> for the specified
         /// field &amp; term, with control over whether offsets and payloads are
         /// required.  Some codecs may be able to optimize
         /// their implementation when offsets and/or payloads are not
         /// required. This will return <c>null</c> if the field or term does not
         /// exist or positions were not indexed. See
         /// <see cref="TermsEnum.DocsAndPositions(IBits, DocsAndPositionsEnum, DocsAndPositionsFlags)"/>.
         /// </summary>
         public static DocsAndPositionsEnum GetTermPositionsEnum(IndexReader r, IBits liveDocs, string field, BytesRef term, DocsAndPositionsFlags flags)
         {
             TermsEnum termsEnum = SeekExactOrNull(r, field, term);
             return termsEnum == null ? null : termsEnum.DocsAndPositions(liveDocs, null, flags);
         }

         /// <summary>
         /// Shared lookup for the enum getters: fetches the merged terms of
         /// <paramref name="field"/> and positions a new <see cref="TermsEnum"/> on
         /// <paramref name="term"/>. Returns <c>null</c> when the field has no terms
         /// or the exact term is not found.
         /// </summary>
         private static TermsEnum SeekExactOrNull(IndexReader r, string field, BytesRef term)
         {
             Debug.Assert(field != null);
             Debug.Assert(term != null);
             Terms fieldTerms = GetTerms(r, field);
             if (fieldTerms == null)
             {
                 return null;
             }
             TermsEnum termsEnum = fieldTerms.GetIterator(null);
             return termsEnum.SeekExact(term) ? termsEnum : null;
         }

         /// <summary>
         /// Expert: construct a new <see cref="MultiFields"/> instance directly.
         /// <para/>
         /// @lucene.internal
         /// </summary>
         /// <param name="subs">The <see cref="Fields"/> instances to merge.</param>
         /// <param name="subSlices">Slices paired by index with <paramref name="subs"/>.</param>
         // TODO: why is this public?
         public MultiFields(Fields[] subs, ReaderSlice[] subSlices)
         {
             this.subs = subs;
             this.subSlices = subSlices;
         }

         /// <summary>
         /// Returns an enumerator over field names, produced by a
         /// <see cref="MergedIterator{T}"/> over the enumerators of every sub.
         /// </summary>
         public override IEnumerator<string> GetEnumerator()
         {
             IEnumerator<string>[] subIterators = new IEnumerator<string>[subs.Length];
             for (int i = 0; i < subs.Length; i++)
             {
                 subIterators[i] = subs[i].GetEnumerator();
             }
             return new MergedIterator<string>(subIterators);
         }

         /// <summary>
         /// Returns the merged <see cref="Terms"/> for <paramref name="field"/>, or
         /// <c>null</c> if no sub has terms for it. Non-null results are cached per field.
         /// </summary>
         public override Terms GetTerms(string field)
         {
             Terms result;
             if (terms.TryGetValue(field, out result) && result != null)
             {
                 return result;
             }

             // Lazy init: first time this field is requested, we
             // create & add to terms:
             var fieldTerms = new List<Terms>();
             var fieldSlices = new List<ReaderSlice>();

             // Gather all sub-readers that share this field
             for (int i = 0; i < subs.Length; i++)
             {
                 // Named subTerms (not 'terms') so the cache field is never shadowed.
                 Terms subTerms = subs[i].GetTerms(field);
                 if (subTerms != null)
                 {
                     fieldTerms.Add(subTerms);
                     fieldSlices.Add(subSlices[i]);
                 }
             }

             if (fieldTerms.Count == 0)
             {
                 // don't cache this case with an unbounded cache, since the number of fields that don't exist
                 // is unbounded.
                 return null;
             }

             result = new MultiTerms(fieldTerms.ToArray(), fieldSlices.ToArray());
             terms[field] = result;
             return result;
         }

         /// <summary>
         /// Always <c>-1</c>; this class does not compute the number of merged fields.
         /// </summary>
         public override int Count
         {
             get { return -1; }
         }

         /// <summary>
         /// Call this to get the (merged) <see cref="FieldInfos"/> for a
         /// composite reader.
         /// <para/>
         /// NOTE: the returned field numbers will likely not
         /// correspond to the actual field numbers in the underlying
         /// readers, and codec metadata (<see cref="FieldInfo.GetAttribute(string)"/>)
         /// will be unavailable.
         /// </summary>
         public static FieldInfos GetMergedFieldInfos(IndexReader reader)
         {
             var builder = new FieldInfos.Builder();
             foreach (AtomicReaderContext ctx in reader.Leaves)
             {
                 builder.Add(ctx.AtomicReader.FieldInfos);
             }
             return builder.Finish();
         }

         /// <summary>
         /// Call this to get the names of the indexed fields <b>only</b> for a
         /// composite reader: every <see cref="FieldInfo"/> from
         /// <see cref="GetMergedFieldInfos(IndexReader)"/> whose
         /// <see cref="FieldInfo.IsIndexed"/> is <c>true</c> contributes its name.
         /// </summary>
         public static ICollection<string> GetIndexedFields(IndexReader reader)
         {
             ICollection<string> fields = new JCG.HashSet<string>();
             foreach (FieldInfo fieldInfo in GetMergedFieldInfos(reader))
             {
                 if (fieldInfo.IsIndexed)
                 {
                     fields.Add(fieldInfo.Name);
                 }
             }
             return fields;
         }
     }
 }
	using Lucene.Net.Util;
	using System.Collections.Concurrent;
	using System.Collections.Generic;
	using System.Diagnostics;
	using System.Linq;
	using JCG = J2N.Collections.Generic;

	namespace Lucene.Net.Index
	{
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	using IBits = Lucene.Net.Util.IBits;
	using BytesRef = Lucene.Net.Util.BytesRef;

	/// <summary>
	/// Exposes flex API, merged from flex API of sub-segments.
	/// This is useful when you're interacting with an
	/// <see cref="IndexReader"/> implementation that consists of sequential
	/// sub-readers (eg <see cref="DirectoryReader"/> or
	/// <see cref="MultiReader"/>).
	///
	/// <para/><b>NOTE</b>: for composite readers, you'll get better
	/// performance by gathering the sub readers using
	/// <see cref="IndexReader.Context"/> to get the
	/// atomic leaves and then operate per-AtomicReader,
	/// instead of using this class.
	/// <para/>
	/// @lucene.experimental
	/// </summary>

	public sealed class MultiFields : Fields
	{
		// Per-sub-reader Fields being merged; subs[i] pairs with subSlices[i].
		private readonly Fields[] subs;
		private readonly ReaderSlice[] subSlices;

		// Cache of merged Terms keyed by field name, filled lazily by GetTerms(string).
		// Only fields found in at least one sub are ever stored here.
		private readonly IDictionary<string, Terms> terms = new ConcurrentDictionary<string, Terms>();

		/// <summary>
		/// Returns a single <see cref="Fields"/> instance for this
		/// reader, merging fields/terms/docs/positions on the
		/// fly. This method will return <c>null</c> if the reader
		/// has no postings.
		///
		/// <para/><b>NOTE</b>: this is a slow way to access postings.
		/// It's better to get the sub-readers and iterate through them
		/// yourself.
		/// </summary>
		/// <param name="reader">Reader whose leaves are inspected.</param>
		/// <returns>
		/// <c>null</c> when no leaf exposes a <see cref="Fields"/>; the leaf's own
		/// <see cref="Fields"/> when exactly one leaf has one; otherwise a new
		/// <see cref="MultiFields"/> over all leaves that have one.
		/// </returns>
		public static Fields GetFields(IndexReader reader)
		{
			var leaves = reader.Leaves;
			switch (leaves.Count)
			{
				case 0:
					// no fields
					return null;

				case 1:
					// already an atomic reader / reader with a single leaf
					return leaves[0].AtomicReader.Fields;

				default:
					// Concrete List<T> (rather than IList<T>) so ToArray() binds to
					// List<T>.ToArray() instead of the LINQ extension method.
					var fields = new List<Fields>(leaves.Count);
					var slices = new List<ReaderSlice>(leaves.Count);
					foreach (AtomicReaderContext ctx in leaves)
					{
						AtomicReader r = ctx.AtomicReader;
						Fields f = r.Fields;
						if (f != null)
						{
							fields.Add(f);
							// The slice's third argument is the index of f within 'fields',
							// i.e. leaves without Fields are not counted.
							slices.Add(new ReaderSlice(ctx.DocBase, r.MaxDoc, fields.Count - 1));
						}
					}

					if (fields.Count == 0)
					{
						return null;
					}
					if (fields.Count == 1)
					{
						return fields[0];
					}
					return new MultiFields(fields.ToArray(), slices.ToArray());
			}
		}

		/// <summary>
		/// Returns a single <see cref="IBits"/> instance for this
		/// reader, merging live Documents on the
		/// fly. This method will return <c>null</c> if the reader
		/// has no deletions.
		///
		/// <para/><b>NOTE</b>: this is a very slow way to access live docs.
		/// For example, each <see cref="IBits"/> access will require a binary search.
		/// It's better to get the sub-readers and iterate through them
		/// yourself.
		/// </summary>
		/// <param name="reader">Reader whose leaves are inspected.</param>
		/// <returns>
		/// <c>null</c> when <see cref="IndexReader.HasDeletions"/> is <c>false</c>; the single
		/// leaf's live docs when there is exactly one leaf; otherwise a
		/// <see cref="MultiBits"/> over every leaf's live docs.
		/// </returns>
		public static IBits GetLiveDocs(IndexReader reader)
		{
			if (!reader.HasDeletions)
			{
				return null;
			}

			IList<AtomicReaderContext> leaves = reader.Leaves;
			int size = leaves.Count;
			Debug.Assert(size > 0, "A reader with deletions must have at least one leave");
			if (size == 1)
			{
				return leaves[0].AtomicReader.LiveDocs;
			}

			var liveDocs = new IBits[size];
			// One extra slot: starts[size] holds the reader's MaxDoc as the end boundary.
			int[] starts = new int[size + 1];
			for (int i = 0; i < size; i++)
			{
				// record all liveDocs, even if they are null
				AtomicReaderContext ctx = leaves[i];
				liveDocs[i] = ctx.AtomicReader.LiveDocs;
				starts[i] = ctx.DocBase;
			}
			starts[size] = reader.MaxDoc;
			return new MultiBits(liveDocs, starts, true);
		}

		/// <summary>
		/// Returns the merged <see cref="Terms"/> of <paramref name="field"/> for the reader.
		/// This method may return <c>null</c> if the field does not exist
		/// (or if <see cref="GetFields(IndexReader)"/> returns <c>null</c>).
		/// </summary>
		public static Terms GetTerms(IndexReader r, string field)
		{
			Fields fields = GetFields(r);
			return fields == null ? null : fields.GetTerms(field);
		}

		/// <summary>
		/// Returns <see cref="DocsEnum"/> for the specified field &amp;
		/// term. This will return <c>null</c> if the field or term does
		/// not exist. Delegates to the five-argument overload with
		/// <see cref="DocsFlags.FREQS"/>.
		/// </summary>
		public static DocsEnum GetTermDocsEnum(IndexReader r, IBits liveDocs, string field, BytesRef term)
		{
			return GetTermDocsEnum(r, liveDocs, field, term, DocsFlags.FREQS);
		}

		/// <summary>
		/// Returns <see cref="DocsEnum"/> for the specified field &amp;
		/// term, with control over whether freqs are required.
		/// Some codecs may be able to optimize their
		/// implementation when freqs are not required. This will
		/// return <c>null</c> if the field or term does not exist. See
		/// <see cref="TermsEnum.Docs(IBits, DocsEnum, DocsFlags)"/>.
		/// </summary>
		public static DocsEnum GetTermDocsEnum(IndexReader r, IBits liveDocs, string field, BytesRef term, DocsFlags flags)
		{
			TermsEnum termsEnum = SeekExactOrNull(r, field, term);
			return termsEnum == null ? null : termsEnum.Docs(liveDocs, null, flags);
		}

		/// <summary>
		/// Returns <see cref="DocsAndPositionsEnum"/> for the specified
		/// field &amp; term. This will return <c>null</c> if the field or
		/// term does not exist or positions were not indexed. Delegates to the
		/// five-argument overload requesting both offsets and payloads. </summary>
		/// <seealso cref="GetTermPositionsEnum(IndexReader, IBits, string, BytesRef, DocsAndPositionsFlags)"/>
		public static DocsAndPositionsEnum GetTermPositionsEnum(IndexReader r, IBits liveDocs, string field, BytesRef term)
		{
			return GetTermPositionsEnum(r, liveDocs, field, term, DocsAndPositionsFlags.OFFSETS | DocsAndPositionsFlags.PAYLOADS);
		}

		/// <summary>
		/// Returns <see cref="DocsAndPositionsEnum"/> for the specified
		/// field &amp; term, with control over whether offsets and payloads are
		/// required. Some codecs may be able to optimize
		/// their implementation when offsets and/or payloads are not
		/// required. This will return <c>null</c> if the field or term does not
		/// exist or positions were not indexed. See
		/// <see cref="TermsEnum.DocsAndPositions(IBits, DocsAndPositionsEnum, DocsAndPositionsFlags)"/>.
		/// </summary>
		public static DocsAndPositionsEnum GetTermPositionsEnum(IndexReader r, IBits liveDocs, string field, BytesRef term, DocsAndPositionsFlags flags)
		{
			TermsEnum termsEnum = SeekExactOrNull(r, field, term);
			return termsEnum == null ? null : termsEnum.DocsAndPositions(liveDocs, null, flags);
		}

		/// <summary>
		/// Shared lookup for the enum getters: fetches the merged terms of
		/// <paramref name="field"/> and positions a new <see cref="TermsEnum"/> on
		/// <paramref name="term"/>. Returns <c>null</c> when the field has no terms
		/// or the exact term is not found.
		/// </summary>
		private static TermsEnum SeekExactOrNull(IndexReader r, string field, BytesRef term)
		{
			Debug.Assert(field != null);
			Debug.Assert(term != null);
			Terms fieldTerms = GetTerms(r, field);
			if (fieldTerms == null)
			{
				return null;
			}
			TermsEnum termsEnum = fieldTerms.GetIterator(null);
			return termsEnum.SeekExact(term) ? termsEnum : null;
		}

		/// <summary>
		/// Expert: construct a new <see cref="MultiFields"/> instance directly.
		/// <para/>
		/// @lucene.internal
		/// </summary>
		/// <param name="subs">The <see cref="Fields"/> instances to merge.</param>
		/// <param name="subSlices">Slices paired by index with <paramref name="subs"/>.</param>
		// TODO: why is this public?
		public MultiFields(Fields[] subs, ReaderSlice[] subSlices)
		{
			this.subs = subs;
			this.subSlices = subSlices;
		}

		/// <summary>
		/// Returns an enumerator over field names, produced by a
		/// <see cref="MergedIterator{T}"/> over the enumerators of every sub.
		/// </summary>
		public override IEnumerator<string> GetEnumerator()
		{
			IEnumerator<string>[] subIterators = new IEnumerator<string>[subs.Length];
			for (int i = 0; i < subs.Length; i++)
			{
				subIterators[i] = subs[i].GetEnumerator();
			}
			return new MergedIterator<string>(subIterators);
		}

		/// <summary>
		/// Returns the merged <see cref="Terms"/> for <paramref name="field"/>, or
		/// <c>null</c> if no sub has terms for it. Non-null results are cached per field.
		/// </summary>
		public override Terms GetTerms(string field)
		{
			Terms result;
			if (terms.TryGetValue(field, out result) && result != null)
			{
				return result;
			}

			// Lazy init: first time this field is requested, we
			// create & add to terms:
			var fieldTerms = new List<Terms>();
			var fieldSlices = new List<ReaderSlice>();

			// Gather all sub-readers that share this field
			for (int i = 0; i < subs.Length; i++)
			{
				// Named subTerms (not 'terms') so the cache field is never shadowed.
				Terms subTerms = subs[i].GetTerms(field);
				if (subTerms != null)
				{
					fieldTerms.Add(subTerms);
					fieldSlices.Add(subSlices[i]);
				}
			}

			if (fieldTerms.Count == 0)
			{
				// don't cache this case with an unbounded cache, since the number of fields that don't exist
				// is unbounded.
				return null;
			}

			result = new MultiTerms(fieldTerms.ToArray(), fieldSlices.ToArray());
			terms[field] = result;
			return result;
		}

		/// <summary>
		/// Always <c>-1</c>; this class does not compute the number of merged fields.
		/// </summary>
		public override int Count
		{
			get { return -1; }
		}

		/// <summary>
		/// Call this to get the (merged) <see cref="FieldInfos"/> for a
		/// composite reader.
		/// <para/>
		/// NOTE: the returned field numbers will likely not
		/// correspond to the actual field numbers in the underlying
		/// readers, and codec metadata (<see cref="FieldInfo.GetAttribute(string)"/>)
		/// will be unavailable.
		/// </summary>
		public static FieldInfos GetMergedFieldInfos(IndexReader reader)
		{
			var builder = new FieldInfos.Builder();
			foreach (AtomicReaderContext ctx in reader.Leaves)
			{
				builder.Add(ctx.AtomicReader.FieldInfos);
			}
			return builder.Finish();
		}

		/// <summary>
		/// Call this to get the names of the indexed fields <b>only</b> for a
		/// composite reader: every <see cref="FieldInfo"/> from
		/// <see cref="GetMergedFieldInfos(IndexReader)"/> whose
		/// <see cref="FieldInfo.IsIndexed"/> is <c>true</c> contributes its name.
		/// </summary>
		public static ICollection<string> GetIndexedFields(IndexReader reader)
		{
			ICollection<string> fields = new JCG.HashSet<string>();
			foreach (FieldInfo fieldInfo in GetMergedFieldInfos(reader))
			{
				if (fieldInfo.IsIndexed)
				{
					fields.Add(fieldInfo.Name);
				}
			}
			return fields;
		}
	}
	}