/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using System;
using System.Collections.Generic;
using System.Linq;
using Lucene.Net.Support;
using Document = Lucene.Net.Documents.Document;
using FieldSelector = Lucene.Net.Documents.FieldSelector;
using FieldSelectorResult = Lucene.Net.Documents.FieldSelectorResult;
namespace Lucene.Net.Index
{
/// <summary>An IndexReader which reads multiple, parallel indexes. Each index added
/// must have the same number of documents, but typically each contains
/// different fields. Each document contains the union of the fields of all
/// documents with the same document number. When searching, matches for a
/// query term are from the first index added that has the field.
///
/// <p/>This is useful, e.g., with collections that have large fields which
/// change rarely and small fields that change more frequently. The smaller
/// fields may be re-indexed in a new index and both indexes may be searched
/// together.
///
/// <p/><strong>Warning:</strong> It is up to you to make sure all indexes
/// are created and modified the same way. For example, if you add
/// documents to one index, you need to add the same documents in the
/// same order to the other indexes. <em>Failure to do so will result in
/// undefined behavior</em>.
/// </summary>
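/// <example>
/// A minimal usage sketch (the directory variables and read-only open mode are
/// illustrative assumptions, not requirements of this class):
/// <code>
/// // one reader per parallel index; all must share the same maxDoc
/// ParallelReader pr = new ParallelReader();
/// pr.Add(IndexReader.Open(dir1, true));
/// pr.Add(IndexReader.Open(dir2, true));
/// var searcher = new Lucene.Net.Search.IndexSearcher(pr);
/// </code>
/// </example>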
public class ParallelReader:IndexReader, System.ICloneable
{
private List<IndexReader> readers = new List<IndexReader>();
private List<bool> decrefOnClose = new List<bool>(); // remember which subreaders to decRef on close
internal bool incRefReaders = false;
private SortedDictionary<string, IndexReader> fieldToReader = new SortedDictionary<string, IndexReader>();
private IDictionary<IndexReader, ICollection<string>> readerToFields = new HashMap<IndexReader, ICollection<string>>();
private List<IndexReader> storedFieldReaders = new List<IndexReader>();
private int maxDoc;
private int numDocs;
private bool hasDeletions;
/// <summary>Construct a ParallelReader.
/// <p/>Note that all subreaders are closed if this ParallelReader is closed.
/// </summary>
public ParallelReader():this(true)
{
}
/// <summary>Construct a ParallelReader. </summary>
/// <param name="closeSubReaders">indicates whether the subreaders should be closed
/// when this ParallelReader is closed
/// </param>
public ParallelReader(bool closeSubReaders):base()
{
this.incRefReaders = !closeSubReaders;
}
/// <summary>Add an IndexReader.</summary>
/// <throws> IOException if there is a low-level IO error </throws>
public virtual void Add(IndexReader reader)
{
EnsureOpen();
Add(reader, false);
}
/// <summary>Add an IndexReader whose stored fields will not be returned. This can
/// accelerate search when stored fields are only needed from a subset of
/// the IndexReaders.
/// </summary>
/// <throws> IllegalArgumentException if not all indexes contain the same number of documents </throws>
/// <throws> IllegalArgumentException if not all indexes have the same value of <see cref="IndexReader.MaxDoc" /> </throws>
/// <throws> IOException if there is a low-level IO error </throws>
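/// <example>
/// A sketch of the intended use (reader names are hypothetical): stored fields
/// are served only from the first reader, while the second contributes indexed
/// terms alone:
/// <code>
/// pr.Add(smallFieldsReader);       // stored fields returned from this reader
/// pr.Add(bigFieldsReader, true);   // ignoreStoredFields: indexed terms only
/// </code>
/// </example>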
public virtual void Add(IndexReader reader, bool ignoreStoredFields)
{
EnsureOpen();
if (readers.Count == 0)
{
this.maxDoc = reader.MaxDoc;
this.numDocs = reader.NumDocs();
this.hasDeletions = reader.HasDeletions;
}
// check compatibility
if (reader.MaxDoc != maxDoc)
throw new System.ArgumentException("All readers must have same maxDoc: " + maxDoc + "!=" + reader.MaxDoc);
if (reader.NumDocs() != numDocs)
throw new System.ArgumentException("All readers must have same numDocs: " + numDocs + "!=" + reader.NumDocs());
ICollection<string> fields = reader.GetFieldNames(IndexReader.FieldOption.ALL);
readerToFields[reader] = fields;
foreach(var field in fields)
{
// update fieldToReader map
// Do a ContainsKey check first to mimic Java's HashMap behavior (no exception for a missing key)
if (!fieldToReader.ContainsKey(field) || fieldToReader[field] == null)
fieldToReader[field] = reader;
}
if (!ignoreStoredFields)
storedFieldReaders.Add(reader); // add to storedFieldReaders
readers.Add(reader);
if (incRefReaders)
{
reader.IncRef();
}
decrefOnClose.Add(incRefReaders);
}
public override System.Object Clone()
{
try
{
return DoReopen(true);
}
catch (System.Exception ex)
{
throw new System.SystemException(ex.Message, ex);
}
}
/// <summary> Tries to reopen the subreaders.
/// <br/>
/// If one or more subreaders could be re-opened (i.e. subReader.Reopen()
/// returned a new instance != subReader), then a new ParallelReader instance
/// is returned, otherwise this instance is returned.
/// <p/>
/// A re-opened instance might share one or more subreaders with the old
/// instance. Index modification operations result in undefined behavior
/// when performed before the old instance is closed.
/// (see <see cref="IndexReader.Reopen()" />).
/// <p/>
/// If subreaders are shared, then the reference count of those
/// readers is increased to ensure that the subreaders remain open
/// until the last referring reader is closed.
///
/// </summary>
/// <throws> CorruptIndexException if the index is corrupt </throws>
/// <throws> IOException if there is a low-level IO error </throws>
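/// <example>
/// The usual refresh idiom, as a sketch (the cast is safe here because this
/// implementation returns a new ParallelReader whenever any subreader changed):
/// <code>
/// IndexReader refreshed = pr.Reopen();
/// if (refreshed != pr)
/// {
///     pr.Close();                        // release the old instance
///     pr = (ParallelReader) refreshed;
/// }
/// </code>
/// </example>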
public override IndexReader Reopen()
{
lock (this)
{
return DoReopen(false);
}
}
protected internal virtual IndexReader DoReopen(bool doClone)
{
EnsureOpen();
bool reopened = false;
IList<IndexReader> newReaders = new List<IndexReader>();
bool success = false;
try
{
foreach(var oldReader in readers)
{
IndexReader newReader = null;
if (doClone)
{
newReader = (IndexReader) oldReader.Clone();
}
else
{
newReader = oldReader.Reopen();
}
newReaders.Add(newReader);
// if at least one of the subreaders was updated we remember that
// and return a new ParallelReader
if (newReader != oldReader)
{
reopened = true;
}
}
success = true;
}
finally
{
if (!success && reopened)
{
for (int i = 0; i < newReaders.Count; i++)
{
IndexReader r = newReaders[i];
if (r != readers[i])
{
try
{
r.Close();
}
catch (System.IO.IOException)
{
// keep going - we want to clean up as much as possible
}
}
}
}
}
if (reopened)
{
List<bool> newDecrefOnClose = new List<bool>();
ParallelReader pr = new ParallelReader();
for (int i = 0; i < readers.Count; i++)
{
IndexReader oldReader = readers[i];
IndexReader newReader = newReaders[i];
if (newReader == oldReader)
{
newDecrefOnClose.Add(true);
newReader.IncRef();
}
else
{
// this is a new subreader instance, so on close() we don't
// decRef but close it
newDecrefOnClose.Add(false);
}
pr.Add(newReader, !storedFieldReaders.Contains(oldReader));
}
pr.decrefOnClose = newDecrefOnClose;
pr.incRefReaders = incRefReaders;
return pr;
}
else
{
// No subreader was refreshed
return this;
}
}
public override int NumDocs()
{
// Don't call ensureOpen() here (it could affect performance)
return numDocs;
}
public override int MaxDoc
{
get
{
// Don't call ensureOpen() here (it could affect performance)
return maxDoc;
}
}
public override bool HasDeletions
{
get
{
// Don't call ensureOpen() here (it could affect performance)
return hasDeletions;
}
}
// check first reader
public override bool IsDeleted(int n)
{
// Don't call ensureOpen() here (it could affect performance)
if (readers.Count > 0)
return readers[0].IsDeleted(n);
return false;
}
// delete in all readers
protected internal override void DoDelete(int n)
{
foreach(var reader in readers)
{
reader.DeleteDocument(n);
}
hasDeletions = true;
}
// undeleteAll in all readers
protected internal override void DoUndeleteAll()
{
foreach(var reader in readers)
{
reader.UndeleteAll();
}
hasDeletions = false;
}
// append fields from storedFieldReaders
public override Document Document(int n, FieldSelector fieldSelector)
{
EnsureOpen();
Document result = new Document();
foreach(IndexReader reader in storedFieldReaders)
{
bool include = (fieldSelector == null);
if (!include)
{
var fields = readerToFields[reader];
foreach(var field in fields)
{
if (fieldSelector.Accept(field) != FieldSelectorResult.NO_LOAD)
{
include = true;
break;
}
}
}
if (include)
{
var fields = reader.Document(n, fieldSelector).GetFields();
foreach(var field in fields)
{
result.Add(field);
}
}
}
return result;
}
// get all vectors
public override ITermFreqVector[] GetTermFreqVectors(int n)
{
EnsureOpen();
IList<ITermFreqVector> results = new List<ITermFreqVector>();
foreach(var e in fieldToReader)
{
System.String field = e.Key;
IndexReader reader = e.Value;
ITermFreqVector vector = reader.GetTermFreqVector(n, field);
if (vector != null)
results.Add(vector);
}
return results.ToArray();
}
public override ITermFreqVector GetTermFreqVector(int n, System.String field)
{
EnsureOpen();
// TryGetValue mimics Java's Map.get(): an unknown field yields null rather than KeyNotFoundException
IndexReader reader;
fieldToReader.TryGetValue(field, out reader);
return reader == null ? null : reader.GetTermFreqVector(n, field);
}
public override void GetTermFreqVector(int docNumber, System.String field, TermVectorMapper mapper)
{
EnsureOpen();
IndexReader reader;
fieldToReader.TryGetValue(field, out reader);
if (reader != null)
{
reader.GetTermFreqVector(docNumber, field, mapper);
}
}
public override void GetTermFreqVector(int docNumber, TermVectorMapper mapper)
{
EnsureOpen();
foreach(var e in fieldToReader)
{
System.String field = e.Key;
IndexReader reader = e.Value;
reader.GetTermFreqVector(docNumber, field, mapper);
}
}
public override bool HasNorms(System.String field)
{
EnsureOpen();
IndexReader reader;
fieldToReader.TryGetValue(field, out reader);
return reader != null && reader.HasNorms(field);
}
public override byte[] Norms(System.String field)
{
EnsureOpen();
IndexReader reader;
fieldToReader.TryGetValue(field, out reader);
return reader == null ? null : reader.Norms(field);
}
public override void Norms(System.String field, byte[] result, int offset)
{
EnsureOpen();
IndexReader reader;
fieldToReader.TryGetValue(field, out reader);
if (reader != null)
reader.Norms(field, result, offset);
}
protected internal override void DoSetNorm(int n, System.String field, byte value_Renamed)
{
IndexReader reader;
fieldToReader.TryGetValue(field, out reader);
if (reader != null)
reader.DoSetNorm(n, field, value_Renamed);
}
public override TermEnum Terms()
{
EnsureOpen();
return new ParallelTermEnum(this);
}
public override TermEnum Terms(Term term)
{
EnsureOpen();
return new ParallelTermEnum(this, term);
}
public override int DocFreq(Term term)
{
EnsureOpen();
IndexReader reader;
fieldToReader.TryGetValue(term.Field, out reader);
return reader == null ? 0 : reader.DocFreq(term);
}
public override TermDocs TermDocs(Term term)
{
EnsureOpen();
return new ParallelTermDocs(this, term);
}
public override TermDocs TermDocs()
{
EnsureOpen();
return new ParallelTermDocs(this);
}
public override TermPositions TermPositions(Term term)
{
EnsureOpen();
return new ParallelTermPositions(this, term);
}
public override TermPositions TermPositions()
{
EnsureOpen();
return new ParallelTermPositions(this);
}
/// <summary> Checks recursively if all subreaders are up to date. </summary>
public override bool IsCurrent()
{
foreach (var reader in readers)
{
if (!reader.IsCurrent())
{
return false;
}
}
// all subreaders are up to date
return true;
}
/// <summary> Checks recursively if all subindexes are optimized </summary>
public override bool IsOptimized()
{
foreach (var reader in readers)
{
if (!reader.IsOptimized())
{
return false;
}
}
// all subindexes are optimized
return true;
}
/// <summary>Not implemented.</summary>
/// <throws> UnsupportedOperationException </throws>
public override long Version
{
get { throw new System.NotSupportedException("ParallelReader does not support this method."); }
}
// for testing
public /*internal*/ virtual IndexReader[] GetSubReaders()
{
return readers.ToArray();
}
protected internal override void DoCommit(IDictionary<string, string> commitUserData)
{
foreach(var reader in readers)
reader.Commit(commitUserData);
}
protected internal override void DoClose()
{
lock (this)
{
for (int i = 0; i < readers.Count; i++)
{
if (decrefOnClose[i])
{
readers[i].DecRef();
}
else
{
readers[i].Close();
}
}
}
Lucene.Net.Search.FieldCache_Fields.DEFAULT.Purge(this);
}
public override System.Collections.Generic.ICollection<string> GetFieldNames(IndexReader.FieldOption fieldNames)
{
EnsureOpen();
ISet<string> fieldSet = Lucene.Net.Support.Compatibility.SetFactory.CreateHashSet<string>();
foreach(var reader in readers)
{
ICollection<string> names = reader.GetFieldNames(fieldNames);
fieldSet.UnionWith(names);
}
return fieldSet;
}
private class ParallelTermEnum : TermEnum
{
private void InitBlock(ParallelReader enclosingInstance)
{
this.enclosingInstance = enclosingInstance;
}
private ParallelReader enclosingInstance;
public ParallelReader Enclosing_Instance
{
get
{
return enclosingInstance;
}
}
private System.String field;
private IEnumerator<string> fieldIterator;
private TermEnum termEnum;
private bool isDisposed;
public ParallelTermEnum(ParallelReader enclosingInstance)
{
InitBlock(enclosingInstance);
// First() throws InvalidOperationException (not ArgumentOutOfRangeException) on an
// empty key set; FirstOrDefault() returns null, leaving field == null, termEnum == null
field = Enclosing_Instance.fieldToReader.Keys.FirstOrDefault();
if (field != null)
termEnum = Enclosing_Instance.fieldToReader[field].Terms();
}
public ParallelTermEnum(ParallelReader enclosingInstance, Term term)
{
InitBlock(enclosingInstance);
field = term.Field;
IndexReader reader;
Enclosing_Instance.fieldToReader.TryGetValue(field, out reader);
if (reader != null)
termEnum = reader.Terms(term);
}
public override bool Next()
{
if (termEnum == null)
return false;
// another term in this field?
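// (Lucene interns field names, so this reference comparison is intentional and fast)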
if (termEnum.Next() && (System.Object) termEnum.Term.Field == (System.Object) field)
return true; // yes, keep going
termEnum.Close(); // close old termEnum
// find the next field with terms, if any
if (fieldIterator == null)
{
var newList = new List<string>();
if (Enclosing_Instance.fieldToReader != null && Enclosing_Instance.fieldToReader.Count > 0)
{
var comparer = Enclosing_Instance.fieldToReader.Comparer;
foreach(var entry in Enclosing_Instance.fieldToReader.Keys.Where(x => comparer.Compare(x, field) >= 0))
newList.Add(entry);
}
fieldIterator = newList.Skip(1).GetEnumerator(); // skip the current field to reach the next one
}
while (fieldIterator.MoveNext())
{
field = fieldIterator.Current;
termEnum = Enclosing_Instance.fieldToReader[field].Terms(new Term(field));
Term term = termEnum.Term;
if (term != null && (System.Object) term.Field == (System.Object) field)
return true;
else
termEnum.Close();
}
return false; // no more fields
}
public override Term Term
{
get
{
if (termEnum == null)
return null;
return termEnum.Term;
}
}
public override int DocFreq()
{
if (termEnum == null)
return 0;
return termEnum.DocFreq();
}
protected override void Dispose(bool disposing)
{
if (isDisposed) return;
if (disposing)
{
if (termEnum != null)
termEnum.Close();
}
isDisposed = true;
}
}
// wrap a TermDocs in order to support seek(Term)
private class ParallelTermDocs : TermDocs
{
private void InitBlock(ParallelReader enclosingInstance)
{
this.enclosingInstance = enclosingInstance;
}
private ParallelReader enclosingInstance;
public ParallelReader Enclosing_Instance
{
get
{
return enclosingInstance;
}
}
protected internal TermDocs termDocs;
private bool isDisposed;
public ParallelTermDocs(ParallelReader enclosingInstance)
{
InitBlock(enclosingInstance);
}
public ParallelTermDocs(ParallelReader enclosingInstance, Term term)
{
InitBlock(enclosingInstance);
if (term == null)
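// a null Term asks the underlying reader for an enumerator over all non-deleted docs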
termDocs = (Enclosing_Instance.readers.Count == 0)
? null
: Enclosing_Instance.readers[0].TermDocs(null);
else
Seek(term);
}
public virtual int Doc
{
get { return termDocs.Doc; }
}
public virtual int Freq
{
get { return termDocs.Freq; }
}
public virtual void Seek(Term term)
{
IndexReader reader;
Enclosing_Instance.fieldToReader.TryGetValue(term.Field, out reader);
termDocs = reader != null ? reader.TermDocs(term) : null;
}
public virtual void Seek(TermEnum termEnum)
{
Seek(termEnum.Term);
}
public virtual bool Next()
{
if (termDocs == null)
return false;
return termDocs.Next();
}
public virtual int Read(int[] docs, int[] freqs)
{
if (termDocs == null)
return 0;
return termDocs.Read(docs, freqs);
}
public virtual bool SkipTo(int target)
{
if (termDocs == null)
return false;
return termDocs.SkipTo(target);
}
[Obsolete("Use Dispose() instead")]
public virtual void Close()
{
Dispose();
}
public void Dispose()
{
Dispose(true);
}
protected virtual void Dispose(bool disposing)
{
if (isDisposed) return;
if (disposing)
{
if (termDocs != null)
termDocs.Close();
}
isDisposed = true;
}
}
private class ParallelTermPositions:ParallelTermDocs, TermPositions
{
private void InitBlock(ParallelReader enclosingInstance)
{
this.enclosingInstance = enclosingInstance;
}
private ParallelReader enclosingInstance;
public new ParallelReader Enclosing_Instance
{
get
{
return enclosingInstance;
}
}
public ParallelTermPositions(ParallelReader enclosingInstance):base(enclosingInstance)
{
InitBlock(enclosingInstance);
}
public ParallelTermPositions(ParallelReader enclosingInstance, Term term):base(enclosingInstance)
{
InitBlock(enclosingInstance);
Seek(term);
}
public override void Seek(Term term)
{
IndexReader reader;
Enclosing_Instance.fieldToReader.TryGetValue(term.Field, out reader);
termDocs = reader != null ? reader.TermPositions(term) : null;
}
public virtual int NextPosition()
{
// It is an error to call this if there is no next position, e.g. if termDocs==null
return ((TermPositions) termDocs).NextPosition();
}
public virtual int PayloadLength
{
get { return ((TermPositions) termDocs).PayloadLength; }
}
public virtual byte[] GetPayload(byte[] data, int offset)
{
return ((TermPositions) termDocs).GetPayload(data, offset);
}
// TODO: Remove warning after API has been finalized
public virtual bool IsPayloadAvailable
{
get { return ((TermPositions) termDocs).IsPayloadAvailable; }
}
}
}
}