// blob: 3bb322c16ab5314b588072712b996bac58e38c77 [file] [log] [blame]
using System.Collections.Generic;
using System.IO;
using JCG = J2N.Collections.Generic;
namespace Lucene.Net.Search.Payloads
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using AtomicReaderContext = Lucene.Net.Index.AtomicReaderContext;
using IndexReaderContext = Lucene.Net.Index.IndexReaderContext;
using SpanNearQuery = Lucene.Net.Search.Spans.SpanNearQuery;
using SpanOrQuery = Lucene.Net.Search.Spans.SpanOrQuery;
using SpanQuery = Lucene.Net.Search.Spans.SpanQuery;
using Spans = Lucene.Net.Search.Spans.Spans;
using SpanTermQuery = Lucene.Net.Search.Spans.SpanTermQuery;
using Term = Lucene.Net.Index.Term;
using TermContext = Lucene.Net.Index.TermContext;
/// <summary>
/// Experimental class to get the set of payloads for most standard Lucene queries.
/// Operates like Highlighter - the <see cref="Index.IndexReader"/> should contain only the document of interest,
/// so it is best to use a MemoryIndex.
/// <para/>
/// @lucene.experimental
/// </summary>
public class PayloadSpanUtil
{
private readonly IndexReaderContext context; // LUCENENET: marked readonly
/// <summary>
/// Creates a utility that extracts payloads through the supplied reader context.
/// </summary>
/// <param name="context">
/// The reader context that contains the document with the payloads to extract.
/// </param>
/// <seealso cref="Index.IndexReader.Context"/>
public PayloadSpanUtil(IndexReaderContext context) => this.context = context;
/// <summary>
/// Gathers the payloads matched by <paramref name="query"/> into a newly created list.
/// The query should be rewritten beforehand for wild/fuzzy support.
/// </summary>
/// <param name="query"> The rewritten query. </param>
/// <returns> The collection of payloads that were found. </returns>
/// <exception cref="IOException"> if there is a low-level I/O error </exception>
public virtual ICollection<byte[]> GetPayloadsForQuery(Query query)
{
    var collected = new List<byte[]>();

    // All of the work happens in the recursive translation step, which appends to the list.
    QueryToSpanQuery(query, collected);

    return collected;
}
/// <summary>
/// Translates <paramref name="query"/> into one or more span queries and adds the
/// payloads they match to <paramref name="payloads"/>. Composite queries are unwrapped
/// recursively; a query of any type not listed below adds nothing.
/// </summary>
/// <param name="query"> The query to translate. </param>
/// <param name="payloads"> The collection that receives the payloads found. </param>
private void QueryToSpanQuery(Query query, ICollection<byte[]> payloads)
{
    switch (query)
    {
        case BooleanQuery bq:
        {
            // Recurse into every clause except the prohibited ones.
            foreach (BooleanClause clause in bq.GetClauses())
            {
                if (clause.IsProhibited)
                {
                    continue;
                }

                QueryToSpanQuery(clause.Query, payloads);
            }

            break;
        }

        case PhraseQuery phrase:
        {
            // One span term query per phrase term, combined into a near query.
            Term[] phraseTerms = phrase.GetTerms();
            SpanQuery[] termSpans = new SpanQuery[phraseTerms.Length];
            for (int idx = 0; idx < phraseTerms.Length; idx++)
            {
                termSpans[idx] = new SpanTermQuery(phraseTerms[idx]);
            }

            // The near query is in-order only when the slop is exactly zero.
            int phraseSlop = phrase.Slop;
            SpanNearQuery near = new SpanNearQuery(termSpans, phraseSlop, phraseSlop == 0);
            near.Boost = query.Boost;
            GetPayloads(payloads, near);
            break;
        }

        case TermQuery single:
        {
            SpanTermQuery termSpan = new SpanTermQuery(single.Term);
            termSpan.Boost = query.Boost;
            GetPayloads(payloads, termSpan);
            break;
        }

        case SpanQuery span:
        {
            // Already a span query: use it as is.
            GetPayloads(payloads, span);
            break;
        }

        case FilteredQuery filtered:
        {
            // Only the wrapped query is considered.
            QueryToSpanQuery(filtered.Query, payloads);
            break;
        }

        case DisjunctionMaxQuery disMax:
        {
            foreach (var disjunct in disMax)
            {
                QueryToSpanQuery(disjunct, payloads);
            }

            break;
        }

        case MultiPhraseQuery multiPhrase:
        {
            CollectMultiPhrasePayloads(multiPhrase, payloads);
            break;
        }
    }
}

/// <summary>
/// Converts a <see cref="MultiPhraseQuery"/> into a near query over one
/// <see cref="SpanOrQuery"/> per distinct position and adds the matching payloads
/// to <paramref name="payloads"/>. Does nothing when the query has no positions.
/// </summary>
private void CollectMultiPhrasePayloads(MultiPhraseQuery mpq, ICollection<byte[]> payloads)
{
    IList<Term[]> termArrays = mpq.GetTermArrays();
    int[] positions = mpq.GetPositions();
    if (positions.Length == 0)
    {
        return;
    }

    // Find the largest position so the per-position table can be sized.
    int highest = positions[0];
    for (int p = 1; p < positions.Length; p++)
    {
        if (positions[p] > highest)
        {
            highest = positions[p];
        }
    }

    // LUCENENET: Changed from Query to SpanQuery to eliminate the O(n) cast
    // required to instantiate SpanOrQuery below
    IList<SpanQuery>[] slots = new List<SpanQuery>[highest + 1];
    int occupied = 0;
    for (int i = 0; i < termArrays.Count; ++i)
    {
        Term[] alternatives = termArrays[i];
        int slot = positions[i];
        IList<SpanQuery> bucket = slots[slot];
        if (bucket == null)
        {
            bucket = new List<SpanQuery>(alternatives.Length);
            slots[slot] = bucket;
            ++occupied;
        }

        foreach (Term alternative in alternatives)
        {
            bucket.Add(new SpanTermQuery(alternative));
        }
    }

    // Positions with no terms are gaps; every other position becomes one OR clause.
    int gaps = slots.Length - occupied;
    SpanQuery[] orClauses = new SpanQuery[occupied];
    int filled = 0;
    foreach (IList<SpanQuery> bucket in slots)
    {
        if (bucket != null)
        {
            orClauses[filled++] = new SpanOrQuery(bucket);
        }
    }

    // The slop is widened by the number of gaps; in-order only when the query slop is zero.
    int mpqSlop = mpq.Slop;
    SpanNearQuery near = new SpanNearQuery(orClauses, mpqSlop + gaps, mpqSlop == 0);
    near.Boost = mpq.Boost;
    GetPayloads(payloads, near);
}
/// <summary>
/// Runs <paramref name="query"/> against every leaf of the reader context and adds
/// each payload reported by the resulting spans to <paramref name="payloads"/>.
/// </summary>
/// <param name="payloads"> The collection that receives the payloads found. </param>
/// <param name="query"> The span query to execute. </param>
private void GetPayloads(ICollection<byte[]> payloads, SpanQuery query)
{
    // Gather the query's terms and build a term context for each one up front.
    var extracted = new JCG.SortedSet<Term>();
    query.ExtractTerms(extracted);

    IDictionary<Term, TermContext> contextsByTerm = new Dictionary<Term, TermContext>();
    foreach (Term extractedTerm in extracted)
    {
        contextsByTerm[extractedTerm] = TermContext.Build(context, extractedTerm);
    }

    foreach (AtomicReaderContext leaf in context.Leaves)
    {
        Spans matches = query.GetSpans(leaf, leaf.AtomicReader.LiveDocs, contextsByTerm);
        while (matches.MoveNext())
        {
            if (!matches.IsPayloadAvailable)
            {
                continue;
            }

            foreach (byte[] bytes in matches.GetPayload())
            {
                payloads.Add(bytes);
            }
        }
    }
}
}
}