blob: 8b1eb1a02ef21e555319aa470c11aa8951c74565 [file] [log] [blame]
// Lucene version compatibility level 4.8.1
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
namespace Lucene.Net.Join
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
internal class TermsIncludingScoreQuery : Query
{
// Name of the field whose indexed terms are looked up in every segment.
private readonly string _field;
// When true, the MV* scorer implementations are used instead of the SV* ones.
private readonly bool _multipleValuesPerDocument;
// The collected join terms; a term is addressed by its id via _terms.Get(id, spare).
private readonly BytesRefHash _terms;
// Score per term, indexed by the same term id that addresses _terms (see the _scores[_ords[i]] usages).
private readonly float[] _scores;
// Term ids in the order produced by _terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer).
private readonly int[] _ords;
// Query used by ExtractTerms, Rewrite and CreateWeight; after Rewrite this holds the rewritten form.
private readonly Query _originalQuery;
// The query as it was before Rewrite replaced it; used by ToString, Equals and GetHashCode.
private readonly Query _unwrittenOriginalQuery;
/// <summary>
/// Creates the query from a set of collected terms and their scores.
/// The term ids are sorted once here using <see cref="BytesRef.UTF8SortedAsUnicodeComparer"/>,
/// and <paramref name="originalQuery"/> is recorded both as the working query and as the
/// pre-rewrite query.
/// </summary>
/// <param name="field">Field whose terms are matched against <paramref name="terms"/>.</param>
/// <param name="multipleValuesPerDocument">Selects the multi-valued scorer implementations when <c>true</c>.</param>
/// <param name="terms">The collected terms.</param>
/// <param name="scores">Score per term, indexed by term id.</param>
/// <param name="originalQuery">The query the terms and scores originate from.</param>
internal TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
    float[] scores, Query originalQuery)
    : this(field, multipleValuesPerDocument, terms, scores,
        terms.Sort(BytesRef.UTF8SortedAsUnicodeComparer), originalQuery, originalQuery)
{
}
/// <summary>
/// Creates the query from already-sorted term ids. Used by <see cref="Rewrite(IndexReader)"/>
/// so that the sort order computed by the first instance is carried over unchanged.
/// </summary>
/// <param name="field">Field whose terms are matched against <paramref name="terms"/>.</param>
/// <param name="multipleValuesPerDocument">Selects the multi-valued scorer implementations when <c>true</c>.</param>
/// <param name="terms">The collected terms.</param>
/// <param name="scores">Score per term, indexed by term id.</param>
/// <param name="ords">Term ids in sorted order.</param>
/// <param name="originalQuery">The (possibly rewritten) query to delegate to.</param>
/// <param name="unwrittenOriginalQuery">The query before rewriting; used for display and equality.</param>
private TermsIncludingScoreQuery(string field, bool multipleValuesPerDocument, BytesRefHash terms,
    float[] scores, int[] ords, Query originalQuery, Query unwrittenOriginalQuery)
{
    // Term data.
    _terms = terms;
    _ords = ords;
    _scores = scores;

    // Matching configuration.
    _field = field;
    _multipleValuesPerDocument = multipleValuesPerDocument;

    // Query bookkeeping.
    _unwrittenOriginalQuery = unwrittenOriginalQuery;
    _originalQuery = originalQuery;
}
/// <summary>
/// Returns a textual form of this query consisting of the field name and the
/// pre-rewrite original query. The <paramref name="string"/> argument is not used.
/// </summary>
public override string ToString(string @string) =>
    string.Format("TermsIncludingScoreQuery{{field={0};originalQuery={1}}}", _field, _unwrittenOriginalQuery);
/// <summary>
/// Delegates term extraction to the wrapped original query.
/// </summary>
/// <param name="terms">Set that receives the extracted terms.</param>
public override void ExtractTerms(ISet<Term> terms) => _originalQuery.ExtractTerms(terms);
/// <summary>
/// Rewrites the wrapped original query. If that yields a different query, a new
/// <see cref="TermsIncludingScoreQuery"/> is returned that shares the terms, scores and
/// sorted ords, wraps the rewritten query, remembers the current one as the pre-rewrite
/// query, and carries over <c>Boost</c>. Otherwise this instance is returned.
/// </summary>
/// <param name="reader">Reader to rewrite against.</param>
public override Query Rewrite(IndexReader reader)
{
    Query rewrittenInner = _originalQuery.Rewrite(reader);
    if (rewrittenInner == _originalQuery)
    {
        // Nothing changed, so no new wrapper is needed.
        return this;
    }

    return new TermsIncludingScoreQuery(_field, _multipleValuesPerDocument, _terms, _scores,
        _ords, rewrittenInner, _originalQuery)
    {
        Boost = this.Boost
    };
}
/// <summary>
/// Two instances are equal when the base <c>Query</c> equality holds, they have the same
/// runtime type, the same field name (ordinal comparison) and equal pre-rewrite original queries.
/// </summary>
/// <remarks>
/// The field and query comparisons are null-safe so that this method agrees with
/// <see cref="GetHashCode"/>, which already tolerates <c>null</c> for both members.
/// </remarks>
/// <param name="obj">Object to compare with.</param>
/// <returns><c>true</c> if <paramref name="obj"/> is an equal query; otherwise <c>false</c>.</returns>
public override bool Equals(object obj)
{
    if (obj is null) return false;
    if (ReferenceEquals(this, obj)) return true;
    if (!base.Equals(obj)) return false;
    if (obj.GetType() != GetType()) return false;

    var other = (TermsIncludingScoreQuery)obj;

    // Static Equals overloads treat (null, null) as equal and (null, x) as unequal
    // instead of throwing NullReferenceException.
    return string.Equals(_field, other._field, StringComparison.Ordinal)
        && object.Equals(_unwrittenOriginalQuery, other._unwrittenOriginalQuery);
}
/// <summary>
/// Combines the base hash code with the hash codes of the field name and the
/// pre-rewrite original query (each contributing 0 when <c>null</c>).
/// </summary>
public override int GetHashCode()
{
    unchecked
    {
        int result = base.GetHashCode();

        int fieldHash = _field?.GetHashCode() ?? 0;
        result = (result * 397) ^ fieldHash;

        int queryHash = _unwrittenOriginalQuery?.GetHashCode() ?? 0;
        result = (result * 397) ^ queryHash;

        return result;
    }
}
/// <summary>
/// Creates the weight of the wrapped original query and wraps it in this query's own
/// <see cref="Weight"/> implementation.
/// </summary>
/// <param name="searcher">Searcher the weight is created for.</param>
public override Weight CreateWeight(IndexSearcher searcher) =>
    new WeightAnonymousInnerClassHelper(this, _originalQuery.CreateWeight(searcher));
/// <summary>
/// Weight for <see cref="TermsIncludingScoreQuery"/>. Normalization is delegated to the
/// weight of the wrapped original query; scoring is done by the nested scorer classes,
/// which look up the collected terms in each segment.
/// </summary>
private class WeightAnonymousInnerClassHelper : Weight
{
    private readonly TermsIncludingScoreQuery outerInstance;
    private readonly Weight originalWeight;

    // Passed back into Terms.GetEnumerator on every scorer request as the reuse candidate.
    private TermsEnum segmentTermsEnum;

    public WeightAnonymousInnerClassHelper(TermsIncludingScoreQuery outerInstance, Weight originalWeight)
    {
        this.originalWeight = originalWeight;
        this.outerInstance = outerInstance;
    }

    /// <summary>
    /// Explains the score of <paramref name="doc"/> using the out-of-order bulk scorer.
    /// Returns a "Not a match" explanation when no bulk scorer is available for the segment.
    /// </summary>
    public override Explanation Explain(AtomicReaderContext context, int doc)
    {
        var bulkScorer = (SVInnerScorer)GetBulkScorer(context, false, null);
        if (bulkScorer == null)
        {
            return new ComplexExplanation(false, 0.0f, "Not a match");
        }

        return bulkScorer.Explain(doc);
    }

    // Always true: GetBulkScorer has optimized implementations for when
    // out-of-order scoring is allowed.
    public override bool ScoresDocsOutOfOrder => true;

    public override Query Query => outerInstance;

    /// <summary>
    /// Returns the original weight's normalization value multiplied by the square of this query's boost.
    /// </summary>
    public override float GetValueForNormalization()
    {
        float inner = originalWeight.GetValueForNormalization();
        return inner * outerInstance.Boost * outerInstance.Boost;
    }

    /// <summary>
    /// Forwards normalization to the original weight, folding this query's boost into the top-level boost.
    /// </summary>
    public override void Normalize(float norm, float topLevelBoost)
    {
        float boostedTopLevel = topLevelBoost * outerInstance.Boost;
        originalWeight.Normalize(norm, boostedTopLevel);
    }

    /// <summary>
    /// Returns an in-order scorer for the segment, or <c>null</c> when the segment has no
    /// terms for the field.
    /// </summary>
    public override Scorer GetScorer(AtomicReaderContext context, IBits acceptDocs)
    {
        var reader = context.AtomicReader;
        Terms fieldTerms = reader.GetTerms(outerInstance._field);
        if (fieldTerms == null)
        {
            return null;
        }

        // Cost estimate: maxDoc multiplied by the field's term count.
        // (The original author left an open question on whether this is the right estimate.)
        int maxDoc = reader.MaxDoc;
        long cost = maxDoc * fieldTerms.Count;

        segmentTermsEnum = fieldTerms.GetEnumerator(segmentTermsEnum);

        return outerInstance._multipleValuesPerDocument
            ? new MVInOrderScorer(outerInstance, this, acceptDocs, segmentTermsEnum, maxDoc, cost)
            : new SVInOrderScorer(outerInstance, this, acceptDocs, segmentTermsEnum, maxDoc, cost);
    }

    /// <summary>
    /// Returns a bulk scorer for the segment. When in-order scoring is requested the base
    /// implementation is used; otherwise one of the out-of-order scorers is returned, or
    /// <c>null</c> when the segment has no terms for the field.
    /// </summary>
    public override BulkScorer GetBulkScorer(AtomicReaderContext context, bool scoreDocsInOrder, IBits acceptDocs)
    {
        if (scoreDocsInOrder)
        {
            return base.GetBulkScorer(context, scoreDocsInOrder, acceptDocs);
        }

        var reader = context.AtomicReader;
        Terms fieldTerms = reader.GetTerms(outerInstance._field);
        if (fieldTerms == null)
        {
            return null;
        }

        // LUCENENET: no cost is computed here; the inner scorers never read it.
        segmentTermsEnum = fieldTerms.GetEnumerator(segmentTermsEnum);

        // Optimized implementations that take advantage of docs being allowed out of order.
        // LUCENENET: the weight and cost arguments are omitted because they were never read.
        return outerInstance._multipleValuesPerDocument
            ? new MVInnerScorer(outerInstance, acceptDocs, segmentTermsEnum, reader.MaxDoc)
            : new SVInnerScorer(outerInstance, acceptDocs, segmentTermsEnum);
    }
}
/// <summary>
/// Out-of-order bulk scorer. This implementation assumes that the 'join' values are used
/// uniquely per doc per field. Used for one-to-many relations.
/// <para/>
/// Terms are visited in sorted-ord order; for each term found in the segment, its docs are
/// emitted with the score stored for that term.
/// </summary>
internal class SVInnerScorer : BulkScorer
{
    private readonly TermsIncludingScoreQuery outerInstance;
    private readonly BytesRef _spare = new BytesRef();
    private readonly IBits _acceptDocs;
    private readonly TermsEnum _termsEnum;
    //private readonly long _cost; // LUCENENET: Never read

    // Index into outerInstance._ords of the next term to seek.
    private int _upto;

    // Docs of the term currently being iterated; null when the next term must be opened.
    internal DocsEnum docsEnum;
    private DocsEnum _reuse;

    // Index into outerInstance._ords of the term that docsEnum belongs to.
    private int _scoreUpto;

    // Last doc id produced; -1 until iteration starts.
    private int _doc;

    internal SVInnerScorer(TermsIncludingScoreQuery outerInstance, /* Weight weight, // LUCENENET: Never read */
        IBits acceptDocs, TermsEnum termsEnum /*, long cost // LUCENENET: Never read */)
    {
        this.outerInstance = outerInstance;
        _acceptDocs = acceptDocs;
        _termsEnum = termsEnum;
        //_cost = cost; // LUCENENET: Never read
        _doc = -1;
    }

    /// <summary>
    /// Collects docs, each with the score of the term that produced it, for as long as the
    /// current doc id is below <paramref name="max"/>.
    /// </summary>
    /// <returns><c>true</c> if iteration stopped before reaching <see cref="DocIdSetIterator.NO_MORE_DOCS"/>.</returns>
    public override bool Score(ICollector collector, int max)
    {
        FakeScorer fakeScorer = new FakeScorer();
        collector.SetScorer(fakeScorer);
        if (_doc == -1)
        {
            _doc = NextDocOutOfOrder();
        }
        while (_doc < max)
        {
            fakeScorer.doc = _doc;
            fakeScorer._score = outerInstance._scores[outerInstance._ords[_scoreUpto]];
            collector.Collect(_doc);
            _doc = NextDocOutOfOrder();
        }
        return _doc != DocIdSetIterator.NO_MORE_DOCS;
    }

    /// <summary>
    /// Returns the next doc of the current term, moving on to the next term that exists in
    /// the segment whenever the current one is exhausted. Returns
    /// <see cref="DocIdSetIterator.NO_MORE_DOCS"/> once all terms have been visited.
    /// </summary>
    private int NextDocOutOfOrder()
    {
        while (true)
        {
            if (docsEnum != null)
            {
                int docId = DocsEnumNextDoc();
                if (docId == DocIdSetIterator.NO_MORE_DOCS)
                {
                    docsEnum = null;
                }
                else
                {
                    return _doc = docId;
                }
            }
            if (_upto == outerInstance._terms.Count)
            {
                return _doc = DocIdSetIterator.NO_MORE_DOCS;
            }
            // Remember which term the upcoming docs belong to before advancing _upto.
            _scoreUpto = _upto;
            if (_termsEnum.SeekExact(outerInstance._terms.Get(outerInstance._ords[_upto++], _spare)))
            {
                docsEnum = _reuse = _termsEnum.Docs(_acceptDocs, _reuse, DocsFlags.NONE);
            }
        }
    }

    /// <summary>
    /// Returns the next doc id of <see cref="docsEnum"/>. Overridable so that subclasses can filter docs.
    /// </summary>
    protected virtual int DocsEnumNextDoc()
    {
        return docsEnum.NextDoc();
    }

    /// <summary>
    /// Explains the score of <paramref name="target"/> by scanning the terms until one is
    /// found whose docs contain it.
    /// </summary>
    /// <param name="target">Segment-local doc id to explain.</param>
    /// <returns>
    /// A matching explanation carrying the score of the term that contains
    /// <paramref name="target"/>, or a non-matching "Not a match" explanation when no term does.
    /// </returns>
    internal Explanation Explain(int target) // LUCENENET NOTE: changed accessibility from private to internal
    {
        bool matched = false;
        int docId;
        do
        {
            docId = NextDocOutOfOrder();
            if (docId < target)
            {
                // docId came from an open docsEnum (NO_MORE_DOCS is never < target), so
                // docsEnum is non-null here and can be advanced directly.
                if (docsEnum.Advance(target) == target)
                {
                    matched = true;
                    break;
                }
            }
            else if (docId == target)
            {
                // Guard against a caller passing NO_MORE_DOCS as the target.
                matched = docId != DocIdSetIterator.NO_MORE_DOCS;
                break;
            }
            docsEnum = null; // goto the next ord.
        } while (docId != DocIdSetIterator.NO_MORE_DOCS);

        if (!matched)
        {
            // All terms were exhausted without hitting the target: do not report the score
            // and term of whichever ord happened to be visited last.
            return new ComplexExplanation(false, 0.0f, "Not a match");
        }

        return new ComplexExplanation(true, outerInstance._scores[outerInstance._ords[_scoreUpto]],
            "Score based on join value " + _termsEnum.Term.Utf8ToString());
    }
}
/// <summary>
/// This implementation tracks whether a doc id has already been emitted. The check makes
/// sure that docs aren't emitted twice for different join values. This means that the first
/// encountered join value determines the score of a document, even if other join values
/// yield a higher score.
/// </summary>
internal class MVInnerScorer : SVInnerScorer
{
    // One bit per doc in the segment; set once the doc has been returned.
    internal readonly FixedBitSet alreadyEmittedDocs;

    internal MVInnerScorer(TermsIncludingScoreQuery outerInstance, /* Weight weight, // LUCENENET: Never read */
        IBits acceptDocs, TermsEnum termsEnum, int maxDoc /*, long cost // LUCENENET: Never read */)
        : base(outerInstance, /*weight, // LUCENENET: Never read */
            acceptDocs, termsEnum /*, cost // LUCENENET: Never read */)
    {
        alreadyEmittedDocs = new FixedBitSet(maxDoc);
    }

    /// <summary>
    /// Returns the next doc of the current term that has not been emitted before, marking it
    /// as emitted, or <see cref="DocIdSetIterator.NO_MORE_DOCS"/> when the term is exhausted.
    /// </summary>
    protected override int DocsEnumNextDoc()
    {
        int candidate;
        do
        {
            candidate = docsEnum.NextDoc();
            // Keep pulling while the candidate is a real doc whose bit was already set.
        } while (candidate != DocIdSetIterator.NO_MORE_DOCS && alreadyEmittedDocs.GetAndSet(candidate));

        return candidate;
    }
}
/// <summary>
/// In-order scorer. On construction it resolves every collected term against the segment,
/// records the matching docs in a bit set and their scores in a per-doc array, and then
/// iterates the bit set in doc id order.
/// </summary>
internal class SVInOrderScorer : Scorer
{
    protected readonly TermsIncludingScoreQuery m_outerInstance;
    internal readonly DocIdSetIterator matchingDocsIterator;
    // Score per segment-local doc id; only meaningful for docs set in the matching bit set.
    internal readonly float[] scores;
    internal readonly long cost;
    internal int currentDoc = -1;

    internal SVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, IBits acceptDocs,
        TermsEnum termsEnum, int maxDoc, long cost)
        : base(weight)
    {
        m_outerInstance = outerInstance;
        this.cost = cost;

        var matchingDocs = new FixedBitSet(maxDoc);
        scores = new float[maxDoc];

        // Virtual call: subclasses decide how conflicting scores for one doc are resolved.
        // Both m_outerInstance and scores are assigned before this point.
        FillDocsAndScores(matchingDocs, acceptDocs, termsEnum);
        matchingDocsIterator = matchingDocs.GetIterator();
    }

    /// <summary>
    /// Marks every doc of every collected term that exists in the segment and stores the
    /// term's score for it. A doc reached through several terms keeps the score written last.
    /// </summary>
    protected virtual void FillDocsAndScores(FixedBitSet matchingDocs, IBits acceptDocs,
        TermsEnum termsEnum)
    {
        var scratch = new BytesRef();
        DocsEnum postings = null;
        int[] ords = m_outerInstance._ords;

        for (int i = 0; i < m_outerInstance._terms.Count; i++)
        {
            int ord = ords[i];
            if (!termsEnum.SeekExact(m_outerInstance._terms.Get(ord, scratch)))
            {
                continue; // term does not occur in this segment
            }

            postings = termsEnum.Docs(acceptDocs, postings, DocsFlags.NONE);
            float termScore = m_outerInstance._scores[ord];

            int docId;
            while ((docId = postings.NextDoc()) != NO_MORE_DOCS)
            {
                matchingDocs.Set(docId);
                // If the same doc is also related to another doc, a score might be
                // overwritten. Per the original author this should only happen in a
                // many-to-many relation.
                scores[docId] = termScore;
            }
        }
    }

    public override float GetScore() => scores[currentDoc];

    public override int Freq => 1;

    public override int DocID => currentDoc;

    public override int NextDoc()
    {
        currentDoc = matchingDocsIterator.NextDoc();
        return currentDoc;
    }

    public override int Advance(int target)
    {
        currentDoc = matchingDocsIterator.Advance(target);
        return currentDoc;
    }

    public override long GetCost() => cost;
}
/// <summary>
/// This scorer deals with the fact that a document can have more than one score from
/// multiple related documents. The first term that reaches a doc determines its score.
/// </summary>
internal class MVInOrderScorer : SVInOrderScorer
{
    internal MVInOrderScorer(TermsIncludingScoreQuery outerInstance, Weight weight, IBits acceptDocs,
        TermsEnum termsEnum, int maxDoc, long cost)
        : base(outerInstance, weight, acceptDocs, termsEnum, maxDoc, cost)
    {
    }

    /// <summary>
    /// Marks every doc of every collected term that exists in the segment, storing a score
    /// only the first time a doc is seen.
    /// </summary>
    protected override void FillDocsAndScores(FixedBitSet matchingDocs, IBits acceptDocs,
        TermsEnum termsEnum)
    {
        var scratch = new BytesRef();
        DocsEnum postings = null;
        int[] ords = m_outerInstance._ords;

        for (int i = 0; i < m_outerInstance._terms.Count; i++)
        {
            int ord = ords[i];
            if (!termsEnum.SeekExact(m_outerInstance._terms.Get(ord, scratch)))
            {
                continue; // term does not occur in this segment
            }

            postings = termsEnum.Docs(acceptDocs, postings, DocsFlags.NONE);
            float termScore = m_outerInstance._scores[ord];

            int docId;
            while ((docId = postings.NextDoc()) != NO_MORE_DOCS)
            {
                // The original author would have preferred keeping the highest score per doc
                // (overwrite when scores[doc] < score), but first-wins behaves the same as
                // MVInnerScorer, and only then do the tests pass.
                if (!matchingDocs.GetAndSet(docId))
                {
                    scores[docId] = termScore;
                }
            }
        }
    }
}
}
}