blob: 73ad263152bb99ac411962714c1a6730a6c356ea [file] [log] [blame]
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Index;
using Lucene.Net.Util;
using System.Collections.Generic;
namespace Lucene.Net.Search.Highlight
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// <see cref="TokenStream"/> created from a term vector field.
/// <para/>
/// Every token of the vector is materialized and ordered by position in the
/// constructor; <see cref="IncrementToken()"/> then replays that in-memory list.
/// </summary>
public sealed class TokenStreamFromTermPositionVector : TokenStream
{
    // Orders tokens by the absolute position temporarily stored in PositionIncrement.
    private static readonly IComparer<Token> tokenComparer = new TokenComparer();

    // All tokens of the vector, sorted by position once the constructor has finished.
    private readonly List<Token> positionedTokens = new List<Token>();

    private readonly ICharTermAttribute termAttribute;
    private readonly IPositionIncrementAttribute positionIncrementAttribute;
    private readonly IOffsetAttribute offsetAttribute;
    private readonly IPayloadAttribute payloadAttribute;

    // Cursor over positionedTokens; replaced with a fresh enumerator by Reset().
    private IEnumerator<Token> tokensAtCurrentPosition;

    ///<summary>Constructor</summary>
    /// <param name="vector">
    /// Terms that contains the data for
    /// creating the <see cref="TokenStream"/>. Must have positions and offsets.
    /// </param>
    public TokenStreamFromTermPositionVector(Terms vector)
    {
        termAttribute = AddAttribute<ICharTermAttribute>();
        positionIncrementAttribute = AddAttribute<IPositionIncrementAttribute>();
        offsetAttribute = AddAttribute<IOffsetAttribute>();
        payloadAttribute = AddAttribute<IPayloadAttribute>();

        bool hasOffsets = vector.HasOffsets;
        bool hasPayloads = vector.HasPayloads;
        TermsEnum termsEnum = vector.GetEnumerator();
        DocsAndPositionsEnum dpEnum = null;

        while (termsEnum.MoveNext())
        {
            // The term text is the same for every position of this term,
            // so decode it once instead of once per position.
            string termText = termsEnum.Term.Utf8ToString();

            dpEnum = termsEnum.DocsAndPositions(null, dpEnum);
            dpEnum.NextDoc();
            int freq = dpEnum.Freq;

            for (int j = 0; j < freq; j++)
            {
                int pos = dpEnum.NextPosition();
                Token token;
                if (hasOffsets)
                {
                    token = new Token(termText,
                                      dpEnum.StartOffset,
                                      dpEnum.EndOffset);
                }
                else
                {
                    token = new Token();
                    token.SetEmpty().Append(termText);
                }

                if (hasPayloads)
                {
                    // A position may carry no payload even when the vector as a
                    // whole reports payloads; GetPayload() is then null and must
                    // not be passed to DeepCopyOf.
                    BytesRef payload = dpEnum.GetPayload();
                    if (payload != null)
                    {
                        // Must make a deep copy of the returned payload,
                        // since D&PEnum API is allowed to re-use on every
                        // call:
                        token.Payload = BytesRef.DeepCopyOf(payload);
                    }
                }

                // Yes - this is the position, not the increment! This is for
                // sorting. This value
                // will be corrected before use.
                token.PositionIncrement = pos;
                this.positionedTokens.Add(token);
            }
        }

        CollectionUtil.TimSort(this.positionedTokens, tokenComparer);

        // Convert the absolute positions stored above into real increments
        // relative to the previous token (the first token is relative to -1).
        int lastPosition = -1;
        foreach (Token token in this.positionedTokens)
        {
            int thisPosition = token.PositionIncrement;
            token.PositionIncrement = thisPosition - lastPosition;
            lastPosition = thisPosition;
        }

        this.tokensAtCurrentPosition = this.positionedTokens.GetEnumerator();
    }

    /// <summary>
    /// Advances to the next stored token and copies its term text, position
    /// increment, offsets and payload into this stream's attributes.
    /// </summary>
    /// <returns>
    /// <c>true</c> if a token was emitted; <c>false</c> once all tokens have been consumed.
    /// </returns>
    public override bool IncrementToken()
    {
        if (this.tokensAtCurrentPosition.MoveNext())
        {
            Token next = this.tokensAtCurrentPosition.Current;
            ClearAttributes();
            termAttribute.SetEmpty().Append(next);
            positionIncrementAttribute.PositionIncrement = next.PositionIncrement;
            offsetAttribute.SetOffset(next.StartOffset, next.EndOffset);
            payloadAttribute.Payload = next.Payload;
            return true;
        }
        return false;
    }

    /// <summary>
    /// Rewinds the stream so that the next call to <see cref="IncrementToken()"/>
    /// starts again from the first stored token.
    /// </summary>
    public override void Reset()
    {
        this.tokensAtCurrentPosition = this.positionedTokens.GetEnumerator();
    }

    /// <summary>
    /// Compares tokens by the value held in <see cref="Token.PositionIncrement"/>,
    /// which holds the absolute position while the constructor sorts the tokens.
    /// </summary>
    private sealed class TokenComparer : IComparer<Token>
    {
        public int Compare(Token o1, Token o2)
        {
            // CompareTo instead of subtraction: same ordering, no risk of int overflow.
            return o1.PositionIncrement.CompareTo(o2.PositionIncrement);
        }
    }
}
}