blob: 2579e3e1bef7b4cf7e85c3168f485f627a12cbe9 [file] [log] [blame]
using J2N.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Diagnostics;
using Lucene.Net.QueryParsers.Flexible.Core.Nodes;
using Lucene.Net.QueryParsers.Flexible.Core.Processors;
using Lucene.Net.QueryParsers.Flexible.Standard.Config;
using Lucene.Net.QueryParsers.Flexible.Standard.Nodes;
using Lucene.Net.Support;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using Operator = Lucene.Net.QueryParsers.Flexible.Standard.Config.StandardQueryConfigHandler.Operator;
namespace Lucene.Net.QueryParsers.Flexible.Standard.Processors
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/// <summary>
/// This processor verifies if <see cref="ConfigurationKeys.ANALYZER"/>
/// is defined in the <see cref="Core.Config.QueryConfigHandler"/>. If it is and the analyzer is
/// not <c>null</c>, it looks for every <see cref="FieldQueryNode"/> that is not
/// <see cref="WildcardQueryNode"/>, <see cref="FuzzyQueryNode"/> or
/// <see cref="IRangeQueryNode"/> contained in the query node tree, then it applies
/// the analyzer to that <see cref="FieldQueryNode"/> object.
/// <para/>
/// If the analyzer return only one term, the returned term is set to the
/// <see cref="FieldQueryNode"/> and it's returned.
/// <para/>
/// If the analyzer return more than one term, a <see cref="TokenizedPhraseQueryNode"/>
/// or <see cref="MultiPhraseQueryNode"/> is created, whether there is one or more
/// terms at the same position, and it's returned.
/// <para/>
/// If no term is returned by the analyzer a <see cref="NoTokenFoundQueryNode"/> object
/// is returned.
/// </summary>
/// <seealso cref="ConfigurationKeys.ANALYZER"/>
/// <seealso cref="Analyzer"/>
/// <seealso cref="TokenStream"/>
public class AnalyzerQueryNodeProcessor : QueryNodeProcessor
{
private Analyzer analyzer;
private bool positionIncrementsEnabled;
private Operator defaultOperator;
public AnalyzerQueryNodeProcessor()
{
// empty constructor
}
public override IQueryNode Process(IQueryNode queryTree)
{
Analyzer analyzer = GetQueryConfigHandler().Get(ConfigurationKeys.ANALYZER);
if (analyzer != null)
{
this.analyzer = analyzer;
this.positionIncrementsEnabled = false;
bool? positionIncrementsEnabled = GetQueryConfigHandler().Get(ConfigurationKeys.ENABLE_POSITION_INCREMENTS);
// LUCENENET specific - rather than using null, we are relying on the behavior that the default
// value for an enum is 0 (OR in this case).
//var defaultOperator = GetQueryConfigHandler().Get(ConfigurationKeys.DEFAULT_OPERATOR);
//this.defaultOperator = defaultOperator != null ? defaultOperator.Value : Operator.OR;
this.defaultOperator = GetQueryConfigHandler().Get(ConfigurationKeys.DEFAULT_OPERATOR);
if (positionIncrementsEnabled != null)
{
this.positionIncrementsEnabled = positionIncrementsEnabled.Value;
}
if (this.analyzer != null)
{
return base.Process(queryTree);
}
}
return queryTree;
}
protected override IQueryNode PostProcessNode(IQueryNode node)
{
if (node is ITextableQueryNode
&& !(node is WildcardQueryNode)
&& !(node is FuzzyQueryNode)
&& !(node is RegexpQueryNode)
&& !(node.Parent is IRangeQueryNode))
{
FieldQueryNode fieldNode = ((FieldQueryNode)node);
string text = fieldNode.GetTextAsString();
string field = fieldNode.GetFieldAsString();
CachingTokenFilter buffer = null;
IPositionIncrementAttribute posIncrAtt = null;
int numTokens = 0;
int positionCount = 0;
bool severalTokensAtSamePosition = false;
TokenStream source = null;
try
{
source = this.analyzer.GetTokenStream(field, text);
source.Reset();
buffer = new CachingTokenFilter(source);
if (buffer.HasAttribute<IPositionIncrementAttribute>())
{
posIncrAtt = buffer.GetAttribute<IPositionIncrementAttribute>();
}
try
{
while (buffer.IncrementToken())
{
numTokens++;
int positionIncrement = (posIncrAtt != null) ? posIncrAtt
.PositionIncrement : 1;
if (positionIncrement != 0)
{
positionCount += positionIncrement;
}
else
{
severalTokensAtSamePosition = true;
}
}
}
#pragma warning disable 168
catch (IOException e)
#pragma warning restore 168
{
// ignore
}
}
catch (IOException e)
{
throw new Exception(e.ToString(), e);
}
finally
{
IOUtils.DisposeWhileHandlingException(source);
}
// rewind the buffer stream
buffer.Reset();
if (!buffer.HasAttribute<ICharTermAttribute>())
{
return new NoTokenFoundQueryNode();
}
ICharTermAttribute termAtt = buffer.GetAttribute<ICharTermAttribute>();
if (numTokens == 0)
{
return new NoTokenFoundQueryNode();
}
else if (numTokens == 1)
{
string term = null;
try
{
bool hasNext;
hasNext = buffer.IncrementToken();
if (Debugging.AssertsEnabled) Debugging.Assert(hasNext == true);
term = termAtt.ToString();
}
catch (IOException) // LUCENENET: IDE0059: Remove unnecessary value assignment
{
// safe to ignore, because we know the number of tokens
}
fieldNode.Text = term.AsCharSequence();
return fieldNode;
}
else if (severalTokensAtSamePosition || !(node is QuotedFieldQueryNode))
{
if (positionCount == 1 || !(node is QuotedFieldQueryNode))
{
// no phrase query:
if (positionCount == 1)
{
// simple case: only one position, with synonyms
List<IQueryNode> children = new List<IQueryNode>();
for (int i = 0; i < numTokens; i++)
{
string term = null;
try
{
bool hasNext = buffer.IncrementToken();
if (Debugging.AssertsEnabled) Debugging.Assert(hasNext == true);
term = termAtt.ToString();
}
catch (IOException) // LUCENENET: IDE0059: Remove unnecessary value assignment
{
// safe to ignore, because we know the number of tokens
}
children.Add(new FieldQueryNode(field, term, -1, -1));
}
return new GroupQueryNode(
new StandardBooleanQueryNode(children, positionCount == 1));
}
else
{
// multiple positions
IQueryNode q = new StandardBooleanQueryNode(Collections.EmptyList<IQueryNode>(), false);
IQueryNode currentQuery = null;
for (int i = 0; i < numTokens; i++)
{
string term = null;
try
{
bool hasNext = buffer.IncrementToken();
if (Debugging.AssertsEnabled) Debugging.Assert(hasNext == true);
term = termAtt.ToString();
}
catch (IOException) // LUCENENET: IDE0059: Remove unnecessary value assignment
{
// safe to ignore, because we know the number of tokens
}
if (posIncrAtt != null && posIncrAtt.PositionIncrement == 0)
{
if (!(currentQuery is BooleanQueryNode))
{
IQueryNode t = currentQuery;
currentQuery = new StandardBooleanQueryNode(Collections.EmptyList<IQueryNode>(), true);
((BooleanQueryNode)currentQuery).Add(t);
}
((BooleanQueryNode)currentQuery).Add(new FieldQueryNode(field, term, -1, -1));
}
else
{
if (currentQuery != null)
{
if (this.defaultOperator == Operator.OR)
{
q.Add(currentQuery);
}
else
{
q.Add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ));
}
}
currentQuery = new FieldQueryNode(field, term, -1, -1);
}
}
if (this.defaultOperator == Operator.OR)
{
q.Add(currentQuery);
}
else
{
q.Add(new ModifierQueryNode(currentQuery, Modifier.MOD_REQ));
}
if (q is BooleanQueryNode)
{
q = new GroupQueryNode(q);
}
return q;
}
}
else
{
// phrase query:
MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();
List<FieldQueryNode> multiTerms = new List<FieldQueryNode>();
int position = -1;
int i = 0;
int termGroupCount = 0;
for (; i < numTokens; i++)
{
string term = null;
int positionIncrement = 1;
try
{
bool hasNext = buffer.IncrementToken();
if (Debugging.AssertsEnabled) Debugging.Assert(hasNext == true);
term = termAtt.ToString();
if (posIncrAtt != null)
{
positionIncrement = posIncrAtt.PositionIncrement;
}
}
catch (IOException) // LUCENENET: IDE0059: Remove unnecessary value assignment
{
// safe to ignore, because we know the number of tokens
}
if (positionIncrement > 0 && multiTerms.Count > 0)
{
foreach (FieldQueryNode termNode in multiTerms)
{
if (this.positionIncrementsEnabled)
{
termNode.PositionIncrement = position;
}
else
{
termNode.PositionIncrement = termGroupCount;
}
mpq.Add(termNode);
}
// Only increment once for each "group" of
// terms that were in the same position:
termGroupCount++;
multiTerms.Clear();
}
position += positionIncrement;
multiTerms.Add(new FieldQueryNode(field, term, -1, -1));
}
foreach (FieldQueryNode termNode in multiTerms)
{
if (this.positionIncrementsEnabled)
{
termNode.PositionIncrement = position;
}
else
{
termNode.PositionIncrement = termGroupCount;
}
mpq.Add(termNode);
}
return mpq;
}
}
else
{
TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
int position = -1;
for (int i = 0; i < numTokens; i++)
{
string term = null;
int positionIncrement = 1;
try
{
bool hasNext = buffer.IncrementToken();
if (Debugging.AssertsEnabled) Debugging.Assert(hasNext == true);
term = termAtt.ToString();
if (posIncrAtt != null)
{
positionIncrement = posIncrAtt.PositionIncrement;
}
}
catch (IOException) // LUCENENET: IDE0059: Remove unnecessary value assignment
{
// safe to ignore, because we know the number of tokens
}
FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);
if (this.positionIncrementsEnabled)
{
position += positionIncrement;
newFieldNode.PositionIncrement = position;
}
else
{
newFieldNode.PositionIncrement = i;
}
pq.Add(newFieldNode);
}
return pq;
}
}
return node;
}
protected override IQueryNode PreProcessNode(IQueryNode node)
{
return node;
}
protected override IList<IQueryNode> SetChildrenOrder(IList<IQueryNode> children)
{
return children;
}
}
}