using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.QueryParsers.Classic;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Util;
using NUnit.Framework;
namespace Lucene.Net.QueryParsers.Analyzing
[SuppressCodecs("Lucene3x")] // binary terms
public class TestAnalyzingQueryParser : LuceneTestCase
// Name of the field every query in this fixture is parsed against and indexed under.
private static readonly string FIELD = "field";

// Analyzer handed to the query parser by most tests; assigned in SetUp.
private Analyzer a;

// Paired fixtures: element i of an "...Input" array is parsed and the string form of the
// resulting query is compared against element i of the matching "...Expected" array.
private string[] wildcardInput, wildcardExpected;
private string[] prefixInput, prefixExpected;
private string[] rangeInput, rangeExpected;
private string[] fuzzyInput, fuzzyExpected;

// Query string -> document text. A "hit" entry is expected to match its text,
// a "miss" entry is expected not to match it (see TestWildCardEscapes).
private IDictionary<string, string> wildcardEscapeHits = new Dictionary<string, string>();
private IDictionary<string, string> wildcardEscapeMisses = new Dictionary<string, string>();
/// <summary>
/// Populates the input/expected fixtures and creates the analyzer used by the tests.
/// </summary>
[SetUp]
public override void SetUp()
{
    // Run the base fixture's own set-up before building the test data.
    base.SetUp();

    wildcardInput = new[]
    {
        "*bersetzung über*ung",
        "Mötley Cr\u00fce Mötl?* Crü?",
        "Renée Zellweger Ren?? Zellw?ger"
    };
    wildcardExpected = new[]
    {
        "*bersetzung uber*ung",
        "motley crue motl?* cru?",
        "renee zellweger ren?? zellw?ger"
    };

    prefixInput = new[]
    {
        "übersetzung übersetz*",
        "Mötley Crüe Mötl* crü*",
        "René? Zellw*"
    };
    prefixExpected = new[]
    {
        "ubersetzung ubersetz*",
        "motley crue motl* cru*",
        "rene? zellw*"
    };

    rangeInput = new[] { "[aa TO bb]", "{Anaïs TO Zoé}" };
    rangeExpected = new[] { "[aa TO bb]", "{anais TO zoe}" };

    fuzzyInput = new[]
    {
        "Übersetzung Übersetzung~0.9",
        "Mötley Crüe Mötley~0.75 Crüe~0.5",
        "Renée Zellweger Renée~0.9 Zellweger~"
    };
    fuzzyExpected = new[]
    {
        "ubersetzung ubersetzung~1",
        "motley crue motley~1 crue~2",
        "renee zellweger renee~0 zellweger~2"
    };

    // The indexer is used (not Add) so repeated set-up on the same instance simply overwrites.
    wildcardEscapeHits["mö*tley"] = "moatley";

    // need to have at least one genuine wildcard to trigger the wildcard analysis
    // hence the * before the y
    wildcardEscapeHits["mö\\*tl*y"] = "mo*tley";

    // escaped backslash then true wildcard
    wildcardEscapeHits["mö\\\\*tley"] = "mo\\atley";

    // escaped wildcard then true wildcard
    wildcardEscapeHits["mö\\??ley"] = "mo?tley";

    // the first is an escaped * which should yield a miss
    wildcardEscapeMisses["mö\\*tl*y"] = "moatley";

    a = new ASCIIAnalyzer();
}
/// <summary>
/// Checks the two <see cref="ParseException"/> messages expected from single-chunk analysis:
/// one containing "returned nothing" and one containing "multiple terms".
/// </summary>
[Test]
public virtual void TestSingleChunkExceptions()
{
    bool ex = false;
    string termStr = "the*tre";
    Analyzer stopsAnalyzer = new MockAnalyzer(
        Random, MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET);
    try
    {
        // Only the exception matters here; the parsed query string is not inspected.
        ParseWithAnalyzingQueryParser(termStr, stopsAnalyzer, true);
    }
    catch (ParseException e)
    {
        if (e.Message.Contains("returned nothing"))
        {
            ex = true;
        }
    }
    assertEquals("Should have returned nothing", true, ex);

    ex = false;
    try
    {
        AnalyzingQueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, a);
        qp.AnalyzeSingleChunk(FIELD, "", "not a single chunk");
    }
    catch (ParseException e)
    {
        if (e.Message.Contains("multiple terms"))
        {
            ex = true;
        }
    }
    assertEquals("Should have produced multiple terms", true, ex);
}
/// <summary>
/// A lone "*" must raise a <see cref="ParseException"/> when leading wildcards are
/// disallowed, and must parse to "*" when they are allowed.
/// </summary>
[Test]
public virtual void TestWildcardAlone()
{
    //seems like crazy edge case, but can be useful in concordance
    bool pex = false;
    try
    {
        GetAnalyzedQuery("*", a, false);
    }
    catch (ParseException /*e*/)
    {
        pex = true;
    }
    assertEquals("Wildcard alone with allowWildcard=false", true, pex);

    pex = false;
    try
    {
        string qString = ParseWithAnalyzingQueryParser("*", a, true);
        assertEquals("Every word", "*", qString);
    }
    catch (ParseException /*e*/)
    {
        pex = true;
    }
    assertEquals("Wildcard alone with allowWildcard=true", false, pex);
}
/// <summary>
/// For every entry in the escape fixtures, parses the key as a query and checks whether
/// it matches a one-document index containing the entry's value.
/// </summary>
[Test]
public virtual void TestWildCardEscapes()
{
    foreach (var entry in wildcardEscapeHits)
    {
        Query q = GetAnalyzedQuery(entry.Key, a, false);
        assertEquals("WildcardEscapeHits: " + entry.Key, true, IsAHit(q, entry.Value, a));
    }

    foreach (var entry in wildcardEscapeMisses)
    {
        Query q = GetAnalyzedQuery(entry.Key, a, false);
        assertEquals("WildcardEscapeMisses: " + entry.Key, false, IsAHit(q, entry.Value, a));
    }
}
/// <summary>
/// Parsing the first wildcard fixture (which starts with "*") with leading wildcards
/// disabled must raise a <see cref="ParseException"/>.
/// </summary>
[Test]
public virtual void TestWildCardQueryNoLeadingAllowed()
{
    bool ex = false;
    try
    {
        // Only the exception matters; the returned string is not inspected.
        ParseWithAnalyzingQueryParser(wildcardInput[0], a, false);
    }
    catch (ParseException /*e*/)
    {
        ex = true;
    }
    assertEquals("Testing initial wildcard not allowed",
        true, ex);
}
/// <summary>
/// Parses each wildcard fixture with leading wildcards allowed and compares the query's
/// string form with the expected value at the same index.
/// </summary>
[Test]
public virtual void TestWildCardQuery()
{
    for (int i = 0; i < wildcardInput.Length; i++)
    {
        assertEquals("Testing wildcards with analyzer " + a.GetType() + ", input string: "
            + wildcardInput[i], wildcardExpected[i], ParseWithAnalyzingQueryParser(wildcardInput[i], a, true));
    }
}
/// <summary>
/// Parses each prefix fixture (leading wildcards disallowed) and compares the query's
/// string form with the expected value at the same index.
/// </summary>
[Test]
public virtual void TestPrefixQuery()
{
    for (int i = 0; i < prefixInput.Length; i++)
    {
        assertEquals("Testing prefixes with analyzer " + a.GetType() + ", input string: "
            + prefixInput[i], prefixExpected[i], ParseWithAnalyzingQueryParser(prefixInput[i], a, false));
    }
}
/// <summary>
/// Parses each range fixture (leading wildcards disallowed) and compares the query's
/// string form with the expected value at the same index.
/// </summary>
[Test]
public virtual void TestRangeQuery()
{
    for (int i = 0; i < rangeInput.Length; i++)
    {
        assertEquals("Testing ranges with analyzer " + a.GetType() + ", input string: "
            + rangeInput[i], rangeExpected[i], ParseWithAnalyzingQueryParser(rangeInput[i], a, false));
    }
}
/// <summary>
/// Parses each fuzzy fixture (leading wildcards disallowed) and compares the query's
/// string form with the expected value at the same index.
/// </summary>
[Test]
public virtual void TestFuzzyQuery()
{
    for (int i = 0; i < fuzzyInput.Length; i++)
    {
        assertEquals("Testing fuzzys with analyzer " + a.GetType() + ", input string: "
            + fuzzyInput[i], fuzzyExpected[i], ParseWithAnalyzingQueryParser(fuzzyInput[i], a, false));
    }
}
/// <summary>
/// Parses <paramref name="s"/> via <see cref="GetAnalyzedQuery"/> and returns the resulting
/// query rendered with <c>ToString(FIELD)</c>.
/// </summary>
/// <param name="s">Query text to parse.</param>
/// <param name="a">Analyzer passed to the parser.</param>
/// <param name="allowLeadingWildcard">Forwarded to the parser's AllowLeadingWildcard setting.</param>
/// <returns>The parsed query's string form for <c>FIELD</c>.</returns>
private string ParseWithAnalyzingQueryParser(string s, Analyzer a, bool allowLeadingWildcard)
    => GetAnalyzedQuery(s, a, allowLeadingWildcard).ToString(FIELD);
/// <summary>
/// Builds an <see cref="AnalyzingQueryParser"/> over <c>FIELD</c> with the given analyzer
/// and leading-wildcard setting, then parses <paramref name="s"/> with it.
/// </summary>
/// <param name="s">Query text to parse.</param>
/// <param name="a">Analyzer passed to the parser.</param>
/// <param name="allowLeadingWildcard">Value assigned to the parser's AllowLeadingWildcard property.</param>
/// <returns>The query returned by the parser.</returns>
private Query GetAnalyzedQuery(string s, Analyzer a, bool allowLeadingWildcard)
{
    var parser = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, a)
    {
        AllowLeadingWildcard = allowLeadingWildcard
    };
    return parser.Parse(s);
}
/// <summary>
/// Token filter that rewrites the characters 'ü', 'ö', 'é' and 'ï' in each token's term
/// buffer to 'u', 'o', 'e' and 'i' respectively. All other characters are left untouched.
/// </summary>
internal sealed class FoldingFilter : TokenFilter
{
    private readonly ICharTermAttribute termAtt;

    /// <summary>Wraps <paramref name="input"/> and registers the term attribute to edit.</summary>
    public FoldingFilter(TokenStream input)
        : base(input)
    {
        termAtt = AddAttribute<ICharTermAttribute>();
    }

    /// <summary>
    /// Advances the wrapped stream and folds the accented characters of the current term in place.
    /// </summary>
    /// <returns><c>true</c> if a token was produced; <c>false</c> once the input is exhausted.</returns>
    public sealed override bool IncrementToken()
    {
        if (!m_input.IncrementToken())
        {
            return false;
        }

        char[] term = termAtt.Buffer;
        // Only the first Length chars belong to the current term; the backing buffer may be larger.
        int length = termAtt.Length;
        for (int i = 0; i < length; i++)
        {
            // Each section needs its own break: C# does not allow falling through to the next case.
            switch (term[i])
            {
                case 'ü':
                    term[i] = 'u';
                    break;
                case 'ö':
                    term[i] = 'o';
                    break;
                case 'é':
                    term[i] = 'e';
                    break;
                case 'ï':
                    term[i] = 'i';
                    break;
            }
        }
        return true;
    }
}
/// <summary>
/// Analyzer whose token stream is a whitespace <see cref="MockTokenizer"/> (constructed with
/// its third argument set to <c>true</c>) wrapped in a <see cref="FoldingFilter"/>.
/// </summary>
internal sealed class ASCIIAnalyzer : Analyzer
{
    protected internal override TokenStreamComponents CreateComponents(string fieldName, System.IO.TextReader reader)
    {
        Tokenizer source = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
        TokenStream folded = new FoldingFilter(source);
        return new TokenStreamComponents(source, folded);
    }
}
// LUCENE-4176
/// <summary>
/// Parses a range query whose two endpoints are the same term, using a
/// <see cref="MockBytesAnalyzer"/>, and expects it to match a document containing that term.
/// </summary>
[Test]
public virtual void TestByteTerms()
{
    string s = "เข";
    Analyzer analyzer = new MockBytesAnalyzer();
    Classic.QueryParser qp = new AnalyzingQueryParser(TEST_VERSION_CURRENT, FIELD, analyzer);
    Query q = qp.Parse("[เข TO เข]");
    assertEquals(true, IsAHit(q, s, analyzer));
}
/// <summary>
/// Indexes <paramref name="content"/> as a single document under <c>FIELD</c> in a fresh
/// directory and reports whether <paramref name="q"/> matches exactly that one document.
/// </summary>
/// <param name="q">Query to run against the one-document index.</param>
/// <param name="content">Text stored in the document's only field.</param>
/// <param name="analyzer">Analyzer used by the index writer.</param>
/// <returns><c>true</c> when the search reports exactly one hit; otherwise <c>false</c>.</returns>
private bool IsAHit(Query q, string content, Analyzer analyzer)
{
    using Directory ramDir = NewDirectory();

    // The writer gets its own scope so it is disposed before the reader below opens the directory.
    using (RandomIndexWriter writer = new RandomIndexWriter(Random, ramDir, analyzer))
    {
        FieldType fieldType = new FieldType
        {
            IsIndexed = true,
            IsTokenized = true,
            IsStored = true
        };

        Document doc = new Document();
        // The field must be attached to the document, and the document written,
        // otherwise the index is empty and no query can ever produce a hit.
        doc.Add(new Field(FIELD, content, fieldType));
        writer.AddDocument(doc);
    }

    using DirectoryReader ir = DirectoryReader.Open(ramDir);
    IndexSearcher searcher = new IndexSearcher(ir);
    return searcher.Search(q, 10).TotalHits == 1;
}