blob: c6b0acd776bd9ddfbf58918ec11ca59c767c93d3 [file] [log] [blame]
using Lucene.Net.Documents;
using NUnit.Framework;
using Assert = Lucene.Net.TestFramework.Assert;
namespace Lucene.Net.Search
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using Automaton = Lucene.Net.Util.Automaton.Automaton;
using Directory = Lucene.Net.Store.Directory;
using Document = Documents.Document;
using Field = Field;
using IndexReader = Lucene.Net.Index.IndexReader;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
using RegExp = Lucene.Net.Util.Automaton.RegExp;
using Term = Lucene.Net.Index.Term;
/// <summary>
/// Test the automaton query for several unicode corner cases,
/// specifically enumerating strings/indexes containing supplementary characters,
/// and the differences between UTF-8/UTF-32 and UTF-16 binary sort order.
/// </summary>
[TestFixture]
public class TestAutomatonQueryUnicode : LuceneTestCase
{
private IndexReader reader;
private IndexSearcher searcher;
private Directory directory;
private const string FN = "field";
[SetUp]
public override void SetUp()
{
base.SetUp();
directory = NewDirectory();
RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
this,
#endif
Random, directory);
Document doc = new Document();
Field titleField = NewTextField("title", "some title", Field.Store.NO);
Field field = NewTextField(FN, "", Field.Store.NO);
Field footerField = NewTextField("footer", "a footer", Field.Store.NO);
doc.Add(titleField);
doc.Add(field);
doc.Add(footerField);
field.SetStringValue("\uD866\uDF05abcdef");
writer.AddDocument(doc);
field.SetStringValue("\uD866\uDF06ghijkl");
writer.AddDocument(doc);
// this sorts before the previous two in UTF-8/UTF-32, but after in UTF-16!!!
field.SetStringValue("\uFB94mnopqr");
writer.AddDocument(doc);
field.SetStringValue("\uFB95stuvwx"); // this one too.
writer.AddDocument(doc);
field.SetStringValue("a\uFFFCbc");
writer.AddDocument(doc);
field.SetStringValue("a\uFFFDbc");
writer.AddDocument(doc);
field.SetStringValue("a\uFFFEbc");
writer.AddDocument(doc);
field.SetStringValue("a\uFB94bc");
writer.AddDocument(doc);
field.SetStringValue("bacadaba");
writer.AddDocument(doc);
field.SetStringValue("\uFFFD");
writer.AddDocument(doc);
field.SetStringValue("\uFFFD\uD866\uDF05");
writer.AddDocument(doc);
field.SetStringValue("\uFFFD\uFFFD");
writer.AddDocument(doc);
reader = writer.GetReader();
searcher = NewSearcher(reader);
writer.Dispose();
}
[TearDown]
public override void TearDown()
{
reader.Dispose();
directory.Dispose();
base.TearDown();
}
private Term NewTerm(string value)
{
return new Term(FN, value);
}
private int AutomatonQueryNrHits(AutomatonQuery query)
{
return searcher.Search(query, 5).TotalHits;
}
private void AssertAutomatonHits(int expected, Automaton automaton)
{
AutomatonQuery query = new AutomatonQuery(NewTerm("bogus"), automaton);
query.MultiTermRewriteMethod = (MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
Assert.AreEqual(expected, AutomatonQueryNrHits(query));
query.MultiTermRewriteMethod = (MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
Assert.AreEqual(expected, AutomatonQueryNrHits(query));
query.MultiTermRewriteMethod = (MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
Assert.AreEqual(expected, AutomatonQueryNrHits(query));
query.MultiTermRewriteMethod = (MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
Assert.AreEqual(expected, AutomatonQueryNrHits(query));
}
/// <summary>
/// Test that AutomatonQuery interacts with lucene's sort order correctly.
///
/// this expression matches something either starting with the arabic
/// presentation forms block, or a supplementary character.
/// </summary>
[Test]
public virtual void TestSortOrder()
{
Automaton a = (new RegExp("((\uD866\uDF05)|\uFB94).*")).ToAutomaton();
AssertAutomatonHits(2, a);
}
}
}