// blob: ef2c04bf635f35f3cc60e1780a6d33d92ccc7b97 [file] [log] [blame]
/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*
*/
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.QueryParsers;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Test.Analysis;
using NUnit.Framework;
using Version = Lucene.Net.Util.Version;
namespace Lucene.Net.Index.Memory.Test
{
/**
* Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour,
* returning the same results for queries on some randomish indexes.
*/
public class MemoryIndexTest : BaseTokenStreamTestCase
{
// Distinct query strings to run against both indexes; filled in SetUp from
// the query resource files.
private readonly HashSet<String> _queries = new HashSet<String>();
// Random source used for document generation and analyzer choice; assigned in SetUp.
private Random random;
// Number of random documents TestRandomQueries builds and checks.
public static int ITERATIONS = 100;
/**
 * Per-test initialisation: runs the base class set-up, loads the query
 * strings from both resource files into the query set, and then obtains
 * the random source used by the rest of the fixture.
 */
[SetUp]
public override void SetUp()
{
    base.SetUp();

    // Load every query resource into the shared set (duplicates collapse).
    foreach (var resourceName in new[] { "testqueries.txt", "testqueries2.txt" })
    {
        _queries.UnionWith(ReadQueries(resourceName));
    }

    random = NewRandom();
}
/**
 * Read a set of queries from a resource file.
 *
 * The file is read as UTF-8, one query per line. Each line is trimmed;
 * blank lines and lines starting with "#" or "//" are skipped.
 *
 * resource: path of the file to open for reading.
 * Returns the distinct remaining lines.
 */
private IEnumerable<string> ReadQueries(String resource)
{
    var queries = new HashSet<String>();
    using (var fs = File.Open(resource, FileMode.Open, FileAccess.Read))
    using (var reader = new StreamReader(fs, Encoding.UTF8))
    {
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            line = line.Trim();
            if (line.Length == 0)
            {
                continue;
            }

            // The comment markers are fixed symbols, not linguistic text, so
            // compare ordinally instead of with the current culture.
            bool isComment = line.StartsWith("#", StringComparison.Ordinal)
                             || line.StartsWith("//", StringComparison.Ordinal);
            if (!isComment)
            {
                queries.Add(line);
            }
        }
    }
    return queries;
}
/**
 * Runs the randomized RAMDirectory-vs-MemoryIndex comparison repeatedly,
 * once per iteration up to ITERATIONS.
 */
[Test]
public void TestRandomQueries()
{
    int completed = 0;
    while (completed < ITERATIONS)
    {
        AssertAgainstRAMDirectory();
        completed++;
    }
}
/**
 * Build a randomish document for both RAMDirectory and MemoryIndex,
 * and run all the queries against it.
 *
 * The same two field values ("foo" and "term") and the same randomly
 * chosen analyzer are used for both indexes, and the pair is then handed
 * to AssertAllQueries for comparison.
 */
public void AssertAgainstRAMDirectory()
{
    // Each field gets a random number of terms, fewer than 250.
    String fooText = RandomFieldText(250);
    String termText = RandomFieldText(250);

    var ramdir = new RAMDirectory();
    var analyzer = RandomAnalyzer();

    // Index the document into the RAMDirectory.
    var writer = new IndexWriter(ramdir, analyzer,
        IndexWriter.MaxFieldLength.UNLIMITED);
    var doc = new Document();
    doc.Add(new Field("foo", fooText, Field.Store.NO, Field.Index.ANALYZED));
    doc.Add(new Field("term", termText, Field.Store.NO, Field.Index.ANALYZED));
    writer.AddDocument(doc);
    writer.Close();

    // Index the identical field text into the MemoryIndex.
    var memory = new MemoryIndex();
    memory.AddField("foo", fooText, analyzer);
    memory.AddField("term", termText, analyzer);

    AssertAllQueries(memory, ramdir, analyzer);
}

/**
 * Builds the text of one field: a random number of random terms
 * (at least 0, fewer than maxTerms), each preceded by a single space.
 */
private String RandomFieldText(int maxTerms)
{
    // Draw the term count once. Drawing a fresh bound in the loop condition
    // on every iteration would end the loop as soon as any single draw
    // falls at or below the counter, biasing documents towards being short.
    int termCount = random.Next(maxTerms);

    var text = new StringBuilder();
    for (int i = 0; i < termCount; i++)
    {
        text.Append(" ");
        text.Append(RandomTerm());
    }
    return text.ToString();
}
/**
 * Run all queries against both the RAMDirectory and MemoryIndex, ensuring they are the same.
 *
 * Every query string in the loaded query set is parsed with default field
 * "foo" using the supplied analyzer, searched on both indexes, and the
 * total hit counts are compared.
 *
 * memory:   the MemoryIndex holding the document.
 * ramdir:   the RAMDirectory holding the same document.
 * analyzer: the analyzer used to build both indexes; reused for query parsing.
 */
public void AssertAllQueries(MemoryIndex memory, RAMDirectory ramdir, Analyzer analyzer)
{
    var ram = new IndexSearcher(ramdir);
    var mem = memory.CreateSearcher();
    var qp = new QueryParser(Version.LUCENE_CURRENT, "foo", analyzer);
    foreach (String query in _queries)
    {
        var ramDocs = ram.Search(qp.Parse(query), 1);
        var memDocs = mem.Search(qp.Parse(query), 1);

        // Name the query in the failure message: this runs many queries over
        // random documents, so a bare count mismatch cannot be diagnosed.
        Assert.AreEqual(ramDocs.TotalHits, memDocs.TotalHits,
            "RAMDirectory and MemoryIndex hit counts differ for query: " + query);
    }
}
/**
 * Pick one of three analyzers at random (Simple, Stop or Standard)
 * to analyze the terms.
 */
private Analyzer RandomAnalyzer()
{
    int choice = random.Next(3);

    if (choice == 0)
    {
        return new SimpleAnalyzer();
    }

    if (choice == 1)
    {
        return new StopAnalyzer(Version.LUCENE_CURRENT);
    }

    return new StandardAnalyzer(Version.LUCENE_CURRENT);
}
/**
 * Fixed vocabulary that is indexed in addition to random words.
 * These terms are commonly used in the queries.
 */
private static readonly string[] TEST_TERMS =
{
    "term", "Term", "tErm", "TERM", "telm",
    "stop", "drop", "roll", "phrase",
    "a", "c", "bar", "blar", "gack",
    "weltbank", "worlbank", "hello",
    "on", "the", "apache", "Apache",
    "copyright", "Copyright"
};
/**
 * Produces one term for a generated document: with even odds either an
 * entry picked at random from TEST_TERMS or a random unicode string.
 */
private String RandomTerm()
{
    bool pickKnownTerm = random.Next(2) == 1;
    if (!pickKnownTerm)
    {
        // random unicode term
        return RandomString();
    }

    // random entry of TEST_TERMS
    int index = random.Next(TEST_TERMS.Length);
    return TEST_TERMS[index];
}
/**
 * Return a random unicode term, like TestStressIndexing.
 *
 * The result has a random length of 0 to 19 UTF-16 code units. Each position
 * is filled with one of: a valid surrogate pair (two units), an ASCII
 * character, a character from 0x80-0x7ff, from 0x800-0xd7fe, from
 * 0xe000-0xfffe, or a deliberately illegal unpaired surrogate.
 */
private String RandomString()
{
    int end = random.Next(20);

    // end is at most 19, so the initial 20-char buffer is already large
    // enough; the growth path is kept as a safeguard.
    if (buffer.Length < 1 + end)
    {
        char[] newBuffer = new char[(int) ((1 + end) * 1.25)];
        Array.Copy(buffer, 0, newBuffer, 0, buffer.Length);
        buffer = newBuffer;
    }

    // Fill every one of the `end` positions. Stopping one short would leave
    // the final returned character holding stale data from an earlier call
    // (or '\0' on first use).
    for (int i = 0; i < end; i++)
    {
        int t = random.Next(6);
        if (0 == t && i < end - 1)
        {
            // Make a surrogate pair (only when two slots remain)
            // High surrogate
            buffer[i++] = (char) NextInt(0xd800, 0xdc00);
            // Low surrogate
            buffer[i] = (char) NextInt(0xdc00, 0xe000);
        }
        else if (t <= 1)
        {
            // also reached when t == 0 but only one slot is left
            buffer[i] = (char) random.Next(0x80);
        }
        else if (2 == t)
        {
            buffer[i] = (char) NextInt(0x80, 0x800);
        }
        else if (3 == t)
        {
            buffer[i] = (char) NextInt(0x800, 0xd7ff);
        }
        else if (4 == t)
        {
            buffer[i] = (char) NextInt(0xe000, 0xffff);
        }
        else if (5 == t)
        {
            // Illegal unpaired surrogate: high or low with equal odds.
            // Next(2) is required here; Next(1) can only ever return 0.
            if (random.Next(2) == 1)
            {
                buffer[i] = (char) NextInt(0xd800, 0xdc00);
            }
            else
            {
                buffer[i] = (char) NextInt(0xdc00, 0xe000);
            }
        }
    }
    return new String(buffer, 0, end);
}

// Scratch buffer reused across RandomString calls.
private char[] buffer = new char[20];
/**
 * Random integer in the half-open range [start, end):
 * start is inclusive and end is exclusive.
 */
private int NextInt(int start, int end)
{
    int width = end - start;
    int offset = random.Next(width);
    return start + offset;
}
}
}