blob: ff2b2ab3bd2a69527e313483d2470f52d12d75b5 [file] [log] [blame]
using J2N.Text;
using NUnit.Framework;
using System;
using System.IO;
using System.Text;
namespace Lucene.Net.Documents
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
using BytesRef = Lucene.Net.Util.BytesRef;
using Directory = Lucene.Net.Store.Directory;
using DirectoryReader = Lucene.Net.Index.DirectoryReader;
using DocsAndPositionsEnum = Lucene.Net.Index.DocsAndPositionsEnum;
using Fields = Lucene.Net.Index.Fields;
using IIndexableField = Lucene.Net.Index.IIndexableField;
using IndexReader = Lucene.Net.Index.IndexReader;
using IndexSearcher = Lucene.Net.Search.IndexSearcher;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using MockTokenizer = Lucene.Net.Analysis.MockTokenizer;
using PhraseQuery = Lucene.Net.Search.PhraseQuery;
using Query = Lucene.Net.Search.Query;
using RandomIndexWriter = Lucene.Net.Index.RandomIndexWriter;
using ScoreDoc = Lucene.Net.Search.ScoreDoc;
using Term = Lucene.Net.Index.Term;
using TermQuery = Lucene.Net.Search.TermQuery;
using Terms = Lucene.Net.Index.Terms;
using TermsEnum = Lucene.Net.Index.TermsEnum;
/// <summary>
/// Tests the <see cref="Document"/> class: adding, retrieving and removing
/// stored/indexed/binary fields, both on fresh documents and on documents
/// retrieved back from an index.
/// </summary>
[TestFixture]
public class TestDocument : LuceneTestCase
{
    internal string BinaryVal = "this text will be stored as a byte array in the index";
    internal string BinaryVal2 = "this text will be also stored as a byte array in the index";

    /// <summary>
    /// Verifies that binary (byte-array) field values round-trip through a
    /// <see cref="Document"/>: they can be added alongside string fields,
    /// retrieved individually and in bulk, and removed by name.
    /// </summary>
    [Test]
    public virtual void TestBinaryField()
    {
        Documents.Document doc = new Documents.Document();

        FieldType ft = new FieldType();
        ft.IsStored = true;
        IIndexableField stringFld = new Field("string", BinaryVal, ft);
        // StoredField(name, byte[]) produces a stored-only (not indexed) binary field.
        IIndexableField binaryFld = new StoredField("binary", BinaryVal.GetBytes(Encoding.UTF8));
        IIndexableField binaryFld2 = new StoredField("binary", BinaryVal2.GetBytes(Encoding.UTF8));

        doc.Add(stringFld);
        doc.Add(binaryFld);

        Assert.AreEqual(2, doc.Fields.Count);

        Assert.IsTrue(binaryFld.GetBinaryValue() != null);
        Assert.IsTrue(binaryFld.IndexableFieldType.IsStored);
        Assert.IsFalse(binaryFld.IndexableFieldType.IsIndexed);

        string binaryTest = doc.GetBinaryValue("binary").Utf8ToString();
        Assert.IsTrue(binaryTest.Equals(BinaryVal, StringComparison.Ordinal));

        string stringTest = doc.Get("string");
        Assert.IsTrue(binaryTest.Equals(stringTest, StringComparison.Ordinal));

        doc.Add(binaryFld2);

        Assert.AreEqual(3, doc.Fields.Count);

        // Both binary values must come back, in insertion order.
        BytesRef[] binaryTests = doc.GetBinaryValues("binary");

        Assert.AreEqual(2, binaryTests.Length);

        binaryTest = binaryTests[0].Utf8ToString();
        string binaryTest2 = binaryTests[1].Utf8ToString();

        Assert.IsFalse(binaryTest.Equals(binaryTest2, StringComparison.Ordinal));

        Assert.IsTrue(binaryTest.Equals(BinaryVal, StringComparison.Ordinal));
        Assert.IsTrue(binaryTest2.Equals(BinaryVal2, StringComparison.Ordinal));

        doc.RemoveField("string");
        Assert.AreEqual(2, doc.Fields.Count);

        // RemoveFields removes every field with the given name.
        doc.RemoveFields("binary");
        Assert.AreEqual(0, doc.Fields.Count);
    }

    /// <summary>
    /// Tests the <see cref="Document.RemoveField(string)"/> /
    /// <see cref="Document.RemoveFields(string)"/> methods for a brand new
    /// <see cref="Document"/> that has not been indexed yet.
    /// </summary>
    /// <exception cref="Exception"> on error </exception>
    [Test]
    public virtual void TestRemoveForNewDocument()
    {
        Documents.Document doc = MakeDocumentWithFields();
        Assert.AreEqual(10, doc.Fields.Count);

        doc.RemoveFields("keyword");
        Assert.AreEqual(8, doc.Fields.Count);

        doc.RemoveFields("doesnotexists"); // removing non-existing fields is
        // silently ignored
        doc.RemoveFields("keyword"); // removing a field more than once
        Assert.AreEqual(8, doc.Fields.Count);

        // The first call removes both "text" fields; subsequent calls are no-ops.
        doc.RemoveFields("text");
        Assert.AreEqual(6, doc.Fields.Count);
        doc.RemoveFields("text");
        Assert.AreEqual(6, doc.Fields.Count);
        doc.RemoveFields("text");
        Assert.AreEqual(6, doc.Fields.Count);

        doc.RemoveFields("doesnotexists"); // removing non-existing fields is
        // silently ignored
        Assert.AreEqual(6, doc.Fields.Count);

        doc.RemoveFields("unindexed");
        Assert.AreEqual(4, doc.Fields.Count);

        doc.RemoveFields("unstored");
        Assert.AreEqual(2, doc.Fields.Count);

        doc.RemoveFields("doesnotexists"); // removing non-existing fields is
        // silently ignored
        Assert.AreEqual(2, doc.Fields.Count);

        doc.RemoveFields("indexed_not_tokenized");
        Assert.AreEqual(0, doc.Fields.Count);
    }

    /// <summary>
    /// Verifies that the <see cref="Field"/> constructor rejects invalid
    /// <see cref="FieldType"/> combinations (neither stored nor indexed;
    /// term vectors without indexing) while accepting valid ones.
    /// </summary>
    [Test]
    public virtual void TestConstructorExceptions()
    {
        FieldType ft = new FieldType();
        ft.IsStored = true;
        new Field("name", "value", ft); // okay
        new StringField("name", "value", Field.Store.NO); // okay

        // A field that is neither stored nor indexed is meaningless.
        Assert.Throws<System.ArgumentException>(() => new Field("name", "value", new FieldType()));

        new Field("name", "value", ft); // okay

        // Term vectors can only be stored for fields that are indexed.
        Assert.Throws<System.ArgumentException>(() =>
        {
            FieldType ft2 = new FieldType();
            ft2.IsStored = true;
            ft2.StoreTermVectors = true;
            new Field("name", "value", ft2);
        });
    }

    /// <summary>
    /// Tests the <see cref="Document.GetValues(string)"/> method for a brand new
    /// <see cref="Document"/> that has not been indexed yet.
    /// </summary>
    /// <exception cref="Exception"> on error </exception>
    [Test]
    public virtual void TestGetValuesForNewDocument()
    {
        DoAssert(MakeDocumentWithFields(), false);
    }

    /// <summary>
    /// Tests the <see cref="Document.GetValues(string)"/> method for a
    /// <see cref="Document"/> retrieved from an index.
    /// </summary>
    /// <exception cref="Exception"> on error </exception>
    [Test]
    public virtual void TestGetValuesForIndexedDocument()
    {
        Directory dir = NewDirectory();
        RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
            this,
#endif
            Random, dir);
        writer.AddDocument(MakeDocumentWithFields());
        IndexReader reader = writer.GetReader();

        IndexSearcher searcher = NewSearcher(reader);

        // search for something that does exist
        Query query = new TermQuery(new Term("keyword", "test1"));

        // ensure that queries return expected results without DateFilter first
        ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        DoAssert(searcher.Doc(hits[0].Doc), true);
        writer.Dispose();
        reader.Dispose();
        dir.Dispose();
    }

    /// <summary>
    /// Verifies that <see cref="Document.GetValues(string)"/> returns all values
    /// for a multi-valued field in insertion order, and an empty array for an
    /// unknown field name.
    /// </summary>
    [Test]
    public virtual void TestGetValues()
    {
        Documents.Document doc = MakeDocumentWithFields();
        Assert.AreEqual(new string[] { "test1", "test2" }, doc.GetValues("keyword"));
        Assert.AreEqual(new string[] { "test1", "test2" }, doc.GetValues("text"));
        Assert.AreEqual(new string[] { "test1", "test2" }, doc.GetValues("unindexed"));
        Assert.AreEqual(new string[0], doc.GetValues("nope"));
    }

    /// <summary>
    /// Verifies that multiple values of the same not-tokenized field are indexed
    /// at consecutive positions, so a <see cref="PhraseQuery"/> spanning the two
    /// values matches.
    /// </summary>
    [Test]
    public virtual void TestPositionIncrementMultiFields()
    {
        Directory dir = NewDirectory();
        RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
            this,
#endif
            Random, dir);
        writer.AddDocument(MakeDocumentWithFields());
        IndexReader reader = writer.GetReader();

        IndexSearcher searcher = NewSearcher(reader);
        PhraseQuery query = new PhraseQuery();
        query.Add(new Term("indexed_not_tokenized", "test1"));
        query.Add(new Term("indexed_not_tokenized", "test2"));

        ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
        Assert.AreEqual(1, hits.Length);

        DoAssert(searcher.Doc(hits[0].Doc), true);
        writer.Dispose();
        reader.Dispose();
        dir.Dispose();
    }

    /// <summary>
    /// Builds a <see cref="Document"/> with two values each for the fields
    /// "keyword" (stored+indexed, not tokenized), "text" (stored+tokenized),
    /// "unindexed" (stored only), "unstored" (indexed only) and
    /// "indexed_not_tokenized" — 10 fields total.
    /// </summary>
    private Documents.Document MakeDocumentWithFields()
    {
        Documents.Document doc = new Documents.Document();
        FieldType stored = new FieldType();
        stored.IsStored = true;
        FieldType indexedNotTokenized = new FieldType();
        indexedNotTokenized.IsIndexed = true;
        indexedNotTokenized.IsTokenized = false;
        doc.Add(new StringField("keyword", "test1", Field.Store.YES));
        doc.Add(new StringField("keyword", "test2", Field.Store.YES));
        doc.Add(new TextField("text", "test1", Field.Store.YES));
        doc.Add(new TextField("text", "test2", Field.Store.YES));
        doc.Add(new Field("unindexed", "test1", stored));
        doc.Add(new Field("unindexed", "test2", stored));
        doc.Add(new TextField("unstored", "test1", Field.Store.NO));
        doc.Add(new TextField("unstored", "test2", Field.Store.NO));
        doc.Add(new Field("indexed_not_tokenized", "test1", indexedNotTokenized));
        doc.Add(new Field("indexed_not_tokenized", "test2", indexedNotTokenized));
        return doc;
    }

    /// <summary>
    /// Asserts that <paramref name="doc"/> contains the fields produced by
    /// <see cref="MakeDocumentWithFields()"/>, with values in insertion order.
    /// </summary>
    /// <param name="doc"> the document to check </param>
    /// <param name="fromIndex"> <c>true</c> if <paramref name="doc"/> was retrieved from
    /// an index, in which case the unstored fields are (correctly) absent </param>
    private void DoAssert(Documents.Document doc, bool fromIndex)
    {
        IIndexableField[] keywordFieldValues = doc.GetFields("keyword");
        IIndexableField[] textFieldValues = doc.GetFields("text");
        IIndexableField[] unindexedFieldValues = doc.GetFields("unindexed");
        IIndexableField[] unstoredFieldValues = doc.GetFields("unstored");

        Assert.IsTrue(keywordFieldValues.Length == 2);
        Assert.IsTrue(textFieldValues.Length == 2);
        Assert.IsTrue(unindexedFieldValues.Length == 2);
        // this test cannot work for documents retrieved from the index
        // since unstored fields will obviously not be returned
        if (!fromIndex)
        {
            Assert.IsTrue(unstoredFieldValues.Length == 2);
        }

        Assert.IsTrue(keywordFieldValues[0].GetStringValue().Equals("test1", StringComparison.Ordinal));
        Assert.IsTrue(keywordFieldValues[1].GetStringValue().Equals("test2", StringComparison.Ordinal));
        Assert.IsTrue(textFieldValues[0].GetStringValue().Equals("test1", StringComparison.Ordinal));
        Assert.IsTrue(textFieldValues[1].GetStringValue().Equals("test2", StringComparison.Ordinal));
        Assert.IsTrue(unindexedFieldValues[0].GetStringValue().Equals("test1", StringComparison.Ordinal));
        Assert.IsTrue(unindexedFieldValues[1].GetStringValue().Equals("test2", StringComparison.Ordinal));
        // this test cannot work for documents retrieved from the index
        // since unstored fields will obviously not be returned
        if (!fromIndex)
        {
            Assert.IsTrue(unstoredFieldValues[0].GetStringValue().Equals("test1", StringComparison.Ordinal));
            Assert.IsTrue(unstoredFieldValues[1].GetStringValue().Equals("test2", StringComparison.Ordinal));
        }
    }

    /// <summary>
    /// Verifies that mutating a shared <see cref="Field"/> instance via
    /// <see cref="Field.SetStringValue(string)"/> between AddDocument calls
    /// indexes each distinct value exactly once.
    /// </summary>
    [Test]
    public virtual void TestFieldSetValue()
    {
        Field field = new StringField("id", "id1", Field.Store.YES);
        Documents.Document doc = new Documents.Document();
        doc.Add(field);
        doc.Add(new StringField("keyword", "test", Field.Store.YES));

        Directory dir = NewDirectory();
        RandomIndexWriter writer = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
            this,
#endif
            Random, dir);
        writer.AddDocument(doc);
        field.SetStringValue("id2");
        writer.AddDocument(doc);
        field.SetStringValue("id3");
        writer.AddDocument(doc);

        IndexReader reader = writer.GetReader();
        IndexSearcher searcher = NewSearcher(reader);

        Query query = new TermQuery(new Term("keyword", "test"));

        // ensure that queries return expected results without DateFilter first
        ScoreDoc[] hits = searcher.Search(query, null, 1000).ScoreDocs;
        Assert.AreEqual(3, hits.Length);
        // Each hit sets one bit; all three ids seen <=> result == 0b111.
        int result = 0;
        for (int i = 0; i < 3; i++)
        {
            Documents.Document doc2 = searcher.Doc(hits[i].Doc);
            Field f = (Field)doc2.GetField("id");
            if (f.GetStringValue().Equals("id1", StringComparison.Ordinal))
            {
                result |= 1;
            }
            else if (f.GetStringValue().Equals("id2", StringComparison.Ordinal))
            {
                result |= 2;
            }
            else if (f.GetStringValue().Equals("id3", StringComparison.Ordinal))
            {
                result |= 4;
            }
            else
            {
                Assert.Fail("unexpected id field");
            }
        }
        writer.Dispose();
        reader.Dispose();
        dir.Dispose();
        Assert.AreEqual(7, result, "did not see all IDs");
    }

    // LUCENE-3616
    [Test]
    public virtual void TestInvalidFields()
    {
        Assert.Throws<System.ArgumentException>(() => { new Field("foo", new MockTokenizer(new StreamReader(File.Open("", FileMode.Open))), StringField.TYPE_STORED); });
    }

    // LUCENE-3682
    /// <summary>
    /// Exercises the deprecated pre-4.0 <see cref="Field"/> constructors
    /// (Store/Index/TermVector enums, Reader/TokenStream/byte[] values) and
    /// verifies storage, search and term-vector behavior still matches.
    /// </summary>
    [Test]
    public virtual void TestTransitionAPI()
    {
        Directory dir = NewDirectory();
        RandomIndexWriter w = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
            this,
#endif
            Random, dir);

        Documents.Document doc = new Documents.Document();
#pragma warning disable 612, 618
        doc.Add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO));
        doc.Add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
        doc.Add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED));
        doc.Add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED));
        doc.Add(new Field("tokenized_reader", new StringReader("abc xyz")));
        doc.Add(new Field("tokenized_tokenstream", w.IndexWriter.Analyzer.GetTokenStream("tokenized_tokenstream", new StringReader("abc xyz"))));
        doc.Add(new Field("binary", new byte[10]));
        doc.Add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
        doc.Add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
        doc.Add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
        doc.Add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
#pragma warning restore 612, 618
        w.AddDocument(doc);
        IndexReader r = w.GetReader();
        w.Dispose();

        doc = r.Document(0);
        // 4 stored fields
        Assert.AreEqual(4, doc.Fields.Count);
        Assert.AreEqual("abc", doc.Get("stored"));
        Assert.AreEqual("abc xyz", doc.Get("stored_indexed"));
        Assert.AreEqual("abc xyz", doc.Get("stored_tokenized"));
        BytesRef br = doc.GetBinaryValue("binary");
        Assert.IsNotNull(br);
        Assert.AreEqual(10, br.Length);

        IndexSearcher s = new IndexSearcher(r);
        Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).TotalHits);
        Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "abc")), 1).TotalHits);
        Assert.AreEqual(1, s.Search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).TotalHits);
        Assert.AreEqual(1, s.Search(new TermQuery(new Term("indexed", "abc xyz")), 1).TotalHits);
        Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "abc")), 1).TotalHits);
        Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized", "xyz")), 1).TotalHits);
        Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "abc")), 1).TotalHits);
        Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).TotalHits);
        Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).TotalHits);
        Assert.AreEqual(1, s.Search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).TotalHits);

        foreach (string field in new string[] { "tv", "tv_pos", "tv_off", "tv_pos_off" })
        {
            Fields tvFields = r.GetTermVectors(0);
            Terms tvs = tvFields.GetTerms(field);
            Assert.IsNotNull(tvs);
            Assert.AreEqual(2, tvs.Count);
            TermsEnum tvsEnum = tvs.GetIterator(null);
            Assert.AreEqual(new BytesRef("abc"), tvsEnum.Next());
            // Only TermVector.YES (no positions/offsets) yields a null positions enum.
            DocsAndPositionsEnum dpEnum = tvsEnum.DocsAndPositions(null, null);
            if (field.Equals("tv", StringComparison.Ordinal))
            {
                Assert.IsNull(dpEnum);
            }
            else
            {
                Assert.IsNotNull(dpEnum);
            }
            Assert.AreEqual(new BytesRef("xyz"), tvsEnum.Next());
            Assert.IsNull(tvsEnum.Next());
        }

        r.Dispose();
        dir.Dispose();
    }

    /// <summary>
    /// Verifies that numeric fields (<see cref="Int32Field"/>) are exposed as
    /// their string representation via <see cref="Document.Get(string)"/> and
    /// <see cref="Document.GetValues(string)"/>, both before and after indexing.
    /// </summary>
    [Test]
    public virtual void TestNumericFieldAsString()
    {
        Documents.Document doc = new Documents.Document();
        doc.Add(new Int32Field("int", 5, Field.Store.YES));
        Assert.AreEqual("5", doc.Get("int"));
        Assert.IsNull(doc.Get("somethingElse"));
        doc.Add(new Int32Field("int", 4, Field.Store.YES));
        Assert.AreEqual(new string[] { "5", "4" }, doc.GetValues("int"));

        Directory dir = NewDirectory();
        RandomIndexWriter iw = new RandomIndexWriter(
#if FEATURE_INSTANCE_TESTDATA_INITIALIZATION
            this,
#endif
            Random, dir);
        iw.AddDocument(doc);
        DirectoryReader ir = iw.GetReader();
        Documents.Document sdoc = ir.Document(0);
        Assert.AreEqual("5", sdoc.Get("int"));
        Assert.IsNull(sdoc.Get("somethingElse"));
        Assert.AreEqual(new string[] { "5", "4" }, sdoc.GetValues("int"));
        ir.Dispose();
        iw.Dispose();
        dir.Dispose();
    }
}
}