blob: 90a11dfe213d164722f9e7b3ad0c614075ac6bc1 [file] [log] [blame]
package org.apache.lucene.document;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.StringReader;
import org.apache.lucene.analysis.EmptyTokenizer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
/**
* Tests {@link Document} class.
*/
public class TestDocument extends LuceneTestCase {
  
  String binaryVal = "this text will be stored as a byte array in the index";
  String binaryVal2 = "this text will be also stored as a byte array in the index";
  
  /**
   * Verifies that binary (byte[]) stored fields round-trip through a
   * {@link Document}: they are stored-only (neither indexed nor tokenized),
   * multiple values under the same name are kept in insertion order, and
   * fields can be removed by name.
   */
  public void testBinaryField() throws Exception {
    Document doc = new Document();
    
    FieldType ft = new FieldType();
    ft.setStored(true);
    IndexableField stringFld = new Field("string", binaryVal, ft);
    // Encode with an explicit charset: utf8ToString() below assumes UTF-8
    // bytes, so relying on the platform default encoding would be fragile.
    IndexableField binaryFld = new StoredField("binary", binaryVal.getBytes("UTF-8"));
    IndexableField binaryFld2 = new StoredField("binary", binaryVal2.getBytes("UTF-8"));
    
    doc.add(stringFld);
    doc.add(binaryFld);
    
    assertEquals(2, doc.getFields().size());
    
    // A StoredField built from bytes must expose a binary value and be
    // stored-only.
    assertTrue(binaryFld.binaryValue() != null);
    assertTrue(binaryFld.fieldType().stored());
    assertFalse(binaryFld.fieldType().indexed());
    assertFalse(binaryFld.fieldType().tokenized());
    
    String binaryTest = doc.getBinaryValue("binary").utf8ToString();
    assertTrue(binaryTest.equals(binaryVal));
    
    String stringTest = doc.get("string");
    assertTrue(binaryTest.equals(stringTest));
    
    doc.add(binaryFld2);
    
    assertEquals(3, doc.getFields().size());
    
    // Both binary values must come back, in the order they were added.
    BytesRef[] binaryTests = doc.getBinaryValues("binary");
    
    assertEquals(2, binaryTests.length);
    
    binaryTest = binaryTests[0].utf8ToString();
    String binaryTest2 = binaryTests[1].utf8ToString();
    
    assertFalse(binaryTest.equals(binaryTest2));
    
    assertTrue(binaryTest.equals(binaryVal));
    assertTrue(binaryTest2.equals(binaryVal2));
    
    doc.removeField("string");
    assertEquals(2, doc.getFields().size());
    
    // removeFields removes every value sharing the name in one call.
    doc.removeFields("binary");
    assertEquals(0, doc.getFields().size());
  }
  
  /**
   * Tests {@link Document#removeField(String)} method for a brand new Document
   * that has not been indexed yet.
   * 
   * @throws Exception on error
   */
  public void testRemoveForNewDocument() throws Exception {
    Document doc = makeDocumentWithFields();
    assertEquals(8, doc.getFields().size());
    doc.removeFields("keyword");
    assertEquals(6, doc.getFields().size());
    doc.removeFields("doesnotexists"); // removing non-existing fields is
                                       // silently ignored
    doc.removeFields("keyword"); // removing a field more than once
    assertEquals(6, doc.getFields().size());
    // removeField (singular) removes only the first occurrence per call.
    doc.removeField("text");
    assertEquals(5, doc.getFields().size());
    doc.removeField("text");
    assertEquals(4, doc.getFields().size());
    doc.removeField("text");
    assertEquals(4, doc.getFields().size());
    doc.removeField("doesnotexists"); // removing non-existing fields is
                                      // silently ignored
    assertEquals(4, doc.getFields().size());
    doc.removeFields("unindexed");
    assertEquals(2, doc.getFields().size());
    doc.removeFields("unstored");
    assertEquals(0, doc.getFields().size());
    doc.removeFields("doesnotexists"); // removing non-existing fields is
                                       // silently ignored
    assertEquals(0, doc.getFields().size());
  }
  
  /**
   * Verifies that the {@link Field} constructors reject invalid
   * {@link FieldType} configurations: a type that is neither stored nor
   * indexed, and term vectors requested without indexing.
   */
  public void testConstructorExceptions() {
    FieldType ft = new FieldType();
    ft.setStored(true);
    new Field("name", "value", ft); // okay
    new StringField("name", "value"); // okay
    try {
      // Neither stored nor indexed: the field would do nothing.
      new Field("name", "value", new FieldType());
      fail();
    } catch (IllegalArgumentException e) {
      // expected exception
    }
    new Field("name", "value", ft); // okay
    try {
      FieldType ft2 = new FieldType();
      ft2.setStored(true);
      // Term vectors require the field to be indexed.
      ft2.setStoreTermVectors(true);
      new Field("name", "value", ft2);
      fail();
    } catch (IllegalArgumentException e) {
      // expected exception
    }
  }
  
  /**
   * Tests {@link Document#getValues(String)} method for a brand new Document
   * that has not been indexed yet.
   * 
   * @throws Exception on error
   */
  public void testGetValuesForNewDocument() throws Exception {
    doAssert(makeDocumentWithFields(), false);
  }
  
  /**
   * Tests {@link Document#getValues(String)} method for a Document retrieved
   * from an index.
   * 
   * @throws Exception on error
   */
  public void testGetValuesForIndexedDocument() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    writer.addDocument(makeDocumentWithFields());
    IndexReader reader = writer.getReader();
    
    IndexSearcher searcher = newSearcher(reader);
    
    // search for something that does exists
    Query query = new TermQuery(new Term("keyword", "test1"));
    
    // ensure that queries return expected results without DateFilter first
    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals(1, hits.length);
    
    doAssert(searcher.doc(hits[0].doc), true);
    writer.close();
    reader.close();
    dir.close();
  }
  
  /**
   * Verifies {@link Document#getValues(String)} returns all values per field
   * name, and an empty array (not null) for an unknown name.
   */
  public void testGetValues() {
    Document doc = makeDocumentWithFields();
    // assertArrayEquals compares element-wise; the deprecated
    // assertEquals(Object[], Object[]) overload is easily confused with the
    // identity-comparing assertEquals(Object, Object).
    assertArrayEquals(new String[] {"test1", "test2"},
                      doc.getValues("keyword"));
    assertArrayEquals(new String[] {"test1", "test2"},
                      doc.getValues("text"));
    assertArrayEquals(new String[] {"test1", "test2"},
                      doc.getValues("unindexed"));
    assertArrayEquals(new String[0],
                      doc.getValues("nope"));
  }
  
  /**
   * Builds the shared fixture document: two values for each of the field
   * names "keyword" (stored, not analyzed), "text" (stored, analyzed),
   * "unindexed" (stored only) and "unstored" (indexed only) — 8 fields total.
   */
  private Document makeDocumentWithFields() {
    Document doc = new Document();
    FieldType stored = new FieldType();
    stored.setStored(true);
    doc.add(new Field("keyword", "test1", StringField.TYPE_STORED));
    doc.add(new Field("keyword", "test2", StringField.TYPE_STORED));
    doc.add(new Field("text", "test1", TextField.TYPE_STORED));
    doc.add(new Field("text", "test2", TextField.TYPE_STORED));
    doc.add(new Field("unindexed", "test1", stored));
    doc.add(new Field("unindexed", "test2", stored));
    doc.add(new TextField("unstored", "test1"));
    doc.add(new TextField("unstored", "test2"));
    return doc;
  }
  
  /**
   * Asserts the fixture document built by {@link #makeDocumentWithFields()}
   * is intact.
   *
   * @param doc the document to check
   * @param fromIndex true when {@code doc} was retrieved from an index, in
   *        which case the unstored fields are expected to be absent
   */
  private void doAssert(Document doc, boolean fromIndex) {
    IndexableField[] keywordFieldValues = doc.getFields("keyword");
    IndexableField[] textFieldValues = doc.getFields("text");
    IndexableField[] unindexedFieldValues = doc.getFields("unindexed");
    IndexableField[] unstoredFieldValues = doc.getFields("unstored");
    
    assertTrue(keywordFieldValues.length == 2);
    assertTrue(textFieldValues.length == 2);
    assertTrue(unindexedFieldValues.length == 2);
    // this test cannot work for documents retrieved from the index
    // since unstored fields will obviously not be returned
    if (!fromIndex) {
      assertTrue(unstoredFieldValues.length == 2);
    }
    
    assertTrue(keywordFieldValues[0].stringValue().equals("test1"));
    assertTrue(keywordFieldValues[1].stringValue().equals("test2"));
    assertTrue(textFieldValues[0].stringValue().equals("test1"));
    assertTrue(textFieldValues[1].stringValue().equals("test2"));
    assertTrue(unindexedFieldValues[0].stringValue().equals("test1"));
    assertTrue(unindexedFieldValues[1].stringValue().equals("test2"));
    // this test cannot work for documents retrieved from the index
    // since unstored fields will obviously not be returned
    if (!fromIndex) {
      assertTrue(unstoredFieldValues[0].stringValue().equals("test1"));
      assertTrue(unstoredFieldValues[1].stringValue().equals("test2"));
    }
  }
  
  /**
   * Verifies that a single {@link Field} instance can be reused across
   * {@link RandomIndexWriter#addDocument} calls by mutating its value with
   * {@code setStringValue}: each added document must keep the value the field
   * held at add time.
   */
  public void testFieldSetValue() throws Exception {
    
    Field field = new Field("id", "id1", StringField.TYPE_STORED);
    Document doc = new Document();
    doc.add(field);
    doc.add(new Field("keyword", "test", StringField.TYPE_STORED));
    
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    writer.addDocument(doc);
    field.setStringValue("id2");
    writer.addDocument(doc);
    field.setStringValue("id3");
    writer.addDocument(doc);
    
    IndexReader reader = writer.getReader();
    IndexSearcher searcher = newSearcher(reader);
    
    Query query = new TermQuery(new Term("keyword", "test"));
    
    // ensure that queries return expected results without DateFilter first
    ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
    assertEquals(3, hits.length);
    int result = 0;
    // Collect each id as one bit; hit order is not guaranteed, so a bitmask
    // checks that all three ids were seen exactly regardless of order.
    for (int i = 0; i < 3; i++) {
      Document doc2 = searcher.doc(hits[i].doc);
      Field f = (Field) doc2.getField("id");
      if (f.stringValue().equals("id1")) result |= 1;
      else if (f.stringValue().equals("id2")) result |= 2;
      else if (f.stringValue().equals("id3")) result |= 4;
      else fail("unexpected id field");
    }
    writer.close();
    reader.close();
    dir.close();
    assertEquals("did not see all IDs", 7, result);
  }
  
  // LUCENE-3616
  /**
   * Verifies that constructing a stored field from a TokenStream is rejected:
   * a stored field needs a concrete value, not a stream of tokens.
   */
  public void testInvalidFields() {
    try {
      new Field("foo", new EmptyTokenizer(new StringReader("")), StringField.TYPE_STORED);
      fail("did not hit expected exc");
    } catch (IllegalArgumentException iae) {
      // expected
    }
  }
  
  // LUCENE-3682
  /**
   * Verifies the pre-4.0 {@link Field} constructors (Store/Index/TermVector
   * enums, Reader and TokenStream values, byte[] binary values) still index,
   * store and produce term vectors exactly as the new typed fields do.
   */
  public void testTransitionAPI() throws Exception {
    Directory dir = newDirectory();
    RandomIndexWriter w = new RandomIndexWriter(random, dir);
    
    Document doc = new Document();
    doc.add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO));
    doc.add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED));
    doc.add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field("tokenized_reader", new StringReader("abc xyz")));
    doc.add(new Field("tokenized_tokenstream", w.w.getAnalyzer().tokenStream("tokenized_tokenstream", new StringReader("abc xyz"))));
    doc.add(new Field("binary", new byte[10]));
    doc.add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
    doc.add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
    doc.add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
    doc.add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
    w.addDocument(doc);
    IndexReader r = w.getReader();
    w.close();
    
    doc = r.document(0);
    // 4 stored fields
    assertEquals(4, doc.getFields().size());
    assertEquals("abc", doc.get("stored"));
    assertEquals("abc xyz", doc.get("stored_indexed"));
    assertEquals("abc xyz", doc.get("stored_tokenized"));
    final BytesRef br = doc.getBinaryValue("binary");
    assertNotNull(br);
    assertEquals(10, br.length);
    
    IndexSearcher s = new IndexSearcher(r);
    // NOT_ANALYZED fields index the whole value as a single term; ANALYZED
    // fields (and Reader/TokenStream values) are split into tokens.
    assertEquals(1, s.search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).totalHits);
    assertEquals(1, s.search(new TermQuery(new Term("stored_tokenized", "abc")), 1).totalHits);
    assertEquals(1, s.search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).totalHits);
    assertEquals(1, s.search(new TermQuery(new Term("indexed", "abc xyz")), 1).totalHits);
    assertEquals(1, s.search(new TermQuery(new Term("tokenized", "abc")), 1).totalHits);
    assertEquals(1, s.search(new TermQuery(new Term("tokenized", "xyz")), 1).totalHits);
    assertEquals(1, s.search(new TermQuery(new Term("tokenized_reader", "abc")), 1).totalHits);
    assertEquals(1, s.search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).totalHits);
    assertEquals(1, s.search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).totalHits);
    assertEquals(1, s.search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).totalHits);
    
    for(String field : new String[] {"tv", "tv_pos", "tv_off", "tv_pos_off"}) {
      Fields tvFields = r.getTermVectors(0);
      Terms tvs = tvFields.terms(field);
      assertNotNull(tvs);
      assertEquals(2, tvs.getUniqueTermCount());
      TermsEnum tvsEnum = tvs.iterator(null);
      assertEquals(new BytesRef("abc"), tvsEnum.next());
      final DocsAndPositionsEnum dpEnum = tvsEnum.docsAndPositions(null, null, false);
      if (field.equals("tv")) {
        // TermVector.YES records neither positions nor offsets.
        assertNull(dpEnum);
      } else {
        assertNotNull(dpEnum);
      }
      assertEquals(new BytesRef("xyz"), tvsEnum.next());
      assertNull(tvsEnum.next());
    }
    
    r.close();
    dir.close();
  }
  
  /**
   * Verifies that setting a boost on a field whose type omits norms is
   * rejected (the boost would otherwise be silently discarded), and that the
   * failed document is not added to the index.
   */
  public void testBoost() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
    iwc.setMergePolicy(newLogMergePolicy());
    IndexWriter iw = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new Field("field1", "sometext", StringField.TYPE_STORED));
    doc.add(new TextField("field2", "sometext"));
    doc.add(new StringField("foo", "bar"));
    iw.addDocument(doc); // add an 'ok' document
    try {
      doc = new Document();
      // try to boost with norms omitted
      StringField field = new StringField("foo", "baz");
      field.setBoost(5.0f);
      doc.add(field);
      iw.addDocument(doc);
      fail("didn't get any exception, boost silently discarded");
    } catch (UnsupportedOperationException expected) {
      // expected
    }
    // Only the first ('ok') document made it into the index.
    DirectoryReader ir = DirectoryReader.open(iw, false);
    assertEquals(1, ir.numDocs());
    assertEquals("sometext", ir.document(0).get("field1"));
    ir.close();
    iw.close();
    dir.close();
  }
}