blob: 5d85e854bb04ed85b59e9f6fbd9c2673041944fd [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.luke.models.documents;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.store.Directory;
import org.junit.Test;
/**
 * Tests for {@link TermVectorsAdapter}.
 *
 * <p>The index built by {@link #createIndex()} holds a single document whose three fields contain
 * the same sentence but store term vectors with increasing detail: {@code text1} (term vectors
 * only), {@code text2} (with positions) and {@code text3} (with positions and offsets).
 *
 * <p>The tests read through the {@code reader} field, which is not declared in this class
 * (presumably it is opened by {@code DocumentsTestBase} - confirm there).
 */
public class TermVectorsAdapterTest extends DocumentsTestBase {

  /**
   * Builds the one-document test index under a fresh temporary directory.
   *
   * <p>The writer and the directory are managed by try-with-resources so that both are closed
   * (writer first, then directory) even when indexing fails part-way.
   *
   * @throws IOException if the index cannot be written
   */
  @Override
  protected void createIndex() throws IOException {
    indexDir = createTempDir("testIndex");

    try (Directory dir = newFSDirectory(indexDir);
        RandomIndexWriter writer = new RandomIndexWriter(random(), dir, new StandardAnalyzer())) {
      FieldType textType = newTermVectorType(false, false);
      FieldType textType_pos = newTermVectorType(true, false);
      FieldType textType_pos_offset = newTermVectorType(true, true);

      String text =
          "It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife.";

      Document doc = new Document();
      doc.add(newField("text1", text, textType));
      doc.add(newField("text2", text, textType_pos));
      doc.add(newField("text3", text, textType_pos_offset));

      writer.addDocument(doc);
      writer.commit();
    }
  }

  /**
   * Creates a tokenized field type indexed with {@link IndexOptions#DOCS_AND_FREQS} that stores
   * term vectors.
   *
   * @param withPositions whether positions are stored in the term vector
   * @param withOffsets whether offsets are stored in the term vector
   * @return the configured field type
   */
  private static FieldType newTermVectorType(boolean withPositions, boolean withOffsets) {
    FieldType type = new FieldType();
    type.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
    type.setTokenized(true);
    type.setStoreTermVectors(true);
    // Only switch the optional parts on when requested; a new FieldType is otherwise left as is.
    if (withPositions) {
      type.setStoreTermVectorPositions(true);
    }
    if (withOffsets) {
      type.setStoreTermVectorOffsets(true);
    }
    return type;
  }

  /**
   * Asserts the term text and the frequency of one term vector entry.
   *
   * <p>Taking the index once keeps the text and frequency checks on the same entry.
   *
   * @param entries term vector entries returned by the adapter
   * @param index position of the entry within {@code entries}
   * @param expectedText expected term text
   * @param expectedFreq expected term frequency
   */
  private static void assertTerm(
      List<TermVectorEntry> entries, int index, String expectedText, long expectedFreq) {
    TermVectorEntry entry = entries.get(index);
    assertEquals(expectedText, entry.getTermText());
    assertEquals(expectedFreq, entry.getFreq());
  }

  /** Checks every term and its frequency for the field that stores plain term vectors. */
  @Test
  public void testGetTermVector() throws Exception {
    TermVectorsAdapter adapterImpl = new TermVectorsAdapter(reader);
    List<TermVectorEntry> tvEntries = adapterImpl.getTermVector(0, "text1");

    assertEquals(18, tvEntries.size());

    assertTerm(tvEntries, 0, "a", 4);
    assertTerm(tvEntries, 1, "acknowledged", 1);
    assertTerm(tvEntries, 2, "be", 1);
    assertTerm(tvEntries, 3, "fortune", 1);
    assertTerm(tvEntries, 4, "good", 1);
    assertTerm(tvEntries, 5, "in", 2);
    assertTerm(tvEntries, 6, "is", 1);
    assertTerm(tvEntries, 7, "it", 1);
    assertTerm(tvEntries, 8, "man", 1);
    assertTerm(tvEntries, 9, "must", 1);
    // "of" occurs twice in the sentence ("possession of", "want of").
    assertTerm(tvEntries, 10, "of", 2);
    assertTerm(tvEntries, 11, "possession", 1);
    assertTerm(tvEntries, 12, "single", 1);
    assertTerm(tvEntries, 13, "that", 1);
    assertTerm(tvEntries, 14, "truth", 1);
    assertTerm(tvEntries, 15, "universally", 1);
    assertTerm(tvEntries, 16, "want", 1);
    assertTerm(tvEntries, 17, "wife", 1);
  }

  /** Checks that positions are reported and offsets are absent for the positions-only field. */
  @Test
  public void testGetTermVector_with_positions() throws Exception {
    TermVectorsAdapter adapterImpl = new TermVectorsAdapter(reader);
    List<TermVectorEntry> tvEntries = adapterImpl.getTermVector(0, "text2");

    assertEquals(18, tvEntries.size());

    assertTerm(tvEntries, 1, "acknowledged", 1);
    assertEquals(5, tvEntries.get(1).getPositions().get(0).getPosition());
    assertFalse(tvEntries.get(1).getPositions().get(0).getStartOffset().isPresent());
    assertFalse(tvEntries.get(1).getPositions().get(0).getEndOffset().isPresent());
  }

  /** Checks that both positions and offsets are reported for the positions-and-offsets field. */
  @Test
  public void testGetTermVector_with_positions_offsets() throws Exception {
    TermVectorsAdapter adapterImpl = new TermVectorsAdapter(reader);
    List<TermVectorEntry> tvEntries = adapterImpl.getTermVector(0, "text3");

    assertEquals(18, tvEntries.size());

    assertTerm(tvEntries, 1, "acknowledged", 1);
    assertEquals(5, tvEntries.get(1).getPositions().get(0).getPosition());
    assertEquals(26, tvEntries.get(1).getPositions().get(0).getStartOffset().orElse(-1));
    assertEquals(38, tvEntries.get(1).getPositions().get(0).getEndOffset().orElse(-1));
  }

  /** Checks that a field this class never indexes ("title") yields an empty term vector. */
  @Test
  public void testGetTermVectors_notAvailable() throws Exception {
    TermVectorsAdapter adapterImpl = new TermVectorsAdapter(reader);
    assertEquals(0, adapterImpl.getTermVector(0, "title").size());
  }
}