/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import static org.apache.lucene.index.PostingsEnum.ALL;
import static org.apache.lucene.index.PostingsEnum.FREQS;
import static org.apache.lucene.index.PostingsEnum.NONE;
import static org.apache.lucene.index.PostingsEnum.OFFSETS;
import static org.apache.lucene.index.PostingsEnum.PAYLOADS;
import static org.apache.lucene.index.PostingsEnum.POSITIONS;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.TermVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
/**
* Base class for testing {@link TermVectorsFormat term vectors formats}.
* To test a new format, all you need to do is register a new {@link Codec} which
* uses it, extend this class, and override {@link #getCodec()}.
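* <p>For example, a minimal sketch ({@code MyCodec} is a hypothetical codec whose
* {@link Codec#termVectorsFormat()} returns the format under test; it is not part
* of this API):
* <pre class="prettyprint">
* public class TestMyTermVectorsFormat extends BaseTermVectorsFormatTestCase {
*   &#64;Override
*   protected Codec getCodec() {
*     return new MyCodec();
*   }
* }
* </pre>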
* @lucene.experimental
*/
public abstract class BaseTermVectorsFormatTestCase extends BaseIndexFileFormatTestCase {
/**
* A combination of term vectors options.
*/
protected enum Options {
NONE(false, false, false),
POSITIONS(true, false, false),
OFFSETS(false, true, false),
POSITIONS_AND_OFFSETS(true, true, false),
POSITIONS_AND_PAYLOADS(true, false, true),
POSITIONS_AND_OFFSETS_AND_PAYLOADS(true, true, true);
final boolean positions, offsets, payloads;
private Options(boolean positions, boolean offsets, boolean payloads) {
this.positions = positions;
this.offsets = offsets;
this.payloads = payloads;
}
}
protected Set<Options> validOptions() {
return EnumSet.allOf(Options.class);
}
protected Options randomOptions() {
return RandomPicks.randomFrom(random(), new ArrayList<>(validOptions()));
}
protected FieldType fieldType(Options options) {
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setStoreTermVectors(true);
ft.setStoreTermVectorPositions(options.positions);
ft.setStoreTermVectorOffsets(options.offsets);
ft.setStoreTermVectorPayloads(options.payloads);
ft.freeze();
return ft;
}
@Override
protected void addRandomFields(Document doc) {
for (Options opts : validOptions()) {
FieldType ft = fieldType(opts);
final int numFields = random().nextInt(5);
for (int j = 0; j < numFields; ++j) {
doc.add(new Field("f_" + opts, TestUtil.randomSimpleString(random(), 2), ft));
}
}
}
// custom impl to test cases that are forbidden by the default OffsetAttribute impl
private static class PermissiveOffsetAttributeImpl extends AttributeImpl implements OffsetAttribute {
int start, end;
@Override
public int startOffset() {
return start;
}
@Override
public int endOffset() {
return end;
}
@Override
public void setOffset(int startOffset, int endOffset) {
// no check!
start = startOffset;
end = endOffset;
}
@Override
public void clear() {
start = end = 0;
}
@Override
public boolean equals(Object other) {
if (other == this) {
return true;
}
if (other instanceof PermissiveOffsetAttributeImpl) {
PermissiveOffsetAttributeImpl o = (PermissiveOffsetAttributeImpl) other;
return o.start == start && o.end == end;
}
return false;
}
@Override
public int hashCode() {
return start + 31 * end;
}
@Override
public void copyTo(AttributeImpl target) {
OffsetAttribute t = (OffsetAttribute) target;
t.setOffset(start, end);
}
@Override
public void reflectWith(AttributeReflector reflector) {
reflector.reflect(OffsetAttribute.class, "startOffset", start);
reflector.reflect(OffsetAttribute.class, "endOffset", end);
}
}
// TODO: use CannedTokenStream?
// TODO: pull out and make top-level-utility, separate from TermVectors
/** Produces a random TokenStream based on the provided terms. */
public static class RandomTokenStream extends TokenStream {
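// Besides the tokens themselves, this stream records per-term frequencies and maps from
// positions and start offsets back to token indexes; the term vector assertions below
// rely on these to verify what the codec actually stored.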
final String[] terms;
final BytesRef[] termBytes;
final int[] positionsIncrements;
final int[] positions;
final int[] startOffsets, endOffsets;
final BytesRef[] payloads;
final Map<String, Integer> freqs;
final Map<Integer, Set<Integer>> positionToTerms;
final Map<Integer, Set<Integer>> startOffsetToTerms;
final CharTermAttribute termAtt;
final PositionIncrementAttribute piAtt;
final OffsetAttribute oAtt;
final PayloadAttribute pAtt;
int i = 0;
public RandomTokenStream(int len, String[] sampleTerms, BytesRef[] sampleTermBytes) {
terms = new String[len];
termBytes = new BytesRef[len];
positionsIncrements = new int[len];
positions = new int[len];
startOffsets = new int[len];
endOffsets = new int[len];
payloads = new BytesRef[len];
for (int i = 0; i < len; ++i) {
final int o = random().nextInt(sampleTerms.length);
terms[i] = sampleTerms[o];
termBytes[i] = sampleTermBytes[o];
positionsIncrements[i] = TestUtil.nextInt(random(), i == 0 ? 1 : 0, 10);
if (i == 0) {
startOffsets[i] = TestUtil.nextInt(random(), 0, 1 << 16);
} else {
startOffsets[i] = startOffsets[i-1] + TestUtil.nextInt(random(), 0, rarely() ? 1 << 16 : 20);
}
endOffsets[i] = startOffsets[i] + TestUtil.nextInt(random(), 0, rarely() ? 1 << 10 : 20);
}
for (int i = 0; i < len; ++i) {
if (i == 0) {
positions[i] = positionsIncrements[i] - 1;
} else {
positions[i] = positions[i - 1] + positionsIncrements[i];
}
}
if (rarely()) {
Arrays.fill(payloads, randomPayload());
} else {
for (int i = 0; i < len; ++i) {
payloads[i] = randomPayload();
}
}
positionToTerms = new HashMap<>(len);
startOffsetToTerms = new HashMap<>(len);
for (int i = 0; i < len; ++i) {
if (!positionToTerms.containsKey(positions[i])) {
positionToTerms.put(positions[i], new HashSet<Integer>(1));
}
positionToTerms.get(positions[i]).add(i);
if (!startOffsetToTerms.containsKey(startOffsets[i])) {
startOffsetToTerms.put(startOffsets[i], new HashSet<Integer>(1));
}
startOffsetToTerms.get(startOffsets[i]).add(i);
}
freqs = new HashMap<>();
for (String term : terms) {
if (freqs.containsKey(term)) {
freqs.put(term, freqs.get(term) + 1);
} else {
freqs.put(term, 1);
}
}
addAttributeImpl(new PermissiveOffsetAttributeImpl());
termAtt = addAttribute(CharTermAttribute.class);
piAtt = addAttribute(PositionIncrementAttribute.class);
oAtt = addAttribute(OffsetAttribute.class);
pAtt = addAttribute(PayloadAttribute.class);
}
protected BytesRef randomPayload() {
final int len = random().nextInt(5);
if (len == 0) {
return null;
}
final BytesRef payload = new BytesRef(len);
random().nextBytes(payload.bytes);
payload.length = len;
return newBytesRef(payload);
}
public boolean hasPayloads() {
for (BytesRef payload : payloads) {
if (payload != null && payload.length > 0) {
return true;
}
}
return false;
}
public String[] getTerms() {
return terms;
}
public BytesRef[] getTermBytes() {
return termBytes;
}
public int[] getPositionsIncrements() {
return positionsIncrements;
}
public int[] getStartOffsets() {
return startOffsets;
}
public int[] getEndOffsets() {
return endOffsets;
}
public BytesRef[] getPayloads() {
return payloads;
}
@Override
public void reset() throws IOException {
i = 0;
super.reset();
}
@Override
public final boolean incrementToken() throws IOException {
if (i < terms.length) {
clearAttributes();
termAtt.setLength(0).append(terms[i]);
piAtt.setPositionIncrement(positionsIncrements[i]);
oAtt.setOffset(startOffsets[i], endOffsets[i]);
pAtt.setPayload(payloads[i]);
++i;
return true;
} else {
return false;
}
}
}
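// A document with a fixed number of distinct term vector fields, each indexed from its own
// RandomTokenStream, all sharing the same term vector options.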
protected class RandomDocument {
private final String[] fieldNames;
private final FieldType[] fieldTypes;
private final RandomTokenStream[] tokenStreams;
protected RandomDocument(int fieldCount, int maxTermCount, Options options, String[] fieldNames, String[] sampleTerms, BytesRef[] sampleTermBytes) {
if (fieldCount > fieldNames.length) {
throw new IllegalArgumentException();
}
this.fieldNames = new String[fieldCount];
fieldTypes = new FieldType[fieldCount];
tokenStreams = new RandomTokenStream[fieldCount];
Arrays.fill(fieldTypes, fieldType(options));
final Set<String> usedFieldNames = new HashSet<>();
for (int i = 0; i < fieldCount; ++i) {
do {
this.fieldNames[i] = RandomPicks.randomFrom(random(), fieldNames);
} while (usedFieldNames.contains(this.fieldNames[i]));
usedFieldNames.add(this.fieldNames[i]);
tokenStreams[i] = new RandomTokenStream(TestUtil.nextInt(random(), 1, maxTermCount), sampleTerms, sampleTermBytes);
}
}
public Document toDocument() {
final Document doc = new Document();
for (int i = 0; i < fieldNames.length; ++i) {
doc.add(new Field(fieldNames[i], tokenStreams[i], fieldTypes[i]));
}
return doc;
}
}
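// Builds RandomDocuments whose field names and terms are drawn from fixed random pools,
// so that documents produced by the same factory share fields and terms.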
protected class RandomDocumentFactory {
private final String[] fieldNames;
private final String[] terms;
private final BytesRef[] termBytes;
protected RandomDocumentFactory(int distinctFieldNames, int distinctTerms) {
final Set<String> fieldNames = new HashSet<>();
while (fieldNames.size() < distinctFieldNames) {
fieldNames.add(TestUtil.randomSimpleString(random()));
fieldNames.remove("id");
}
this.fieldNames = fieldNames.toArray(new String[0]);
terms = new String[distinctTerms];
termBytes = new BytesRef[distinctTerms];
for (int i = 0; i < distinctTerms; ++i) {
terms[i] = TestUtil.randomRealisticUnicodeString(random());
termBytes[i] = newBytesRef(terms[i]);
}
}
public RandomDocument newDocument(int fieldCount, int maxTermCount, Options options) {
return new RandomDocument(fieldCount, maxTermCount, options, fieldNames, terms, termBytes);
}
}
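// Checks that the term vectors of a document expose exactly the fields that were indexed with
// vectors, then delegates the per-field checks to assertEquals(RandomTokenStream, FieldType, Terms).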
protected void assertEquals(RandomDocument doc, Fields fields) throws IOException {
// compare field names
assertEquals(doc == null, fields == null);
assertEquals(doc.fieldNames.length, fields.size());
final Set<String> fields1 = new HashSet<>();
final Set<String> fields2 = new HashSet<>();
for (int i = 0; i < doc.fieldNames.length; ++i) {
fields1.add(doc.fieldNames[i]);
}
for (String field : fields) {
fields2.add(field);
}
assertEquals(fields1, fields2);
for (int i = 0; i < doc.fieldNames.length; ++i) {
assertEquals(doc.tokenStreams[i], doc.fieldTypes[i], fields.terms(doc.fieldNames[i]));
}
}
protected static boolean equals(Object o1, Object o2) {
if (o1 == null) {
return o2 == null;
} else {
return o1.equals(o2);
}
}
// to test reuse
private final ThreadLocal<PostingsEnum> docsEnum = new ThreadLocal<>();
private final ThreadLocal<PostingsEnum> docsAndPositionsEnum = new ThreadLocal<>();
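// Verifies a single-document term vector against the token stream it was built from: doc count,
// term count, per-term frequencies, and (depending on the field type) positions, offsets and
// payloads, followed by a few random seekExact/seekCeil calls on the terms enum.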
protected void assertEquals(RandomTokenStream tk, FieldType ft, Terms terms) throws IOException {
assertEquals(1, terms.getDocCount());
final int termCount = new HashSet<>(Arrays.asList(tk.terms)).size();
assertEquals(termCount, terms.size());
assertEquals(termCount, terms.getSumDocFreq());
assertEquals(ft.storeTermVectorPositions(), terms.hasPositions());
assertEquals(ft.storeTermVectorOffsets(), terms.hasOffsets());
assertEquals(ft.storeTermVectorPayloads() && tk.hasPayloads(), terms.hasPayloads());
final Set<BytesRef> uniqueTerms = new HashSet<>();
for (String term : tk.freqs.keySet()) {
uniqueTerms.add(new BytesRef(term));
}
final BytesRef[] sortedTerms = uniqueTerms.toArray(new BytesRef[0]);
Arrays.sort(sortedTerms);
final TermsEnum termsEnum = terms.iterator();
for (int i = 0; i < sortedTerms.length; ++i) {
final BytesRef nextTerm = termsEnum.next();
assertEquals(sortedTerms[i], nextTerm);
assertEquals(sortedTerms[i], termsEnum.term());
assertEquals(1, termsEnum.docFreq());
PostingsEnum postingsEnum = termsEnum.postings(null);
postingsEnum = termsEnum.postings(random().nextBoolean() ? null : postingsEnum);
assertNotNull(postingsEnum);
assertEquals(0, postingsEnum.nextDoc());
assertEquals(0, postingsEnum.docID());
assertEquals(tk.freqs.get(termsEnum.term().utf8ToString()), (Integer) postingsEnum.freq());
assertEquals(PostingsEnum.NO_MORE_DOCS, postingsEnum.nextDoc());
this.docsEnum.set(postingsEnum);
PostingsEnum docsAndPositionsEnum = termsEnum.postings(null);
docsAndPositionsEnum = termsEnum.postings(random().nextBoolean() ? null : docsAndPositionsEnum, PostingsEnum.POSITIONS);
if (terms.hasPositions() || terms.hasOffsets()) {
assertEquals(0, docsAndPositionsEnum.nextDoc());
final int freq = docsAndPositionsEnum.freq();
assertEquals(tk.freqs.get(termsEnum.term().utf8ToString()), (Integer) freq);
if (docsAndPositionsEnum != null) {
for (int k = 0; k < freq; ++k) {
final int position = docsAndPositionsEnum.nextPosition();
final Set<Integer> indexes;
if (terms.hasPositions()) {
indexes = tk.positionToTerms.get(position);
assertNotNull(indexes);
} else {
indexes = tk.startOffsetToTerms.get(docsAndPositionsEnum.startOffset());
assertNotNull(indexes);
}
if (terms.hasPositions()) {
boolean foundPosition = false;
for (int index : indexes) {
if (tk.termBytes[index].equals(termsEnum.term()) && tk.positions[index] == position) {
foundPosition = true;
break;
}
}
assertTrue(foundPosition);
}
if (terms.hasOffsets()) {
boolean foundOffset = false;
for (int index : indexes) {
if (tk.termBytes[index].equals(termsEnum.term()) && tk.startOffsets[index] == docsAndPositionsEnum.startOffset() && tk.endOffsets[index] == docsAndPositionsEnum.endOffset()) {
foundOffset = true;
break;
}
}
assertTrue(foundOffset);
}
if (terms.hasPayloads()) {
boolean foundPayload = false;
for (int index : indexes) {
if (tk.termBytes[index].equals(termsEnum.term()) && equals(tk.payloads[index], docsAndPositionsEnum.getPayload())) {
foundPayload = true;
break;
}
}
assertTrue(foundPayload);
}
}
try {
docsAndPositionsEnum.nextPosition();
fail();
} catch (Exception | AssertionError e) {
// ok
}
}
assertEquals(PostingsEnum.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
}
this.docsEnum.set(docsAndPositionsEnum);
}
assertNull(termsEnum.next());
for (int i = 0; i < 5; ++i) {
if (random().nextBoolean()) {
assertTrue(termsEnum.seekExact(RandomPicks.randomFrom(random(), tk.termBytes)));
} else {
assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(RandomPicks.randomFrom(random(), tk.termBytes)));
}
}
}
protected Document addId(Document doc, String id) {
doc.add(new StringField("id", id, Store.NO));
return doc;
}
protected int docID(IndexReader reader, String id) throws IOException {
return new IndexSearcher(reader).search(new TermQuery(new Term("id", id)), 1).scoreDocs[0].doc;
}
// only one doc with vectors
public void testRareVectors() throws IOException {
final RandomDocumentFactory docFactory = new RandomDocumentFactory(10, 20);
for (Options options : validOptions()) {
final int numDocs = atLeast(200);
final int docWithVectors = random().nextInt(numDocs);
final Document emptyDoc = new Document();
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
final RandomDocument doc = docFactory.newDocument(TestUtil.nextInt(random(), 1, 3), 20, options);
for (int i = 0; i < numDocs; ++i) {
if (i == docWithVectors) {
writer.addDocument(addId(doc.toDocument(), "42"));
} else {
writer.addDocument(emptyDoc);
}
}
final IndexReader reader = writer.getReader();
final int docWithVectorsID = docID(reader, "42");
for (int i = 0; i < 10; ++i) {
final int docID = random().nextInt(numDocs);
final Fields fields = reader.getTermVectors(docID);
if (docID == docWithVectorsID) {
assertEquals(doc, fields);
} else {
assertNull(fields);
}
}
final Fields fields = reader.getTermVectors(docWithVectorsID);
assertEquals(doc, fields);
reader.close();
writer.close();
dir.close();
}
}
public void testHighFreqs() throws IOException {
final RandomDocumentFactory docFactory = new RandomDocumentFactory(3, 5);
for (Options options : validOptions()) {
if (options == Options.NONE) {
continue;
}
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
final RandomDocument doc = docFactory.newDocument(TestUtil.nextInt(random(), 1, 2), atLeast(2000), options);
writer.addDocument(doc.toDocument());
final IndexReader reader = writer.getReader();
assertEquals(doc, reader.getTermVectors(0));
reader.close();
writer.close();
dir.close();
}
}
@Slow
public void testLotsOfFields() throws IOException {
int fieldCount = atLeast(100);
final RandomDocumentFactory docFactory = new RandomDocumentFactory(fieldCount, 10);
for (Options options : validOptions()) {
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
final RandomDocument doc = docFactory.newDocument(TestUtil.nextInt(random(), 20, fieldCount), 5, options);
writer.addDocument(doc.toDocument());
final IndexReader reader = writer.getReader();
assertEquals(doc, reader.getTermVectors(0));
reader.close();
writer.close();
dir.close();
}
}
// different options for the same field
public void testMixedOptions() throws IOException {
final int numFields = TestUtil.nextInt(random(), 1, 3);
final RandomDocumentFactory docFactory = new RandomDocumentFactory(numFields, 10);
for (Options options1 : validOptions()) {
for (Options options2 : validOptions()) {
if (options1 == options2) {
continue;
}
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
final RandomDocument doc1 = docFactory.newDocument(numFields, 20, options1);
final RandomDocument doc2 = docFactory.newDocument(numFields, 20, options2);
writer.addDocument(addId(doc1.toDocument(), "1"));
writer.addDocument(addId(doc2.toDocument(), "2"));
final IndexReader reader = writer.getReader();
final int doc1ID = docID(reader, "1");
assertEquals(doc1, reader.getTermVectors(doc1ID));
final int doc2ID = docID(reader, "2");
assertEquals(doc2, reader.getTermVectors(doc2ID));
reader.close();
writer.close();
dir.close();
}
}
}
public void testRandom() throws IOException {
final RandomDocumentFactory docFactory = new RandomDocumentFactory(5, 20);
final int numDocs = atLeast(50);
final RandomDocument[] docs = new RandomDocument[numDocs];
for (int i = 0; i < numDocs; ++i) {
docs[i] = docFactory.newDocument(TestUtil.nextInt(random(), 1, 3), TestUtil.nextInt(random(), 10, 50), randomOptions());
}
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
for (int i = 0; i < numDocs; ++i) {
writer.addDocument(addId(docs[i].toDocument(), ""+i));
}
final IndexReader reader = writer.getReader();
for (int i = 0; i < numDocs; ++i) {
final int docID = docID(reader, ""+i);
assertEquals(docs[i], reader.getTermVectors(docID));
}
reader.close();
writer.close();
dir.close();
}
public void testMerge() throws IOException {
final RandomDocumentFactory docFactory = new RandomDocumentFactory(5, 20);
final int numDocs = atLeast(100);
final int numDeletes = random().nextInt(numDocs);
final Set<Integer> deletes = new HashSet<>();
while (deletes.size() < numDeletes) {
deletes.add(random().nextInt(numDocs));
}
for (Options options : validOptions()) {
final RandomDocument[] docs = new RandomDocument[numDocs];
for (int i = 0; i < numDocs; ++i) {
docs[i] = docFactory.newDocument(TestUtil.nextInt(random(), 1, 3), atLeast(10), options);
}
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
for (int i = 0; i < numDocs; ++i) {
writer.addDocument(addId(docs[i].toDocument(), ""+i));
if (rarely()) {
writer.commit();
}
}
for (int delete : deletes) {
writer.deleteDocuments(new Term("id", "" + delete));
}
// merge with deletes
writer.forceMerge(1);
final IndexReader reader = writer.getReader();
for (int i = 0; i < numDocs; ++i) {
if (!deletes.contains(i)) {
final int docID = docID(reader, ""+i);
assertEquals(docs[i], reader.getTermVectors(docID));
}
}
reader.close();
writer.close();
dir.close();
}
}
// run random tests from different threads to make sure the per-thread clones
// don't share mutable data
public void testClone() throws IOException, InterruptedException {
final RandomDocumentFactory docFactory = new RandomDocumentFactory(5, 20);
final int numDocs = atLeast(50);
for (Options options : validOptions()) {
final RandomDocument[] docs = new RandomDocument[numDocs];
for (int i = 0; i < numDocs; ++i) {
docs[i] = docFactory.newDocument(TestUtil.nextInt(random(), 1, 3), atLeast(10), options);
}
final Directory dir = newDirectory();
final RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
for (int i = 0; i < numDocs; ++i) {
writer.addDocument(addId(docs[i].toDocument(), ""+i));
}
final IndexReader reader = writer.getReader();
for (int i = 0; i < numDocs; ++i) {
final int docID = docID(reader, ""+i);
assertEquals(docs[i], reader.getTermVectors(docID));
}
final AtomicReference<Throwable> exception = new AtomicReference<>();
final Thread[] threads = new Thread[2];
for (int i = 0; i < threads.length; ++i) {
threads[i] = new Thread() {
@Override
public void run() {
try {
for (int i = 0; i < atLeast(100); ++i) {
final int idx = random().nextInt(numDocs);
final int docID = docID(reader, ""+idx);
assertEquals(docs[idx], reader.getTermVectors(docID));
}
} catch (Throwable t) {
exception.set(t);
}
}
};
}
for (Thread thread : threads) {
thread.start();
}
for (Thread thread : threads) {
thread.join();
}
reader.close();
writer.close();
dir.close();
assertNull("One thread threw an exception", exception.get());
}
}
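// The testPostingsEnum* tests below exercise the PostingsEnum contract on term vectors for a
// trivial two-token document ("bar bar"): enum reuse, every flag combination, and the
// positions/offsets/payloads that each indexing option is expected to expose.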
public void testPostingsEnumFreqs() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new MockTokenizer());
}
});
IndexWriter iw = new IndexWriter(dir, iwc);
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setStoreTermVectors(true);
doc.add(new Field("foo", "bar bar", ft));
iw.addDocument(doc);
DirectoryReader reader = DirectoryReader.open(iw);
Terms terms = getOnlyLeafReader(reader).getTermVector(0, "foo");
TermsEnum termsEnum = terms.iterator();
assertNotNull(termsEnum);
assertEquals(newBytesRef("bar"), termsEnum.next());
// simple use (FREQS)
PostingsEnum postings = termsEnum.postings(null);
assertEquals(-1, postings.docID());
assertEquals(0, postings.nextDoc());
assertEquals(2, postings.freq());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
// termsenum reuse (FREQS)
PostingsEnum postings2 = termsEnum.postings(postings);
assertNotNull(postings2);
// and it had better work
assertEquals(-1, postings2.docID());
assertEquals(0, postings2.nextDoc());
assertEquals(2, postings2.freq());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc());
// asking for docs only: ok
PostingsEnum docsOnly = termsEnum.postings(null, PostingsEnum.NONE);
assertEquals(-1, docsOnly.docID());
assertEquals(0, docsOnly.nextDoc());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly.nextDoc());
// reuse that too
PostingsEnum docsOnly2 = termsEnum.postings(docsOnly, PostingsEnum.NONE);
assertNotNull(docsOnly2);
// and it had better work
assertEquals(-1, docsOnly2.docID());
assertEquals(0, docsOnly2.nextDoc());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsOnly2.freq() == 1 || docsOnly2.freq() == 2);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly2.nextDoc());
// asking for any flags: ok
for (int flag : new int[] { NONE, FREQS, POSITIONS, PAYLOADS, OFFSETS, ALL }) {
postings = termsEnum.postings(null, flag);
assertEquals(-1, postings.docID());
assertEquals(0, postings.nextDoc());
if (flag != NONE) {
assertEquals(2, postings.freq());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
// reuse that too
postings2 = termsEnum.postings(postings, flag);
assertNotNull(postings2);
// and it had better work
assertEquals(-1, postings2.docID());
assertEquals(0, postings2.nextDoc());
if (flag != NONE) {
assertEquals(2, postings2.freq());
}
assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc());
}
iw.close();
reader.close();
dir.close();
}
public void testPostingsEnumPositions() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new MockTokenizer());
}
});
IndexWriter iw = new IndexWriter(dir, iwc);
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setStoreTermVectors(true);
ft.setStoreTermVectorPositions(true);
doc.add(new Field("foo", "bar bar", ft));
iw.addDocument(doc);
DirectoryReader reader = DirectoryReader.open(iw);
Terms terms = getOnlyLeafReader(reader).getTermVector(0, "foo");
TermsEnum termsEnum = terms.iterator();
assertNotNull(termsEnum);
assertEquals(newBytesRef("bar"), termsEnum.next());
// simple use (FREQS)
PostingsEnum postings = termsEnum.postings(null);
assertEquals(-1, postings.docID());
assertEquals(0, postings.nextDoc());
assertEquals(2, postings.freq());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
// termsenum reuse (FREQS)
PostingsEnum postings2 = termsEnum.postings(postings);
assertNotNull(postings2);
// and it had better work
assertEquals(-1, postings2.docID());
assertEquals(0, postings2.nextDoc());
assertEquals(2, postings2.freq());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc());
// asking for docs only: ok
PostingsEnum docsOnly = termsEnum.postings(null, PostingsEnum.NONE);
assertEquals(-1, docsOnly.docID());
assertEquals(0, docsOnly.nextDoc());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly.nextDoc());
// reuse that too
PostingsEnum docsOnly2 = termsEnum.postings(docsOnly, PostingsEnum.NONE);
assertNotNull(docsOnly2);
// and it had better work
assertEquals(-1, docsOnly2.docID());
assertEquals(0, docsOnly2.nextDoc());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsOnly2.freq() == 1 || docsOnly2.freq() == 2);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly2.nextDoc());
// asking for positions, ok
PostingsEnum docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.POSITIONS);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(1, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
// now reuse the positions
PostingsEnum docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.POSITIONS);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(1, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
// payloads, offsets, etc don't cause an error if they aren't there
docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.PAYLOADS);
assertNotNull(docsAndPositionsEnum);
// but make sure they work
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(1, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
// reuse
docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.PAYLOADS);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(1, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS);
assertNotNull(docsAndPositionsEnum);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(1, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
// reuse
docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(1, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.ALL);
assertNotNull(docsAndPositionsEnum);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(1, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.ALL);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(1, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
iw.close();
reader.close();
dir.close();
}
public void testPostingsEnumOffsets() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new MockTokenizer());
}
});
IndexWriter iw = new IndexWriter(dir, iwc);
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setStoreTermVectors(true);
ft.setStoreTermVectorPositions(true);
ft.setStoreTermVectorOffsets(true);
doc.add(new Field("foo", "bar bar", ft));
iw.addDocument(doc);
DirectoryReader reader = DirectoryReader.open(iw);
Terms terms = getOnlyLeafReader(reader).getTermVector(0, "foo");
TermsEnum termsEnum = terms.iterator();
assertNotNull(termsEnum);
assertEquals(newBytesRef("bar"), termsEnum.next());
// simple usage (FREQS)
PostingsEnum postings = termsEnum.postings(null);
assertEquals(-1, postings.docID());
assertEquals(0, postings.nextDoc());
assertEquals(2, postings.freq());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
// termsenum reuse (FREQS)
PostingsEnum postings2 = termsEnum.postings(postings);
assertNotNull(postings2);
// and it had better work
assertEquals(-1, postings2.docID());
assertEquals(0, postings2.nextDoc());
assertEquals(2, postings2.freq());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc());
// asking for docs only: ok
PostingsEnum docsOnly = termsEnum.postings(null, PostingsEnum.NONE);
assertEquals(-1, docsOnly.docID());
assertEquals(0, docsOnly.nextDoc());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly.nextDoc());
// reuse that too
PostingsEnum docsOnly2 = termsEnum.postings(docsOnly, PostingsEnum.NONE);
assertNotNull(docsOnly2);
// and it had better work
assertEquals(-1, docsOnly2.docID());
assertEquals(0, docsOnly2.nextDoc());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsOnly2.freq() == 1 || docsOnly2.freq() == 2);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly2.nextDoc());
// asking for positions, ok
PostingsEnum docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.POSITIONS);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 0);
assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 3);
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(1, docsAndPositionsEnum.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 4);
assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 7);
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
// now reuse the positions
PostingsEnum docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.POSITIONS);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 0);
assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 3);
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(1, docsAndPositionsEnum2.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 4);
assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 7);
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
// payloads don't cause an error if they aren't there
docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.PAYLOADS);
assertNotNull(docsAndPositionsEnum);
// but make sure they work
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 0);
assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 3);
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(1, docsAndPositionsEnum.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 4);
assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 7);
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
// reuse
docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.PAYLOADS);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 0);
assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 3);
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(1, docsAndPositionsEnum2.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 4);
assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 7);
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS);
assertNotNull(docsAndPositionsEnum);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
assertEquals(0, docsAndPositionsEnum.startOffset());
assertEquals(3, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(1, docsAndPositionsEnum.nextPosition());
assertEquals(4, docsAndPositionsEnum.startOffset());
assertEquals(7, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
// reuse
docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
assertEquals(0, docsAndPositionsEnum2.startOffset());
assertEquals(3, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(1, docsAndPositionsEnum2.nextPosition());
assertEquals(4, docsAndPositionsEnum2.startOffset());
assertEquals(7, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.ALL);
assertNotNull(docsAndPositionsEnum);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
assertEquals(0, docsAndPositionsEnum.startOffset());
assertEquals(3, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(1, docsAndPositionsEnum.nextPosition());
assertEquals(4, docsAndPositionsEnum.startOffset());
assertEquals(7, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.ALL);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
assertEquals(0, docsAndPositionsEnum2.startOffset());
assertEquals(3, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(1, docsAndPositionsEnum2.nextPosition());
assertEquals(4, docsAndPositionsEnum2.startOffset());
assertEquals(7, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
iw.close();
reader.close();
dir.close();
}
public void testPostingsEnumOffsetsWithoutPositions() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(new MockTokenizer());
}
});
IndexWriter iw = new IndexWriter(dir, iwc);
Document doc = new Document();
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setStoreTermVectors(true);
ft.setStoreTermVectorOffsets(true);
doc.add(new Field("foo", "bar bar", ft));
iw.addDocument(doc);
DirectoryReader reader = DirectoryReader.open(iw);
Terms terms = getOnlyLeafReader(reader).getTermVector(0, "foo");
TermsEnum termsEnum = terms.iterator();
assertNotNull(termsEnum);
assertEquals(newBytesRef("bar"), termsEnum.next());
// simple usage (FREQS)
PostingsEnum postings = termsEnum.postings(null);
assertEquals(-1, postings.docID());
assertEquals(0, postings.nextDoc());
assertEquals(2, postings.freq());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
// termsenum reuse (FREQS)
PostingsEnum postings2 = termsEnum.postings(postings);
assertNotNull(postings2);
// and it had better work
assertEquals(-1, postings2.docID());
assertEquals(0, postings2.nextDoc());
assertEquals(2, postings2.freq());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc());
// asking for docs only: ok
PostingsEnum docsOnly = termsEnum.postings(null, PostingsEnum.NONE);
assertEquals(-1, docsOnly.docID());
assertEquals(0, docsOnly.nextDoc());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly.nextDoc());
// reuse that too
PostingsEnum docsOnly2 = termsEnum.postings(docsOnly, PostingsEnum.NONE);
assertNotNull(docsOnly2);
// and it had better work
assertEquals(-1, docsOnly2.docID());
assertEquals(0, docsOnly2.nextDoc());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsOnly2.freq() == 1 || docsOnly2.freq() == 2);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly2.nextDoc());
// asking for positions, ok
PostingsEnum docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.POSITIONS);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(-1, docsAndPositionsEnum.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 0);
assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 3);
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(-1, docsAndPositionsEnum.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 4);
assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 7);
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
// now reuse the positions
PostingsEnum docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.POSITIONS);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(-1, docsAndPositionsEnum2.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 0);
assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 3);
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(-1, docsAndPositionsEnum2.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 4);
assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 7);
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
// payloads don't cause an error if they aren't there
docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.PAYLOADS);
assertNotNull(docsAndPositionsEnum);
// but make sure they work
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(-1, docsAndPositionsEnum.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 0);
assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 3);
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(-1, docsAndPositionsEnum.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 4);
assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 7);
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
// reuse
docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.PAYLOADS);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(-1, docsAndPositionsEnum2.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 0);
assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 3);
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(-1, docsAndPositionsEnum2.nextPosition());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 4);
assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 7);
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS);
assertNotNull(docsAndPositionsEnum);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(-1, docsAndPositionsEnum.nextPosition());
assertEquals(0, docsAndPositionsEnum.startOffset());
assertEquals(3, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(-1, docsAndPositionsEnum.nextPosition());
assertEquals(4, docsAndPositionsEnum.startOffset());
assertEquals(7, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
// reuse
docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(-1, docsAndPositionsEnum2.nextPosition());
assertEquals(0, docsAndPositionsEnum2.startOffset());
assertEquals(3, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(-1, docsAndPositionsEnum2.nextPosition());
assertEquals(4, docsAndPositionsEnum2.startOffset());
assertEquals(7, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.ALL);
assertNotNull(docsAndPositionsEnum);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(-1, docsAndPositionsEnum.nextPosition());
assertEquals(0, docsAndPositionsEnum.startOffset());
assertEquals(3, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(-1, docsAndPositionsEnum.nextPosition());
assertEquals(4, docsAndPositionsEnum.startOffset());
assertEquals(7, docsAndPositionsEnum.endOffset());
assertNull(docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.ALL);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(-1, docsAndPositionsEnum2.nextPosition());
assertEquals(0, docsAndPositionsEnum2.startOffset());
assertEquals(3, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(-1, docsAndPositionsEnum2.nextPosition());
assertEquals(4, docsAndPositionsEnum2.startOffset());
assertEquals(7, docsAndPositionsEnum2.endOffset());
assertNull(docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
iw.close();
reader.close();
dir.close();
}
public void testPostingsEnumPayloads() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = new IndexWriterConfig(null);
IndexWriter iw = new IndexWriter(dir, iwc);
Document doc = new Document();
Token token1 = new Token("bar", 0, 3);
token1.setPayload(newBytesRef("pay1"));
Token token2 = new Token("bar", 4, 7);
token2.setPayload(newBytesRef("pay2"));
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setStoreTermVectors(true);
ft.setStoreTermVectorPositions(true);
ft.setStoreTermVectorPayloads(true);
doc.add(new Field("foo", new CannedTokenStream(token1, token2), ft));
iw.addDocument(doc);
DirectoryReader reader = DirectoryReader.open(iw);
Terms terms = getOnlyLeafReader(reader).getTermVector(0, "foo");
TermsEnum termsEnum = terms.iterator();
assertNotNull(termsEnum);
assertEquals(newBytesRef("bar"), termsEnum.next());
// sugar method (FREQS)
PostingsEnum postings = termsEnum.postings(null);
assertEquals(-1, postings.docID());
assertEquals(0, postings.nextDoc());
assertEquals(2, postings.freq());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
// termsenum reuse (FREQS)
PostingsEnum postings2 = termsEnum.postings(postings);
assertNotNull(postings2);
// and it had better work
assertEquals(-1, postings2.docID());
assertEquals(0, postings2.nextDoc());
assertEquals(2, postings2.freq());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc());
// asking for docs only: ok
PostingsEnum docsOnly = termsEnum.postings(null, PostingsEnum.NONE);
assertEquals(-1, docsOnly.docID());
assertEquals(0, docsOnly.nextDoc());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly.nextDoc());
// reuse that too
PostingsEnum docsOnly2 = termsEnum.postings(docsOnly, PostingsEnum.NONE);
assertNotNull(docsOnly2);
// and it had better work
assertEquals(-1, docsOnly2.docID());
assertEquals(0, docsOnly2.nextDoc());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(docsOnly2.freq() == 1 || docsOnly2.freq() == 2);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly2.nextDoc());
// asking for positions, ok
PostingsEnum docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.POSITIONS);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(
docsAndPositionsEnum.getPayload() == null
|| newBytesRef("pay1").equals(docsAndPositionsEnum.getPayload()));
assertEquals(1, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(
docsAndPositionsEnum.getPayload() == null
|| newBytesRef("pay2").equals(docsAndPositionsEnum.getPayload()));
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
// now reuse the positions
PostingsEnum docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.POSITIONS);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
// we don't define what it is, but if it's something else, we should look into it?
assertTrue(
docsAndPositionsEnum2.getPayload() == null
|| newBytesRef("pay1").equals(docsAndPositionsEnum2.getPayload()));
assertEquals(1, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(
docsAndPositionsEnum2.getPayload() == null
|| newBytesRef("pay2").equals(docsAndPositionsEnum2.getPayload()));
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
// payloads
docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.PAYLOADS);
assertNotNull(docsAndPositionsEnum);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
assertEquals(newBytesRef("pay1"), docsAndPositionsEnum.getPayload());
assertEquals(1, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
assertEquals(newBytesRef("pay2"), docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
// reuse
docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.PAYLOADS);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
assertEquals(newBytesRef("pay1"), docsAndPositionsEnum2.getPayload());
assertEquals(1, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
assertEquals(newBytesRef("pay2"), docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
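// offsets were not indexed for this field, so asking for OFFSETS must still work but return -1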
docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS);
assertNotNull(docsAndPositionsEnum);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(
docsAndPositionsEnum.getPayload() == null
|| newBytesRef("pay1").equals(docsAndPositionsEnum.getPayload()));
assertEquals(1, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(
docsAndPositionsEnum.getPayload() == null
|| newBytesRef("pay2").equals(docsAndPositionsEnum.getPayload()));
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
// reuse
docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(
docsAndPositionsEnum2.getPayload() == null
|| newBytesRef("pay1").equals(docsAndPositionsEnum2.getPayload()));
assertEquals(1, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(
docsAndPositionsEnum2.getPayload() == null
|| newBytesRef("pay2").equals(docsAndPositionsEnum2.getPayload()));
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
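// asking for everything: positions and payloads must be exact; offsets stay -1 since they were not indexed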
docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.ALL);
assertNotNull(docsAndPositionsEnum);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
assertEquals(newBytesRef("pay1"), docsAndPositionsEnum.getPayload());
assertEquals(1, docsAndPositionsEnum.nextPosition());
assertEquals(-1, docsAndPositionsEnum.startOffset());
assertEquals(-1, docsAndPositionsEnum.endOffset());
assertEquals(newBytesRef("pay2"), docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.ALL);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
assertEquals(newBytesRef("pay1"), docsAndPositionsEnum2.getPayload());
assertEquals(1, docsAndPositionsEnum2.nextPosition());
assertEquals(-1, docsAndPositionsEnum2.startOffset());
assertEquals(-1, docsAndPositionsEnum2.endOffset());
assertEquals(newBytesRef("pay2"), docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
iw.close();
reader.close();
dir.close();
}
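/**
 * Same walk through the {@link PostingsEnum} flags as above, but with positions, offsets and
 * payloads all stored in the term vector: whatever was requested must be returned exactly, and
 * data that was not requested may be absent but never wrong.
 */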
public void testPostingsEnumAll() throws Exception {
Directory dir = newDirectory();
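// a null analyzer is fine here: the field below supplies its own CannedTokenStream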
IndexWriterConfig iwc = new IndexWriterConfig(null);
IndexWriter iw = new IndexWriter(dir, iwc);
Document doc = new Document();
Token token1 = new Token("bar", 0, 3);
token1.setPayload(newBytesRef("pay1"));
Token token2 = new Token("bar", 4, 7);
token2.setPayload(newBytesRef("pay2"));
FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
ft.setStoreTermVectors(true);
ft.setStoreTermVectorPositions(true);
ft.setStoreTermVectorPayloads(true);
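// unlike the previous case, offsets are stored too, so exact offsets are expected below when requested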
ft.setStoreTermVectorOffsets(true);
doc.add(new Field("foo", new CannedTokenStream(token1, token2), ft));
iw.addDocument(doc);
DirectoryReader reader = DirectoryReader.open(iw);
Terms terms = getOnlyLeafReader(reader).getTermVector(0, "foo");
TermsEnum termsEnum = terms.iterator();
assertNotNull(termsEnum);
assertEquals(newBytesRef("bar"), termsEnum.next());
// sugar method (FREQS)
PostingsEnum postings = termsEnum.postings(null);
assertEquals(-1, postings.docID());
assertEquals(0, postings.nextDoc());
assertEquals(2, postings.freq());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
// TermsEnum reuse (FREQS)
PostingsEnum postings2 = termsEnum.postings(postings);
assertNotNull(postings2);
// and it had better work
assertEquals(-1, postings2.docID());
assertEquals(0, postings2.nextDoc());
assertEquals(2, postings2.freq());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc());
// asking for docs only: ok
PostingsEnum docsOnly = termsEnum.postings(null, PostingsEnum.NONE);
assertEquals(-1, docsOnly.docID());
assertEquals(0, docsOnly.nextDoc());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly.nextDoc());
// reuse that too
PostingsEnum docsOnly2 = termsEnum.postings(docsOnly, PostingsEnum.NONE);
assertNotNull(docsOnly2);
// and it had better work
assertEquals(-1, docsOnly2.docID());
assertEquals(0, docsOnly2.nextDoc());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(docsOnly2.freq() == 1 || docsOnly2.freq() == 2);
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly2.nextDoc());
// asking for positions, ok
PostingsEnum docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.POSITIONS);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 0);
assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 3);
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(
docsAndPositionsEnum.getPayload() == null
|| newBytesRef("pay1").equals(docsAndPositionsEnum.getPayload()));
assertEquals(1, docsAndPositionsEnum.nextPosition());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 4);
assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 7);
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(
docsAndPositionsEnum.getPayload() == null
|| newBytesRef("pay2").equals(docsAndPositionsEnum.getPayload()));
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
// now reuse the positions
PostingsEnum docsAndPositionsEnum2 =
termsEnum.postings(docsAndPositionsEnum, PostingsEnum.POSITIONS);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 0);
assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 3);
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(
docsAndPositionsEnum2.getPayload() == null
|| newBytesRef("pay1").equals(docsAndPositionsEnum2.getPayload()));
assertEquals(1, docsAndPositionsEnum2.nextPosition());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 4);
assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 7);
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(
docsAndPositionsEnum2.getPayload() == null
|| newBytesRef("pay2").equals(docsAndPositionsEnum2.getPayload()));
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
// payloads
docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.PAYLOADS);
assertNotNull(docsAndPositionsEnum);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 0);
assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 3);
assertEquals(newBytesRef("pay1"), docsAndPositionsEnum.getPayload());
assertEquals(1, docsAndPositionsEnum.nextPosition());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 4);
assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 7);
assertEquals(newBytesRef("pay2"), docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
// reuse
docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.PAYLOADS);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 0);
assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 3);
assertEquals(newBytesRef("pay1"), docsAndPositionsEnum2.getPayload());
assertEquals(1, docsAndPositionsEnum2.nextPosition());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 4);
assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 7);
assertEquals(newBytesRef("pay2"), docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
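// offsets: indexed this time, so exact start/end offsets are required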
docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS);
assertNotNull(docsAndPositionsEnum);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
assertEquals(0, docsAndPositionsEnum.startOffset());
assertEquals(3, docsAndPositionsEnum.endOffset());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(
docsAndPositionsEnum.getPayload() == null
|| newBytesRef("pay1").equals(docsAndPositionsEnum.getPayload()));
assertEquals(1, docsAndPositionsEnum.nextPosition());
assertEquals(4, docsAndPositionsEnum.startOffset());
assertEquals(7, docsAndPositionsEnum.endOffset());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(
docsAndPositionsEnum.getPayload() == null
|| newBytesRef("pay2").equals(docsAndPositionsEnum.getPayload()));
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
// reuse
docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
assertEquals(0, docsAndPositionsEnum2.startOffset());
assertEquals(3, docsAndPositionsEnum2.endOffset());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(
docsAndPositionsEnum2.getPayload() == null
|| newBytesRef("pay1").equals(docsAndPositionsEnum2.getPayload()));
assertEquals(1, docsAndPositionsEnum2.nextPosition());
assertEquals(4, docsAndPositionsEnum2.startOffset());
assertEquals(7, docsAndPositionsEnum2.endOffset());
// the exact value is not defined at this level, but anything else would be worth looking into
assertTrue(
docsAndPositionsEnum2.getPayload() == null
|| newBytesRef("pay2").equals(docsAndPositionsEnum2.getPayload()));
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
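// asking for everything: positions, offsets and payloads must all be exact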
docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.ALL);
assertNotNull(docsAndPositionsEnum);
assertEquals(-1, docsAndPositionsEnum.docID());
assertEquals(0, docsAndPositionsEnum.nextDoc());
assertEquals(2, docsAndPositionsEnum.freq());
assertEquals(0, docsAndPositionsEnum.nextPosition());
assertEquals(0, docsAndPositionsEnum.startOffset());
assertEquals(3, docsAndPositionsEnum.endOffset());
assertEquals(newBytesRef("pay1"), docsAndPositionsEnum.getPayload());
assertEquals(1, docsAndPositionsEnum.nextPosition());
assertEquals(4, docsAndPositionsEnum.startOffset());
assertEquals(7, docsAndPositionsEnum.endOffset());
assertEquals(newBytesRef("pay2"), docsAndPositionsEnum.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc());
docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.ALL);
assertEquals(-1, docsAndPositionsEnum2.docID());
assertEquals(0, docsAndPositionsEnum2.nextDoc());
assertEquals(2, docsAndPositionsEnum2.freq());
assertEquals(0, docsAndPositionsEnum2.nextPosition());
assertEquals(0, docsAndPositionsEnum2.startOffset());
assertEquals(3, docsAndPositionsEnum2.endOffset());
assertEquals(newBytesRef("pay1"), docsAndPositionsEnum2.getPayload());
assertEquals(1, docsAndPositionsEnum2.nextPosition());
assertEquals(4, docsAndPositionsEnum2.startOffset());
assertEquals(7, docsAndPositionsEnum2.endOffset());
assertEquals(newBytesRef("pay2"), docsAndPositionsEnum2.getPayload());
assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc());
iw.close();
reader.close();
dir.close();
}
}