| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.index; |
| |
| import static org.apache.lucene.index.PostingsEnum.ALL; |
| import static org.apache.lucene.index.PostingsEnum.FREQS; |
| import static org.apache.lucene.index.PostingsEnum.NONE; |
| import static org.apache.lucene.index.PostingsEnum.OFFSETS; |
| import static org.apache.lucene.index.PostingsEnum.PAYLOADS; |
| import static org.apache.lucene.index.PostingsEnum.POSITIONS; |
| |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.EnumSet; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.concurrent.atomic.AtomicReference; |
| |
| import com.carrotsearch.randomizedtesting.generators.RandomPicks; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.CannedTokenStream; |
| import org.apache.lucene.analysis.MockTokenizer; |
| import org.apache.lucene.analysis.Token; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; |
| import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; |
| import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.codecs.TermVectorsFormat; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.Field.Store; |
| import org.apache.lucene.document.FieldType; |
| import org.apache.lucene.document.StringField; |
| import org.apache.lucene.document.TextField; |
| import org.apache.lucene.index.TermsEnum.SeekStatus; |
| import org.apache.lucene.search.DocIdSetIterator; |
| import org.apache.lucene.search.IndexSearcher; |
| import org.apache.lucene.search.TermQuery; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.AttributeImpl; |
| import org.apache.lucene.util.AttributeReflector; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.TestUtil; |
| |
| /** |
| * Base class aiming at testing {@link TermVectorsFormat term vectors formats}. |
| * To test a new format, all you need is to register a new {@link Codec} which |
| * uses it and extend this class and override {@link #getCodec()}. |
| * @lucene.experimental |
| */ |
| public abstract class BaseTermVectorsFormatTestCase extends BaseIndexFileFormatTestCase { |
| |
| /** |
| * A combination of term vectors options. |
| */ |
| protected enum Options { |
| NONE(false, false, false), |
| POSITIONS(true, false, false), |
| OFFSETS(false, true, false), |
| POSITIONS_AND_OFFSETS(true, true, false), |
| POSITIONS_AND_PAYLOADS(true, false, true), |
| POSITIONS_AND_OFFSETS_AND_PAYLOADS(true, true, true); |
| final boolean positions, offsets, payloads; |
| private Options(boolean positions, boolean offsets, boolean payloads) { |
| this.positions = positions; |
| this.offsets = offsets; |
| this.payloads = payloads; |
| } |
| } |
| |
| protected Set<Options> validOptions() { |
| return EnumSet.allOf(Options.class); |
| } |
| |
| protected Options randomOptions() { |
| return RandomPicks.randomFrom(random(), new ArrayList<>(validOptions())); |
| } |
| |
| protected FieldType fieldType(Options options) { |
| FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); |
| ft.setStoreTermVectors(true); |
| ft.setStoreTermVectorPositions(options.positions); |
| ft.setStoreTermVectorOffsets(options.offsets); |
| ft.setStoreTermVectorPayloads(options.payloads); |
| ft.freeze(); |
| return ft; |
| } |
| |
| @Override |
| protected void addRandomFields(Document doc) { |
| for (Options opts : validOptions()) { |
| FieldType ft = fieldType(opts); |
| final int numFields = random().nextInt(5); |
| for (int j = 0; j < numFields; ++j) { |
| doc.add(new Field("f_" + opts, TestUtil.randomSimpleString(random(), 2), ft)); |
| } |
| } |
| } |
| |
| // custom impl to test cases that are forbidden by the default OffsetAttribute impl |
| private static class PermissiveOffsetAttributeImpl extends AttributeImpl implements OffsetAttribute { |
| |
| int start, end; |
| |
| @Override |
| public int startOffset() { |
| return start; |
| } |
| |
| @Override |
| public int endOffset() { |
| return end; |
| } |
| |
| @Override |
| public void setOffset(int startOffset, int endOffset) { |
| // no check! |
| start = startOffset; |
| end = endOffset; |
| } |
| |
| @Override |
| public void clear() { |
| start = end = 0; |
| } |
| |
| @Override |
| public boolean equals(Object other) { |
| if (other == this) { |
| return true; |
| } |
| |
| if (other instanceof PermissiveOffsetAttributeImpl) { |
| PermissiveOffsetAttributeImpl o = (PermissiveOffsetAttributeImpl) other; |
| return o.start == start && o.end == end; |
| } |
| |
| return false; |
| } |
| |
| @Override |
| public int hashCode() { |
| return start + 31 * end; |
| } |
| |
| @Override |
| public void copyTo(AttributeImpl target) { |
| OffsetAttribute t = (OffsetAttribute) target; |
| t.setOffset(start, end); |
| } |
| |
| @Override |
| public void reflectWith(AttributeReflector reflector) { |
| reflector.reflect(OffsetAttribute.class, "startOffset", start); |
| reflector.reflect(OffsetAttribute.class, "endOffset", end); |
| } |
| |
| } |
| |
| // TODO: use CannedTokenStream? |
| // TODO: pull out and make top-level-utility, separate from TermVectors |
| /** Produces a random TokenStream based off of provided terms. */ |
| public static class RandomTokenStream extends TokenStream { |
| |
| final String[] terms; |
| final BytesRef[] termBytes; |
| final int[] positionsIncrements; |
| final int[] positions; |
| final int[] startOffsets, endOffsets; |
| final BytesRef[] payloads; |
| |
| final Map<String, Integer> freqs; |
| final Map<Integer, Set<Integer>> positionToTerms; |
| final Map<Integer, Set<Integer>> startOffsetToTerms; |
| |
| final CharTermAttribute termAtt; |
| final PositionIncrementAttribute piAtt; |
| final OffsetAttribute oAtt; |
| final PayloadAttribute pAtt; |
| int i = 0; |
| |
| public RandomTokenStream(int len, String[] sampleTerms, BytesRef[] sampleTermBytes) { |
| terms = new String[len]; |
| termBytes = new BytesRef[len]; |
| positionsIncrements = new int[len]; |
| positions = new int[len]; |
| startOffsets = new int[len]; |
| endOffsets = new int[len]; |
| payloads = new BytesRef[len]; |
| for (int i = 0; i < len; ++i) { |
| final int o = random().nextInt(sampleTerms.length); |
| terms[i] = sampleTerms[o]; |
| termBytes[i] = sampleTermBytes[o]; |
| positionsIncrements[i] = TestUtil.nextInt(random(), i == 0 ? 1 : 0, 10); |
| if (i == 0) { |
| startOffsets[i] = TestUtil.nextInt(random(), 0, 1 << 16); |
| } else { |
| startOffsets[i] = startOffsets[i-1] + TestUtil.nextInt(random(), 0, rarely() ? 1 << 16 : 20); |
| } |
| endOffsets[i] = startOffsets[i] + TestUtil.nextInt(random(), 0, rarely() ? 1 << 10 : 20); |
| } |
| |
| for (int i = 0; i < len; ++i) { |
| if (i == 0) { |
| positions[i] = positionsIncrements[i] - 1; |
| } else { |
| positions[i] = positions[i - 1] + positionsIncrements[i]; |
| } |
| } |
| if (rarely()) { |
| Arrays.fill(payloads, randomPayload()); |
| } else { |
| for (int i = 0; i < len; ++i) { |
| payloads[i] = randomPayload(); |
| } |
| } |
| |
| positionToTerms = new HashMap<>(len); |
| startOffsetToTerms = new HashMap<>(len); |
| for (int i = 0; i < len; ++i) { |
| if (!positionToTerms.containsKey(positions[i])) { |
| positionToTerms.put(positions[i], new HashSet<Integer>(1)); |
| } |
| positionToTerms.get(positions[i]).add(i); |
| if (!startOffsetToTerms.containsKey(startOffsets[i])) { |
| startOffsetToTerms.put(startOffsets[i], new HashSet<Integer>(1)); |
| } |
| startOffsetToTerms.get(startOffsets[i]).add(i); |
| } |
| |
| freqs = new HashMap<>(); |
| for (String term : terms) { |
| if (freqs.containsKey(term)) { |
| freqs.put(term, freqs.get(term) + 1); |
| } else { |
| freqs.put(term, 1); |
| } |
| } |
| |
| addAttributeImpl(new PermissiveOffsetAttributeImpl()); |
| |
| termAtt = addAttribute(CharTermAttribute.class); |
| piAtt = addAttribute(PositionIncrementAttribute.class); |
| oAtt = addAttribute(OffsetAttribute.class); |
| pAtt = addAttribute(PayloadAttribute.class); |
| } |
| |
| protected BytesRef randomPayload() { |
| final int len = random().nextInt(5); |
| if (len == 0) { |
| return null; |
| } |
| final BytesRef payload = new BytesRef(len); |
| random().nextBytes(payload.bytes); |
| payload.length = len; |
| |
| return newBytesRef(payload); |
| } |
| |
| public boolean hasPayloads() { |
| for (BytesRef payload : payloads) { |
| if (payload != null && payload.length > 0) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| public String[] getTerms() { |
| return terms; |
| } |
| |
| public BytesRef[] getTermBytes() { |
| return termBytes; |
| } |
| |
| public int[] getPositionsIncrements() { |
| return positionsIncrements; |
| } |
| |
| public int[] getStartOffsets() { |
| return startOffsets; |
| } |
| |
| public int[] getEndOffsets() { |
| return endOffsets; |
| } |
| |
| public BytesRef[] getPayloads() { |
| return payloads; |
| } |
| |
| @Override |
| public void reset() throws IOException { |
| i = 0; |
| super.reset(); |
| } |
| |
| @Override |
| public final boolean incrementToken() throws IOException { |
| if (i < terms.length) { |
| clearAttributes(); |
| termAtt.setLength(0).append(terms[i]); |
| piAtt.setPositionIncrement(positionsIncrements[i]); |
| oAtt.setOffset(startOffsets[i], endOffsets[i]); |
| pAtt.setPayload(payloads[i]); |
| ++i; |
| return true; |
| } else { |
| return false; |
| } |
| } |
| |
| } |
| |
| protected class RandomDocument { |
| |
| private final String[] fieldNames; |
| private final FieldType[] fieldTypes; |
| private final RandomTokenStream[] tokenStreams; |
| |
| protected RandomDocument(int fieldCount, int maxTermCount, Options options, String[] fieldNames, String[] sampleTerms, BytesRef[] sampleTermBytes) { |
| if (fieldCount > fieldNames.length) { |
| throw new IllegalArgumentException(); |
| } |
| this.fieldNames = new String[fieldCount]; |
| fieldTypes = new FieldType[fieldCount]; |
| tokenStreams = new RandomTokenStream[fieldCount]; |
| Arrays.fill(fieldTypes, fieldType(options)); |
| final Set<String> usedFileNames = new HashSet<>(); |
| for (int i = 0; i < fieldCount; ++i) { |
| do { |
| this.fieldNames[i] = RandomPicks.randomFrom(random(), fieldNames); |
| } while (usedFileNames.contains(this.fieldNames[i])); |
| usedFileNames.add(this.fieldNames[i]); |
| tokenStreams[i] = new RandomTokenStream(TestUtil.nextInt(random(), 1, maxTermCount), sampleTerms, sampleTermBytes); |
| } |
| } |
| |
| public Document toDocument() { |
| final Document doc = new Document(); |
| for (int i = 0; i < fieldNames.length; ++i) { |
| doc.add(new Field(fieldNames[i], tokenStreams[i], fieldTypes[i])); |
| } |
| return doc; |
| } |
| |
| } |
| |
| protected class RandomDocumentFactory { |
| |
| private final String[] fieldNames; |
| private final String[] terms; |
| private final BytesRef[] termBytes; |
| |
| protected RandomDocumentFactory(int distinctFieldNames, int disctinctTerms) { |
| final Set<String> fieldNames = new HashSet<>(); |
| while (fieldNames.size() < distinctFieldNames) { |
| fieldNames.add(TestUtil.randomSimpleString(random())); |
| fieldNames.remove("id"); |
| } |
| this.fieldNames = fieldNames.toArray(new String[0]); |
| terms = new String[disctinctTerms]; |
| termBytes = new BytesRef[disctinctTerms]; |
| for (int i = 0; i < disctinctTerms; ++i) { |
| terms[i] = TestUtil.randomRealisticUnicodeString(random()); |
| termBytes[i] = newBytesRef(terms[i]); |
| } |
| } |
| |
| public RandomDocument newDocument(int fieldCount, int maxTermCount, Options options) { |
| return new RandomDocument(fieldCount, maxTermCount, options, fieldNames, terms, termBytes); |
| } |
| |
| } |
| |
| protected void assertEquals(RandomDocument doc, Fields fields) throws IOException { |
| // compare field names |
| assertEquals(doc == null, fields == null); |
| assertEquals(doc.fieldNames.length, fields.size()); |
| final Set<String> fields1 = new HashSet<>(); |
| final Set<String> fields2 = new HashSet<>(); |
| for (int i = 0; i < doc.fieldNames.length; ++i) { |
| fields1.add(doc.fieldNames[i]); |
| } |
| for (String field : fields) { |
| fields2.add(field); |
| } |
| assertEquals(fields1, fields2); |
| |
| for (int i = 0; i < doc.fieldNames.length; ++i) { |
| assertEquals(doc.tokenStreams[i], doc.fieldTypes[i], fields.terms(doc.fieldNames[i])); |
| } |
| } |
| |
| protected static boolean equals(Object o1, Object o2) { |
| if (o1 == null) { |
| return o2 == null; |
| } else { |
| return o1.equals(o2); |
| } |
| } |
| |
| // to test reuse |
| private final ThreadLocal<PostingsEnum> docsEnum = new ThreadLocal<>(); |
| private final ThreadLocal<PostingsEnum> docsAndPositionsEnum = new ThreadLocal<>(); |
| |
| protected void assertEquals(RandomTokenStream tk, FieldType ft, Terms terms) throws IOException { |
| assertEquals(1, terms.getDocCount()); |
| final int termCount = new HashSet<>(Arrays.asList(tk.terms)).size(); |
| assertEquals(termCount, terms.size()); |
| assertEquals(termCount, terms.getSumDocFreq()); |
| assertEquals(ft.storeTermVectorPositions(), terms.hasPositions()); |
| assertEquals(ft.storeTermVectorOffsets(), terms.hasOffsets()); |
| assertEquals(ft.storeTermVectorPayloads() && tk.hasPayloads(), terms.hasPayloads()); |
| final Set<BytesRef> uniqueTerms = new HashSet<>(); |
| for (String term : tk.freqs.keySet()) { |
| uniqueTerms.add(new BytesRef(term)); |
| } |
| final BytesRef[] sortedTerms = uniqueTerms.toArray(new BytesRef[0]); |
| Arrays.sort(sortedTerms); |
| final TermsEnum termsEnum = terms.iterator(); |
| for (int i = 0; i < sortedTerms.length; ++i) { |
| final BytesRef nextTerm = termsEnum.next(); |
| assertEquals(sortedTerms[i], nextTerm); |
| assertEquals(sortedTerms[i], termsEnum.term()); |
| assertEquals(1, termsEnum.docFreq()); |
| |
| PostingsEnum postingsEnum = termsEnum.postings(null); |
| postingsEnum = termsEnum.postings(random().nextBoolean() ? null : postingsEnum); |
| assertNotNull(postingsEnum); |
| assertEquals(0, postingsEnum.nextDoc()); |
| assertEquals(0, postingsEnum.docID()); |
| assertEquals(tk.freqs.get(termsEnum.term().utf8ToString()), (Integer) postingsEnum.freq()); |
| assertEquals(PostingsEnum.NO_MORE_DOCS, postingsEnum.nextDoc()); |
| this.docsEnum.set(postingsEnum); |
| |
| PostingsEnum docsAndPositionsEnum = termsEnum.postings(null); |
| docsAndPositionsEnum = termsEnum.postings(random().nextBoolean() ? null : docsAndPositionsEnum, PostingsEnum.POSITIONS); |
| if (terms.hasPositions() || terms.hasOffsets()) { |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| final int freq = docsAndPositionsEnum.freq(); |
| assertEquals(tk.freqs.get(termsEnum.term().utf8ToString()), (Integer) freq); |
| if (docsAndPositionsEnum != null) { |
| for (int k = 0; k < freq; ++k) { |
| final int position = docsAndPositionsEnum.nextPosition(); |
| final Set<Integer> indexes; |
| if (terms.hasPositions()) { |
| indexes = tk.positionToTerms.get(position); |
| assertNotNull(indexes); |
| } else { |
| indexes = tk.startOffsetToTerms.get(docsAndPositionsEnum.startOffset()); |
| assertNotNull(indexes); |
| } |
| if (terms.hasPositions()) { |
| boolean foundPosition = false; |
| for (int index : indexes) { |
| if (tk.termBytes[index].equals(termsEnum.term()) && tk.positions[index] == position) { |
| foundPosition = true; |
| break; |
| } |
| } |
| assertTrue(foundPosition); |
| } |
| if (terms.hasOffsets()) { |
| boolean foundOffset = false; |
| for (int index : indexes) { |
| if (tk.termBytes[index].equals(termsEnum.term()) && tk.startOffsets[index] == docsAndPositionsEnum.startOffset() && tk.endOffsets[index] == docsAndPositionsEnum.endOffset()) { |
| foundOffset = true; |
| break; |
| } |
| } |
| assertTrue(foundOffset); |
| } |
| if (terms.hasPayloads()) { |
| boolean foundPayload = false; |
| for (int index : indexes) { |
| if (tk.termBytes[index].equals(termsEnum.term()) && equals(tk.payloads[index], docsAndPositionsEnum.getPayload())) { |
| foundPayload = true; |
| break; |
| } |
| } |
| assertTrue(foundPayload); |
| } |
| } |
| try { |
| docsAndPositionsEnum.nextPosition(); |
| fail(); |
| } catch (Exception | AssertionError e) { |
| // ok |
| } |
| } |
| assertEquals(PostingsEnum.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| } |
| this.docsEnum.set(docsAndPositionsEnum); |
| } |
| assertNull(termsEnum.next()); |
| for (int i = 0; i < 5; ++i) { |
| if (random().nextBoolean()) { |
| assertTrue(termsEnum.seekExact(RandomPicks.randomFrom(random(), tk.termBytes))); |
| } else { |
| assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(RandomPicks.randomFrom(random(), tk.termBytes))); |
| } |
| } |
| } |
| |
| protected Document addId(Document doc, String id) { |
| doc.add(new StringField("id", id, Store.NO)); |
| return doc; |
| } |
| |
| protected int docID(IndexReader reader, String id) throws IOException { |
| return new IndexSearcher(reader).search(new TermQuery(new Term("id", id)), 1).scoreDocs[0].doc; |
| } |
| |
| // only one doc with vectors |
| public void testRareVectors() throws IOException { |
| final RandomDocumentFactory docFactory = new RandomDocumentFactory(10, 20); |
| for (Options options : validOptions()) { |
| final int numDocs = atLeast(200); |
| final int docWithVectors = random().nextInt(numDocs); |
| final Document emptyDoc = new Document(); |
| final Directory dir = newDirectory(); |
| final RandomIndexWriter writer = new RandomIndexWriter(random(), dir); |
| final RandomDocument doc = docFactory.newDocument(TestUtil.nextInt(random(), 1, 3), 20, options); |
| for (int i = 0; i < numDocs; ++i) { |
| if (i == docWithVectors) { |
| writer.addDocument(addId(doc.toDocument(), "42")); |
| } else { |
| writer.addDocument(emptyDoc); |
| } |
| } |
| final IndexReader reader = writer.getReader(); |
| final int docWithVectorsID = docID(reader, "42"); |
| for (int i = 0; i < 10; ++i) { |
| final int docID = random().nextInt(numDocs); |
| final Fields fields = reader.getTermVectors(docID); |
| if (docID == docWithVectorsID) { |
| assertEquals(doc, fields); |
| } else { |
| assertNull(fields); |
| } |
| } |
| final Fields fields = reader.getTermVectors(docWithVectorsID); |
| assertEquals(doc, fields); |
| reader.close(); |
| writer.close(); |
| dir.close(); |
| } |
| } |
| |
| public void testHighFreqs() throws IOException { |
| final RandomDocumentFactory docFactory = new RandomDocumentFactory(3, 5); |
| for (Options options : validOptions()) { |
| if (options == Options.NONE) { |
| continue; |
| } |
| final Directory dir = newDirectory(); |
| final RandomIndexWriter writer = new RandomIndexWriter(random(), dir); |
| final RandomDocument doc = docFactory.newDocument(TestUtil.nextInt(random(), 1, 2), atLeast(2000), options); |
| writer.addDocument(doc.toDocument()); |
| final IndexReader reader = writer.getReader(); |
| assertEquals(doc, reader.getTermVectors(0)); |
| reader.close(); |
| writer.close(); |
| dir.close(); |
| } |
| } |
| |
| @Slow |
| public void testLotsOfFields() throws IOException { |
| int fieldCount = atLeast(100); |
| final RandomDocumentFactory docFactory = new RandomDocumentFactory(fieldCount, 10); |
| for (Options options : validOptions()) { |
| final Directory dir = newDirectory(); |
| final RandomIndexWriter writer = new RandomIndexWriter(random(), dir); |
| final RandomDocument doc = docFactory.newDocument(TestUtil.nextInt(random(), 20, fieldCount), 5, options); |
| writer.addDocument(doc.toDocument()); |
| final IndexReader reader = writer.getReader(); |
| assertEquals(doc, reader.getTermVectors(0)); |
| reader.close(); |
| writer.close(); |
| dir.close(); |
| } |
| } |
| |
| // different options for the same field |
| public void testMixedOptions() throws IOException { |
| final int numFields = TestUtil.nextInt(random(), 1, 3); |
| final RandomDocumentFactory docFactory = new RandomDocumentFactory(numFields, 10); |
| for (Options options1 : validOptions()) { |
| for (Options options2 : validOptions()) { |
| if (options1 == options2) { |
| continue; |
| } |
| final Directory dir = newDirectory(); |
| final RandomIndexWriter writer = new RandomIndexWriter(random(), dir); |
| final RandomDocument doc1 = docFactory.newDocument(numFields, 20, options1); |
| final RandomDocument doc2 = docFactory.newDocument(numFields, 20, options2); |
| writer.addDocument(addId(doc1.toDocument(), "1")); |
| writer.addDocument(addId(doc2.toDocument(), "2")); |
| final IndexReader reader = writer.getReader(); |
| final int doc1ID = docID(reader, "1"); |
| assertEquals(doc1, reader.getTermVectors(doc1ID)); |
| final int doc2ID = docID(reader, "2"); |
| assertEquals(doc2, reader.getTermVectors(doc2ID)); |
| reader.close(); |
| writer.close(); |
| dir.close(); |
| } |
| } |
| } |
| |
| public void testRandom() throws IOException { |
| final RandomDocumentFactory docFactory = new RandomDocumentFactory(5, 20); |
| final int numDocs = atLeast(50); |
| final RandomDocument[] docs = new RandomDocument[numDocs]; |
| for (int i = 0; i < numDocs; ++i) { |
| docs[i] = docFactory.newDocument(TestUtil.nextInt(random(), 1, 3), TestUtil.nextInt(random(), 10, 50), randomOptions()); |
| } |
| final Directory dir = newDirectory(); |
| final RandomIndexWriter writer = new RandomIndexWriter(random(), dir); |
| for (int i = 0; i < numDocs; ++i) { |
| writer.addDocument(addId(docs[i].toDocument(), ""+i)); |
| } |
| final IndexReader reader = writer.getReader(); |
| for (int i = 0; i < numDocs; ++i) { |
| final int docID = docID(reader, ""+i); |
| assertEquals(docs[i], reader.getTermVectors(docID)); |
| } |
| reader.close(); |
| writer.close(); |
| dir.close(); |
| } |
| |
| public void testMerge() throws IOException { |
| final RandomDocumentFactory docFactory = new RandomDocumentFactory(5, 20); |
| final int numDocs = atLeast(100); |
| final int numDeletes = random().nextInt(numDocs); |
| final Set<Integer> deletes = new HashSet<>(); |
| while (deletes.size() < numDeletes) { |
| deletes.add(random().nextInt(numDocs)); |
| } |
| for (Options options : validOptions()) { |
| final RandomDocument[] docs = new RandomDocument[numDocs]; |
| for (int i = 0; i < numDocs; ++i) { |
| docs[i] = docFactory.newDocument(TestUtil.nextInt(random(), 1, 3), atLeast(10), options); |
| } |
| final Directory dir = newDirectory(); |
| final RandomIndexWriter writer = new RandomIndexWriter(random(), dir); |
| for (int i = 0; i < numDocs; ++i) { |
| writer.addDocument(addId(docs[i].toDocument(), ""+i)); |
| if (rarely()) { |
| writer.commit(); |
| } |
| } |
| for (int delete : deletes) { |
| writer.deleteDocuments(new Term("id", "" + delete)); |
| } |
| // merge with deletes |
| writer.forceMerge(1); |
| final IndexReader reader = writer.getReader(); |
| for (int i = 0; i < numDocs; ++i) { |
| if (!deletes.contains(i)) { |
| final int docID = docID(reader, ""+i); |
| assertEquals(docs[i], reader.getTermVectors(docID)); |
| } |
| } |
| reader.close(); |
| writer.close(); |
| dir.close(); |
| } |
| } |
| |
| // run random tests from different threads to make sure the per-thread clones |
| // don't share mutable data |
| public void testClone() throws IOException, InterruptedException { |
| final RandomDocumentFactory docFactory = new RandomDocumentFactory(5, 20); |
| final int numDocs = atLeast(50); |
| for (Options options : validOptions()) { |
| final RandomDocument[] docs = new RandomDocument[numDocs]; |
| for (int i = 0; i < numDocs; ++i) { |
| docs[i] = docFactory.newDocument(TestUtil.nextInt(random(), 1, 3), atLeast(10), options); |
| } |
| final Directory dir = newDirectory(); |
| final RandomIndexWriter writer = new RandomIndexWriter(random(), dir); |
| for (int i = 0; i < numDocs; ++i) { |
| writer.addDocument(addId(docs[i].toDocument(), ""+i)); |
| } |
| final IndexReader reader = writer.getReader(); |
| for (int i = 0; i < numDocs; ++i) { |
| final int docID = docID(reader, ""+i); |
| assertEquals(docs[i], reader.getTermVectors(docID)); |
| } |
| |
| final AtomicReference<Throwable> exception = new AtomicReference<>(); |
| final Thread[] threads = new Thread[2]; |
| for (int i = 0; i < threads.length; ++i) { |
| threads[i] = new Thread() { |
| @Override |
| public void run() { |
| try { |
| for (int i = 0; i < atLeast(100); ++i) { |
| final int idx = random().nextInt(numDocs); |
| final int docID = docID(reader, ""+idx); |
| assertEquals(docs[idx], reader.getTermVectors(docID)); |
| } |
| } catch (Throwable t) { |
| exception.set(t); |
| } |
| } |
| }; |
| } |
| for (Thread thread : threads) { |
| thread.start(); |
| } |
| for (Thread thread : threads) { |
| thread.join(); |
| } |
| reader.close(); |
| writer.close(); |
| dir.close(); |
| assertNull("One thread threw an exception", exception.get()); |
| } |
| } |
| |
| public void testPostingsEnumFreqs() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(new Analyzer() { |
| @Override |
| protected TokenStreamComponents createComponents(String fieldName) { |
| return new TokenStreamComponents(new MockTokenizer()); |
| } |
| }); |
| IndexWriter iw = new IndexWriter(dir, iwc); |
| Document doc = new Document(); |
| FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); |
| ft.setStoreTermVectors(true); |
| doc.add(new Field("foo", "bar bar", ft)); |
| iw.addDocument(doc); |
| DirectoryReader reader = DirectoryReader.open(iw); |
| |
| Terms terms = getOnlyLeafReader(reader).getTermVector(0, "foo"); |
| TermsEnum termsEnum = terms.iterator(); |
| assertNotNull(termsEnum); |
| assertEquals(newBytesRef("bar"), termsEnum.next()); |
| |
| // simple use (FREQS) |
| PostingsEnum postings = termsEnum.postings(null); |
| assertEquals(-1, postings.docID()); |
| assertEquals(0, postings.nextDoc()); |
| assertEquals(2, postings.freq()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc()); |
| |
| // termsenum reuse (FREQS) |
| PostingsEnum postings2 = termsEnum.postings(postings); |
| assertNotNull(postings2); |
| // and it had better work |
| assertEquals(-1, postings2.docID()); |
| assertEquals(0, postings2.nextDoc()); |
| assertEquals(2, postings2.freq()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc()); |
| |
| // asking for docs only: ok |
| PostingsEnum docsOnly = termsEnum.postings(null, PostingsEnum.NONE); |
| assertEquals(-1, docsOnly.docID()); |
| assertEquals(0, docsOnly.nextDoc()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly.nextDoc()); |
| // reuse that too |
| PostingsEnum docsOnly2 = termsEnum.postings(docsOnly, PostingsEnum.NONE); |
| assertNotNull(docsOnly2); |
| // and it had better work |
| assertEquals(-1, docsOnly2.docID()); |
| assertEquals(0, docsOnly2.nextDoc()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly2.nextDoc()); |
| |
| // asking for any flags: ok |
| for (int flag : new int[] { NONE, FREQS, POSITIONS, PAYLOADS, OFFSETS, ALL }) { |
| postings = termsEnum.postings(null, flag); |
| assertEquals(-1, postings.docID()); |
| assertEquals(0, postings.nextDoc()); |
| if (flag != NONE) { |
| assertEquals(2, postings.freq()); |
| } |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc()); |
| // reuse that too |
| postings2 = termsEnum.postings(postings, flag); |
| assertNotNull(postings2); |
| // and it had better work |
| assertEquals(-1, postings2.docID()); |
| assertEquals(0, postings2.nextDoc()); |
| if (flag != NONE) { |
| assertEquals(2, postings2.freq()); |
| } |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc()); |
| } |
| |
| iw.close(); |
| reader.close(); |
| dir.close(); |
| } |
| |
| public void testPostingsEnumPositions() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(new Analyzer() { |
| @Override |
| protected TokenStreamComponents createComponents(String fieldName) { |
| return new TokenStreamComponents(new MockTokenizer()); |
| } |
| }); |
| IndexWriter iw = new IndexWriter(dir, iwc); |
| Document doc = new Document(); |
| FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); |
| ft.setStoreTermVectors(true); |
| ft.setStoreTermVectorPositions(true); |
| doc.add(new Field("foo", "bar bar", ft)); |
| iw.addDocument(doc); |
| DirectoryReader reader = DirectoryReader.open(iw); |
| |
| Terms terms = getOnlyLeafReader(reader).getTermVector(0, "foo"); |
| TermsEnum termsEnum = terms.iterator(); |
| assertNotNull(termsEnum); |
| assertEquals(newBytesRef("bar"), termsEnum.next()); |
| |
| // simple use (FREQS) |
| PostingsEnum postings = termsEnum.postings(null); |
| assertEquals(-1, postings.docID()); |
| assertEquals(0, postings.nextDoc()); |
| assertEquals(2, postings.freq()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc()); |
| |
| // termsenum reuse (FREQS) |
| PostingsEnum postings2 = termsEnum.postings(postings); |
| assertNotNull(postings2); |
| // and it had better work |
| assertEquals(-1, postings2.docID()); |
| assertEquals(0, postings2.nextDoc()); |
| assertEquals(2, postings2.freq()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc()); |
| |
| // asking for docs only: ok |
| PostingsEnum docsOnly = termsEnum.postings(null, PostingsEnum.NONE); |
| assertEquals(-1, docsOnly.docID()); |
| assertEquals(0, docsOnly.nextDoc()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly.nextDoc()); |
| // reuse that too |
| PostingsEnum docsOnly2 = termsEnum.postings(docsOnly, PostingsEnum.NONE); |
| assertNotNull(docsOnly2); |
| // and it had better work |
| assertEquals(-1, docsOnly2.docID()); |
| assertEquals(0, docsOnly2.nextDoc()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsOnly2.freq() == 1 || docsOnly2.freq() == 2); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly2.nextDoc()); |
| |
| // asking for positions, ok |
| PostingsEnum docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.POSITIONS); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| |
| // now reuse the positions |
| PostingsEnum docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.POSITIONS); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| // payloads, offsets, etc don't cause an error if they aren't there |
| docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.PAYLOADS); |
| assertNotNull(docsAndPositionsEnum); |
| // but make sure they work |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| // reuse |
| docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.PAYLOADS); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS); |
| assertNotNull(docsAndPositionsEnum); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| // reuse |
| docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.ALL); |
| assertNotNull(docsAndPositionsEnum); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.ALL); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| iw.close(); |
| reader.close(); |
| dir.close(); |
| } |
| |
| public void testPostingsEnumOffsets() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(new Analyzer() { |
| @Override |
| protected TokenStreamComponents createComponents(String fieldName) { |
| return new TokenStreamComponents(new MockTokenizer()); |
| } |
| }); |
| IndexWriter iw = new IndexWriter(dir, iwc); |
| Document doc = new Document(); |
| FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); |
| ft.setStoreTermVectors(true); |
| ft.setStoreTermVectorPositions(true); |
| ft.setStoreTermVectorOffsets(true); |
| doc.add(new Field("foo", "bar bar", ft)); |
| iw.addDocument(doc); |
| DirectoryReader reader = DirectoryReader.open(iw); |
| |
| Terms terms = getOnlyLeafReader(reader).getTermVector(0, "foo"); |
| TermsEnum termsEnum = terms.iterator(); |
| assertNotNull(termsEnum); |
| assertEquals(newBytesRef("bar"), termsEnum.next()); |
| |
| // simple usage (FREQS) |
| PostingsEnum postings = termsEnum.postings(null); |
| assertEquals(-1, postings.docID()); |
| assertEquals(0, postings.nextDoc()); |
| assertEquals(2, postings.freq()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc()); |
| |
| // termsenum reuse (FREQS) |
| PostingsEnum postings2 = termsEnum.postings(postings); |
| assertNotNull(postings2); |
| // and it had better work |
| assertEquals(-1, postings2.docID()); |
| assertEquals(0, postings2.nextDoc()); |
| assertEquals(2, postings2.freq()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc()); |
| |
| // asking for docs only: ok |
| PostingsEnum docsOnly = termsEnum.postings(null, PostingsEnum.NONE); |
| assertEquals(-1, docsOnly.docID()); |
| assertEquals(0, docsOnly.nextDoc()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly.nextDoc()); |
| // reuse that too |
| PostingsEnum docsOnly2 = termsEnum.postings(docsOnly, PostingsEnum.NONE); |
| assertNotNull(docsOnly2); |
| // and it had better work |
| assertEquals(-1, docsOnly2.docID()); |
| assertEquals(0, docsOnly2.nextDoc()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsOnly2.freq() == 1 || docsOnly2.freq() == 2); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly2.nextDoc()); |
| |
| // asking for positions, ok |
| PostingsEnum docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.POSITIONS); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 0); |
| assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 3); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 4); |
| assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 7); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| |
| // now reuse the positions |
| PostingsEnum docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.POSITIONS); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 0); |
| assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 3); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 4); |
| assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 7); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| // payloads don't cause an error if they aren't there |
| docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.PAYLOADS); |
| assertNotNull(docsAndPositionsEnum); |
| // but make sure they work |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 0); |
| assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 3); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 4); |
| assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 7); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| // reuse |
| docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.PAYLOADS); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 0); |
| assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 3); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 4); |
| assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 7); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS); |
| assertNotNull(docsAndPositionsEnum); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| assertEquals(0, docsAndPositionsEnum.startOffset()); |
| assertEquals(3, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(4, docsAndPositionsEnum.startOffset()); |
| assertEquals(7, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| // reuse |
| docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(0, docsAndPositionsEnum2.startOffset()); |
| assertEquals(3, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(4, docsAndPositionsEnum2.startOffset()); |
| assertEquals(7, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.ALL); |
| assertNotNull(docsAndPositionsEnum); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| assertEquals(0, docsAndPositionsEnum.startOffset()); |
| assertEquals(3, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(4, docsAndPositionsEnum.startOffset()); |
| assertEquals(7, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.ALL); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(0, docsAndPositionsEnum2.startOffset()); |
| assertEquals(3, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(4, docsAndPositionsEnum2.startOffset()); |
| assertEquals(7, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| iw.close(); |
| reader.close(); |
| dir.close(); |
| } |
| |
| public void testPostingsEnumOffsetsWithoutPositions() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(new Analyzer() { |
| @Override |
| protected TokenStreamComponents createComponents(String fieldName) { |
| return new TokenStreamComponents(new MockTokenizer()); |
| } |
| }); |
| IndexWriter iw = new IndexWriter(dir, iwc); |
| Document doc = new Document(); |
| FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); |
| ft.setStoreTermVectors(true); |
| ft.setStoreTermVectorOffsets(true); |
| doc.add(new Field("foo", "bar bar", ft)); |
| iw.addDocument(doc); |
| DirectoryReader reader = DirectoryReader.open(iw); |
| |
| Terms terms = getOnlyLeafReader(reader).getTermVector(0, "foo"); |
| TermsEnum termsEnum = terms.iterator(); |
| assertNotNull(termsEnum); |
| assertEquals(newBytesRef("bar"), termsEnum.next()); |
| |
| // simple usage (FREQS) |
| PostingsEnum postings = termsEnum.postings(null); |
| assertEquals(-1, postings.docID()); |
| assertEquals(0, postings.nextDoc()); |
| assertEquals(2, postings.freq()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc()); |
| |
| // termsenum reuse (FREQS) |
| PostingsEnum postings2 = termsEnum.postings(postings); |
| assertNotNull(postings2); |
| // and it had better work |
| assertEquals(-1, postings2.docID()); |
| assertEquals(0, postings2.nextDoc()); |
| assertEquals(2, postings2.freq()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc()); |
| |
| // asking for docs only: ok |
| PostingsEnum docsOnly = termsEnum.postings(null, PostingsEnum.NONE); |
| assertEquals(-1, docsOnly.docID()); |
| assertEquals(0, docsOnly.nextDoc()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly.nextDoc()); |
| // reuse that too |
| PostingsEnum docsOnly2 = termsEnum.postings(docsOnly, PostingsEnum.NONE); |
| assertNotNull(docsOnly2); |
| // and it had better work |
| assertEquals(-1, docsOnly2.docID()); |
| assertEquals(0, docsOnly2.nextDoc()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsOnly2.freq() == 1 || docsOnly2.freq() == 2); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly2.nextDoc()); |
| |
| // asking for positions, ok |
| PostingsEnum docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.POSITIONS); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(-1, docsAndPositionsEnum.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 0); |
| assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 3); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(-1, docsAndPositionsEnum.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 4); |
| assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 7); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| |
| // now reuse the positions |
| PostingsEnum docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.POSITIONS); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(-1, docsAndPositionsEnum2.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 0); |
| assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 3); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(-1, docsAndPositionsEnum2.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 4); |
| assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 7); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| // payloads don't cause an error if they aren't there |
| docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.PAYLOADS); |
| assertNotNull(docsAndPositionsEnum); |
| // but make sure they work |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(-1, docsAndPositionsEnum.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 0); |
| assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 3); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(-1, docsAndPositionsEnum.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 4); |
| assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 7); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| // reuse |
| docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.PAYLOADS); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(-1, docsAndPositionsEnum2.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 0); |
| assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 3); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(-1, docsAndPositionsEnum2.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 4); |
| assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 7); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS); |
| assertNotNull(docsAndPositionsEnum); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(-1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(0, docsAndPositionsEnum.startOffset()); |
| assertEquals(3, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(-1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(4, docsAndPositionsEnum.startOffset()); |
| assertEquals(7, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| // reuse |
| docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(-1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(0, docsAndPositionsEnum2.startOffset()); |
| assertEquals(3, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(-1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(4, docsAndPositionsEnum2.startOffset()); |
| assertEquals(7, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.ALL); |
| assertNotNull(docsAndPositionsEnum); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(-1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(0, docsAndPositionsEnum.startOffset()); |
| assertEquals(3, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(-1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(4, docsAndPositionsEnum.startOffset()); |
| assertEquals(7, docsAndPositionsEnum.endOffset()); |
| assertNull(docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.ALL); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(-1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(0, docsAndPositionsEnum2.startOffset()); |
| assertEquals(3, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(-1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(4, docsAndPositionsEnum2.startOffset()); |
| assertEquals(7, docsAndPositionsEnum2.endOffset()); |
| assertNull(docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| iw.close(); |
| reader.close(); |
| dir.close(); |
| } |
| |
| public void testPostingsEnumPayloads() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(null); |
| IndexWriter iw = new IndexWriter(dir, iwc); |
| Document doc = new Document(); |
| Token token1 = new Token("bar", 0, 3); |
| token1.setPayload(newBytesRef("pay1")); |
| Token token2 = new Token("bar", 4, 7); |
| token2.setPayload(newBytesRef("pay2")); |
| FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); |
| ft.setStoreTermVectors(true); |
| ft.setStoreTermVectorPositions(true); |
| ft.setStoreTermVectorPayloads(true); |
| doc.add(new Field("foo", new CannedTokenStream(token1, token2), ft)); |
| iw.addDocument(doc); |
| DirectoryReader reader = DirectoryReader.open(iw); |
| |
| Terms terms = getOnlyLeafReader(reader).getTermVector(0, "foo"); |
| TermsEnum termsEnum = terms.iterator(); |
| assertNotNull(termsEnum); |
| assertEquals(newBytesRef("bar"), termsEnum.next()); |
| |
| // sugar method (FREQS) |
| PostingsEnum postings = termsEnum.postings(null); |
| assertEquals(-1, postings.docID()); |
| assertEquals(0, postings.nextDoc()); |
| assertEquals(2, postings.freq()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc()); |
| |
| // termsenum reuse (FREQS) |
| PostingsEnum postings2 = termsEnum.postings(postings); |
| assertNotNull(postings2); |
| // and it had better work |
| assertEquals(-1, postings2.docID()); |
| assertEquals(0, postings2.nextDoc()); |
| assertEquals(2, postings2.freq()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc()); |
| |
| // asking for docs only: ok |
| PostingsEnum docsOnly = termsEnum.postings(null, PostingsEnum.NONE); |
| assertEquals(-1, docsOnly.docID()); |
| assertEquals(0, docsOnly.nextDoc()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly.nextDoc()); |
| // reuse that too |
| PostingsEnum docsOnly2 = termsEnum.postings(docsOnly, PostingsEnum.NONE); |
| assertNotNull(docsOnly2); |
| // and it had better work |
| assertEquals(-1, docsOnly2.docID()); |
| assertEquals(0, docsOnly2.nextDoc()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsOnly2.freq() == 1 || docsOnly2.freq() == 2); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly2.nextDoc()); |
| |
| // asking for positions, ok |
| PostingsEnum docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.POSITIONS); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum.getPayload() == null |
| || newBytesRef("pay1").equals(docsAndPositionsEnum.getPayload())); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum.getPayload() == null |
| || newBytesRef("pay2").equals(docsAndPositionsEnum.getPayload())); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| |
| // now reuse the positions |
| PostingsEnum docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.POSITIONS); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum2.getPayload() == null |
| || newBytesRef("pay1").equals(docsAndPositionsEnum2.getPayload())); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum2.getPayload() == null |
| || newBytesRef("pay2").equals(docsAndPositionsEnum2.getPayload())); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| // payloads |
| docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.PAYLOADS); |
| assertNotNull(docsAndPositionsEnum); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| assertEquals(newBytesRef("pay1"), docsAndPositionsEnum.getPayload()); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| assertEquals(newBytesRef("pay2"), docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| // reuse |
| docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.PAYLOADS); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| assertEquals(newBytesRef("pay1"), docsAndPositionsEnum2.getPayload()); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| assertEquals(newBytesRef("pay2"), docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS); |
| assertNotNull(docsAndPositionsEnum); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum.getPayload() == null |
| || newBytesRef("pay1").equals(docsAndPositionsEnum.getPayload())); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum.getPayload() == null |
| || newBytesRef("pay2").equals(docsAndPositionsEnum.getPayload())); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| // reuse |
| docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum2.getPayload() == null |
| || newBytesRef("pay1").equals(docsAndPositionsEnum2.getPayload())); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum2.getPayload() == null |
| || newBytesRef("pay2").equals(docsAndPositionsEnum2.getPayload())); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.ALL); |
| assertNotNull(docsAndPositionsEnum); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| assertEquals(newBytesRef("pay1"), docsAndPositionsEnum.getPayload()); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum.endOffset()); |
| assertEquals(newBytesRef("pay2"), docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.ALL); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| assertEquals(newBytesRef("pay1"), docsAndPositionsEnum2.getPayload()); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(-1, docsAndPositionsEnum2.startOffset()); |
| assertEquals(-1, docsAndPositionsEnum2.endOffset()); |
| assertEquals(newBytesRef("pay2"), docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| iw.close(); |
| reader.close(); |
| dir.close(); |
| } |
| |
| public void testPostingsEnumAll() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = new IndexWriterConfig(null); |
| IndexWriter iw = new IndexWriter(dir, iwc); |
| Document doc = new Document(); |
| Token token1 = new Token("bar", 0, 3); |
| token1.setPayload(newBytesRef("pay1")); |
| Token token2 = new Token("bar", 4, 7); |
| token2.setPayload(newBytesRef("pay2")); |
| FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); |
| ft.setStoreTermVectors(true); |
| ft.setStoreTermVectorPositions(true); |
| ft.setStoreTermVectorPayloads(true); |
| ft.setStoreTermVectorOffsets(true); |
| doc.add(new Field("foo", new CannedTokenStream(token1, token2), ft)); |
| iw.addDocument(doc); |
| DirectoryReader reader = DirectoryReader.open(iw); |
| |
| Terms terms = getOnlyLeafReader(reader).getTermVector(0, "foo"); |
| TermsEnum termsEnum = terms.iterator(); |
| assertNotNull(termsEnum); |
| assertEquals(newBytesRef("bar"), termsEnum.next()); |
| |
| // sugar method (FREQS) |
| PostingsEnum postings = termsEnum.postings(null); |
| assertEquals(-1, postings.docID()); |
| assertEquals(0, postings.nextDoc()); |
| assertEquals(2, postings.freq()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc()); |
| |
| // termsenum reuse (FREQS) |
| PostingsEnum postings2 = termsEnum.postings(postings); |
| assertNotNull(postings2); |
| // and it had better work |
| assertEquals(-1, postings2.docID()); |
| assertEquals(0, postings2.nextDoc()); |
| assertEquals(2, postings2.freq()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc()); |
| |
| // asking for docs only: ok |
| PostingsEnum docsOnly = termsEnum.postings(null, PostingsEnum.NONE); |
| assertEquals(-1, docsOnly.docID()); |
| assertEquals(0, docsOnly.nextDoc()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly.nextDoc()); |
| // reuse that too |
| PostingsEnum docsOnly2 = termsEnum.postings(docsOnly, PostingsEnum.NONE); |
| assertNotNull(docsOnly2); |
| // and it had better work |
| assertEquals(-1, docsOnly2.docID()); |
| assertEquals(0, docsOnly2.nextDoc()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsOnly2.freq() == 1 || docsOnly2.freq() == 2); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly2.nextDoc()); |
| |
| // asking for positions, ok |
| PostingsEnum docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.POSITIONS); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 0); |
| assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 3); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum.getPayload() == null |
| || newBytesRef("pay1").equals(docsAndPositionsEnum.getPayload())); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 4); |
| assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 7); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum.getPayload() == null |
| || newBytesRef("pay2").equals(docsAndPositionsEnum.getPayload())); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| |
| // now reuse the positions |
| PostingsEnum docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.POSITIONS); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 0); |
| assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 3); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum2.getPayload() == null |
| || newBytesRef("pay1").equals(docsAndPositionsEnum2.getPayload())); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 4); |
| assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 7); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum2.getPayload() == null |
| || newBytesRef("pay2").equals(docsAndPositionsEnum2.getPayload())); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| // payloads |
| docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.PAYLOADS); |
| assertNotNull(docsAndPositionsEnum); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 0); |
| assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 3); |
| assertEquals(newBytesRef("pay1"), docsAndPositionsEnum.getPayload()); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum.startOffset() == -1 || docsAndPositionsEnum.startOffset() == 4); |
| assertTrue(docsAndPositionsEnum.endOffset() == -1 || docsAndPositionsEnum.endOffset() == 7); |
| assertEquals(newBytesRef("pay2"), docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| // reuse |
| docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.PAYLOADS); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 0); |
| assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 3); |
| assertEquals(newBytesRef("pay1"), docsAndPositionsEnum2.getPayload()); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue(docsAndPositionsEnum2.startOffset() == -1 || docsAndPositionsEnum2.startOffset() == 4); |
| assertTrue(docsAndPositionsEnum2.endOffset() == -1 || docsAndPositionsEnum2.endOffset() == 7); |
| assertEquals(newBytesRef("pay2"), docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.OFFSETS); |
| assertNotNull(docsAndPositionsEnum); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| assertEquals(0, docsAndPositionsEnum.startOffset()); |
| assertEquals(3, docsAndPositionsEnum.endOffset()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum.getPayload() == null |
| || newBytesRef("pay1").equals(docsAndPositionsEnum.getPayload())); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(4, docsAndPositionsEnum.startOffset()); |
| assertEquals(7, docsAndPositionsEnum.endOffset()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum.getPayload() == null |
| || newBytesRef("pay2").equals(docsAndPositionsEnum.getPayload())); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| // reuse |
| docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.OFFSETS); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(0, docsAndPositionsEnum2.startOffset()); |
| assertEquals(3, docsAndPositionsEnum2.endOffset()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum2.getPayload() == null |
| || newBytesRef("pay1").equals(docsAndPositionsEnum2.getPayload())); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(4, docsAndPositionsEnum2.startOffset()); |
| assertEquals(7, docsAndPositionsEnum2.endOffset()); |
| // we don't define what it is, but if its something else, we should look into it? |
| assertTrue( |
| docsAndPositionsEnum2.getPayload() == null |
| || newBytesRef("pay2").equals(docsAndPositionsEnum2.getPayload())); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| docsAndPositionsEnum = termsEnum.postings(null, PostingsEnum.ALL); |
| assertNotNull(docsAndPositionsEnum); |
| assertEquals(-1, docsAndPositionsEnum.docID()); |
| assertEquals(0, docsAndPositionsEnum.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum.freq()); |
| assertEquals(0, docsAndPositionsEnum.nextPosition()); |
| assertEquals(0, docsAndPositionsEnum.startOffset()); |
| assertEquals(3, docsAndPositionsEnum.endOffset()); |
| assertEquals(newBytesRef("pay1"), docsAndPositionsEnum.getPayload()); |
| assertEquals(1, docsAndPositionsEnum.nextPosition()); |
| assertEquals(4, docsAndPositionsEnum.startOffset()); |
| assertEquals(7, docsAndPositionsEnum.endOffset()); |
| assertEquals(newBytesRef("pay2"), docsAndPositionsEnum.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum.nextDoc()); |
| docsAndPositionsEnum2 = termsEnum.postings(docsAndPositionsEnum, PostingsEnum.ALL); |
| assertEquals(-1, docsAndPositionsEnum2.docID()); |
| assertEquals(0, docsAndPositionsEnum2.nextDoc()); |
| assertEquals(2, docsAndPositionsEnum2.freq()); |
| assertEquals(0, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(0, docsAndPositionsEnum2.startOffset()); |
| assertEquals(3, docsAndPositionsEnum2.endOffset()); |
| assertEquals(newBytesRef("pay1"), docsAndPositionsEnum2.getPayload()); |
| assertEquals(1, docsAndPositionsEnum2.nextPosition()); |
| assertEquals(4, docsAndPositionsEnum2.startOffset()); |
| assertEquals(7, docsAndPositionsEnum2.endOffset()); |
| assertEquals(newBytesRef("pay2"), docsAndPositionsEnum2.getPayload()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsAndPositionsEnum2.nextDoc()); |
| |
| iw.close(); |
| reader.close(); |
| dir.close(); |
| } |
| |
| } |