| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.index; |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.Random; |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| import org.apache.lucene.codecs.FieldsProducer; |
| import org.apache.lucene.codecs.NormsProducer; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field.Store; |
| import org.apache.lucene.document.StringField; |
| import org.apache.lucene.search.DocIdSetIterator; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.tests.analysis.MockAnalyzer; |
| import org.apache.lucene.tests.util.LuceneTestCase; |
| import org.apache.lucene.tests.util.TestUtil; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.InfoStream; |
| import org.apache.lucene.util.StringHelper; |
| import org.apache.lucene.util.Version; |
| import org.junit.BeforeClass; |
| |
| // TODO: test multiple codecs here? |
| |
| // TODO |
| // - test across fields |
| // - fix this test to run once for all codecs |
| // - make more docs per term, to test > 1 level skipping |
| // - test all combinations of payloads/not and omitTF/not |
| // - test w/ different indexDivisor |
| // - test field where payload length rarely changes |
| // - 0-term fields |
//     - seek/skip to same term/doc I'm already on
| // - mix in deleted docs |
| // - seek, skip beyond end -- assert returns false |
| // - seek, skip to things that don't exist -- ensure it |
| // goes to 1 before next one known to exist |
| // - skipTo(term) |
| // - skipTo(doc) |
| |
| public class TestCodecs extends LuceneTestCase { |
  private static final String[] fieldNames = new String[] {"one", "two", "three", "four"};
| |
| private static int NUM_TEST_ITER; |
| private static final int NUM_TEST_THREADS = 3; |
| private static final int NUM_FIELDS = 4; |
| private static final int NUM_TERMS_RAND = 50; // must be > 16 to test skipping |
| private static final int DOC_FREQ_RAND = 500; // must be > 16 to test skipping |
| private static final int TERM_DOC_FREQ_RAND = 20; |
| |
| @BeforeClass |
| public static void beforeClass() { |
| NUM_TEST_ITER = atLeast(20); |
| } |
| |
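  /** Expected data for one field: its {@link FieldInfo} plus a sorted array of {@link TermData}. */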
| static class FieldData implements Comparable<FieldData> { |
| final FieldInfo fieldInfo; |
| final TermData[] terms; |
| final boolean omitTF; |
| final boolean storePayloads; |
| |
| public FieldData( |
| final String name, |
| final FieldInfos.Builder fieldInfos, |
| final TermData[] terms, |
| final boolean omitTF, |
| final boolean storePayloads) { |
| this.omitTF = omitTF; |
| this.storePayloads = storePayloads; |
| // TODO: change this test to use all three |
| FieldInfo fieldInfo0 = fieldInfos.fieldInfo(name); |
| if (fieldInfo0 != null) { |
| fieldInfo = fieldInfo0; |
| } else { |
| IndexOptions indexOptions = |
| omitTF ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; |
| fieldInfo = |
| fieldInfos.add( |
| new FieldInfo( |
| name, |
| -1, |
| false, |
| false, |
| storePayloads, |
| indexOptions, |
| DocValuesType.NONE, |
| -1, |
| new HashMap<>(), |
| 0, |
| 0, |
| 0, |
| 0, |
| VectorEncoding.FLOAT32, |
| VectorSimilarityFunction.EUCLIDEAN, |
| false)); |
| } |
| this.terms = terms; |
      for (int i = 0; i < terms.length; i++) {
        terms[i].field = this;
      }
| |
| Arrays.sort(terms); |
| } |
| |
| @Override |
| public int compareTo(final FieldData other) { |
| return fieldInfo.name.compareTo(other.fieldInfo.name); |
| } |
| } |
| |
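  /** One expected position within a document, with an optional payload. */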
| static class PositionData { |
| int pos; |
| BytesRef payload; |
| |
| PositionData(final int pos, final BytesRef payload) { |
| this.pos = pos; |
| this.payload = payload; |
| } |
| } |
| |
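  /** Expected postings for a single term: ascending doc IDs plus per-doc positions (null when omitTF). */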
| static class TermData implements Comparable<TermData> { |
| String text2; |
| final BytesRef text; |
| int[] docs; |
| PositionData[][] positions; |
| FieldData field; |
| |
| public TermData(final String text, final int[] docs, final PositionData[][] positions) { |
| this.text = new BytesRef(text); |
| this.text2 = text; |
| this.docs = docs; |
| this.positions = positions; |
| } |
| |
| @Override |
| public int compareTo(final TermData o) { |
| return text.compareTo(o.text); |
| } |
| } |
| |
| private static final String SEGMENT = "0"; |
| |
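  /** Builds a random set of unique terms, each with ascending random doc IDs, positions, and payloads. */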
| TermData[] makeRandomTerms(final boolean omitTF, final boolean storePayloads) { |
| final int numTerms = 1 + random().nextInt(NUM_TERMS_RAND); |
| // final int numTerms = 2; |
| final TermData[] terms = new TermData[numTerms]; |
| |
| final HashSet<String> termsSeen = new HashSet<>(); |
| |
| for (int i = 0; i < numTerms; i++) { |
| |
| // Make term text |
| String text2; |
| while (true) { |
| text2 = TestUtil.randomUnicodeString(random()); |
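        // skip duplicates; terms ending in "." are reserved for the non-existent-term seeks below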
| if (!termsSeen.contains(text2) && !text2.endsWith(".")) { |
| termsSeen.add(text2); |
| break; |
| } |
| } |
| |
| final int docFreq = 1 + random().nextInt(DOC_FREQ_RAND); |
| final int[] docs = new int[docFreq]; |
      final PositionData[][] positions = omitTF ? null : new PositionData[docFreq][];
| |
| int docID = 0; |
| for (int j = 0; j < docFreq; j++) { |
| docID += TestUtil.nextInt(random(), 1, 10); |
| docs[j] = docID; |
| |
| if (!omitTF) { |
| final int termFreq = 1 + random().nextInt(TERM_DOC_FREQ_RAND); |
| positions[j] = new PositionData[termFreq]; |
| int position = 0; |
| for (int k = 0; k < termFreq; k++) { |
| position += TestUtil.nextInt(random(), 1, 10); |
| |
| final BytesRef payload; |
| if (storePayloads && random().nextInt(4) == 0) { |
| final byte[] bytes = new byte[1 + random().nextInt(5)]; |
| for (int l = 0; l < bytes.length; l++) { |
| bytes[l] = (byte) random().nextInt(255); |
| } |
| payload = new BytesRef(bytes); |
| } else { |
| payload = null; |
| } |
| |
| positions[j][k] = new PositionData(position, payload); |
| } |
| } |
| } |
| |
| terms[i] = new TermData(text2, docs, positions); |
| } |
| |
| return terms; |
| } |
| |
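  /** Writes 100 docs-only terms, one doc each, then verifies enumeration, enum reuse, and seeking. */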
| public void testFixedPostings() throws Throwable { |
| final int NUM_TERMS = 100; |
| final TermData[] terms = new TermData[NUM_TERMS]; |
| for (int i = 0; i < NUM_TERMS; i++) { |
| final int[] docs = new int[] {i}; |
| final String text = Integer.toString(i, Character.MAX_RADIX); |
| terms[i] = new TermData(text, docs, null); |
| } |
| |
| final FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null)); |
| |
| final FieldData field = new FieldData("field", builder, terms, true, false); |
| final FieldData[] fields = new FieldData[] {field}; |
| final FieldInfos fieldInfos = builder.finish(); |
| final Directory dir = newDirectory(); |
| Codec codec = Codec.getDefault(); |
| final SegmentInfo si = |
| new SegmentInfo( |
| dir, |
| Version.LATEST, |
| Version.LATEST, |
| SEGMENT, |
| 10000, |
| false, |
| codec, |
| Collections.emptyMap(), |
| StringHelper.randomId(), |
| new HashMap<>(), |
| null); |
| |
| this.write(si, fieldInfos, dir, fields); |
| final FieldsProducer reader = |
| codec |
| .postingsFormat() |
| .fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()))); |
| |
| final Iterator<String> fieldsEnum = reader.iterator(); |
| String fieldName = fieldsEnum.next(); |
| assertNotNull(fieldName); |
| final Terms terms2 = reader.terms(fieldName); |
| assertNotNull(terms2); |
| |
| final TermsEnum termsEnum = terms2.iterator(); |
| |
| PostingsEnum postingsEnum = null; |
| for (int i = 0; i < NUM_TERMS; i++) { |
| final BytesRef term = termsEnum.next(); |
| assertNotNull(term); |
| assertEquals(terms[i].text2, term.utf8ToString()); |
| |
      // do this twice to stress test the codec's enum reuse, i.e.,
      // make sure it properly fully resets (rewinds) its
      // internal state:
| for (int iter = 0; iter < 2; iter++) { |
| postingsEnum = TestUtil.docs(random(), termsEnum, postingsEnum, PostingsEnum.NONE); |
| assertEquals(terms[i].docs[0], postingsEnum.nextDoc()); |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postingsEnum.nextDoc()); |
| } |
| } |
| assertNull(termsEnum.next()); |
| |
| for (int i = 0; i < NUM_TERMS; i++) { |
      assertEquals(TermsEnum.SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(terms[i].text2)));
| } |
| |
| assertFalse(fieldsEnum.hasNext()); |
| reader.close(); |
| dir.close(); |
| } |
| |
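  /**
   * Writes random postings for {@link #NUM_FIELDS} fields (cycling through omitTF/storePayloads
   * combinations) and verifies them concurrently from {@link #NUM_TEST_THREADS} threads.
   */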
| public void testRandomPostings() throws Throwable { |
| final FieldInfos.Builder builder = new FieldInfos.Builder(new FieldInfos.FieldNumbers(null)); |
| |
| final FieldData[] fields = new FieldData[NUM_FIELDS]; |
| for (int i = 0; i < NUM_FIELDS; i++) { |
      final boolean omitTF = (i % 3) == 0;
      final boolean storePayloads = (i % 3) == 1;
| fields[i] = |
| new FieldData( |
| fieldNames[i], |
| builder, |
| this.makeRandomTerms(omitTF, storePayloads), |
| omitTF, |
| storePayloads); |
| } |
| |
| final Directory dir = newDirectory(); |
| final FieldInfos fieldInfos = builder.finish(); |
| |
| if (VERBOSE) { |
| System.out.println("TEST: now write postings"); |
| } |
| |
| Codec codec = Codec.getDefault(); |
| final SegmentInfo si = |
| new SegmentInfo( |
| dir, |
| Version.LATEST, |
| Version.LATEST, |
| SEGMENT, |
| 10000, |
| false, |
| codec, |
| Collections.emptyMap(), |
| StringHelper.randomId(), |
| new HashMap<>(), |
| null); |
| this.write(si, fieldInfos, dir, fields); |
| |
| if (VERBOSE) { |
| System.out.println("TEST: now read postings"); |
| } |
| final FieldsProducer terms = |
| codec |
| .postingsFormat() |
| .fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()))); |
| |
| final Verify[] threads = new Verify[NUM_TEST_THREADS - 1]; |
| for (int i = 0; i < NUM_TEST_THREADS - 1; i++) { |
| threads[i] = new Verify(si, fields, terms); |
| threads[i].setDaemon(true); |
| threads[i].start(); |
| } |
| |
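    // the current thread verifies too, for NUM_TEST_THREADS concurrent readers in total: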
| new Verify(si, fields, terms).run(); |
| |
| for (int i = 0; i < NUM_TEST_THREADS - 1; i++) { |
| threads[i].join(); |
      assertFalse(threads[i].failed);
| } |
| |
| terms.close(); |
| dir.close(); |
| } |
| |
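  /** Verifies the written postings against the expected {@link FieldData}; safe to run from multiple threads. */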
| private static class Verify extends Thread { |
| final Fields termsDict; |
| final FieldData[] fields; |
| volatile boolean failed; |
| |
| Verify(final SegmentInfo si, final FieldData[] fields, final Fields termsDict) { |
| this.fields = fields; |
| this.termsDict = termsDict; |
| } |
| |
| @Override |
| public void run() { |
| try { |
| this._run(); |
| } catch (final Throwable t) { |
| failed = true; |
| throw new RuntimeException(t); |
| } |
| } |
| |
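    /** Steps through all expected docs (optionally verifying positions) and asserts the enum is exhausted. */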
| private void verifyDocs( |
| final int[] docs, |
| final PositionData[][] positions, |
| final PostingsEnum postingsEnum, |
| final boolean doPos) |
| throws Throwable { |
| for (int i = 0; i < docs.length; i++) { |
| final int doc = postingsEnum.nextDoc(); |
| assertTrue(doc != DocIdSetIterator.NO_MORE_DOCS); |
| assertEquals(docs[i], doc); |
| if (doPos) { |
| this.verifyPositions(positions[i], postingsEnum); |
| } |
| } |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postingsEnum.nextDoc()); |
| } |
| |
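    /** Verifies each position; when a payload is expected, its bytes are also compared about two thirds of the time. */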
| private void verifyPositions(final PositionData[] positions, final PostingsEnum posEnum) |
| throws Throwable { |
| for (int i = 0; i < positions.length; i++) { |
| final int pos = posEnum.nextPosition(); |
| assertEquals(positions[i].pos, pos); |
| if (positions[i].payload != null) { |
| assertNotNull(posEnum.getPayload()); |
| if (random().nextInt(3) < 2) { |
| // Verify the payload bytes |
| final BytesRef otherPayload = posEnum.getPayload(); |
| assertTrue( |
| "expected=" + positions[i].payload.toString() + " got=" + otherPayload.toString(), |
| positions[i].payload.equals(otherPayload)); |
| } |
| } else { |
| assertNull(posEnum.getPayload()); |
| } |
| } |
| } |
| |
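    /** One verification pass: full term enumeration, random seeks by term and by ord, then docs/positions checks. */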
| public void _run() throws Throwable { |
| |
| for (int iter = 0; iter < NUM_TEST_ITER; iter++) { |
| final FieldData field = fields[random().nextInt(fields.length)]; |
| final TermsEnum termsEnum = termsDict.terms(field.fieldInfo.name).iterator(); |
| |
| int upto = 0; |
| // Test straight enum of the terms: |
| while (true) { |
| final BytesRef term = termsEnum.next(); |
| if (term == null) { |
| break; |
| } |
| final BytesRef expected = new BytesRef(field.terms[upto++].text2); |
| assertTrue("expected=" + expected + " vs actual " + term, expected.bytesEquals(term)); |
| } |
        assertEquals(field.terms.length, upto);
| |
| // Test random seek: |
| TermData term = field.terms[random().nextInt(field.terms.length)]; |
| TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(term.text2)); |
        assertEquals(TermsEnum.SeekStatus.FOUND, status);
| assertEquals(term.docs.length, termsEnum.docFreq()); |
| if (field.omitTF) { |
| this.verifyDocs( |
| term.docs, |
| term.positions, |
| TestUtil.docs(random(), termsEnum, null, PostingsEnum.NONE), |
| false); |
| } else { |
| this.verifyDocs( |
| term.docs, term.positions, termsEnum.postings(null, PostingsEnum.ALL), true); |
| } |
| |
| // Test random seek by ord: |
| final int idx = random().nextInt(field.terms.length); |
| term = field.terms[idx]; |
| boolean success = false; |
| try { |
| termsEnum.seekExact(idx); |
| success = true; |
| } catch ( |
| @SuppressWarnings("unused") |
| UnsupportedOperationException uoe) { |
| // ok -- skip it |
| } |
| if (success) { |
| assertTrue(termsEnum.term().bytesEquals(new BytesRef(term.text2))); |
| assertEquals(term.docs.length, termsEnum.docFreq()); |
| if (field.omitTF) { |
| this.verifyDocs( |
| term.docs, |
| term.positions, |
| TestUtil.docs(random(), termsEnum, null, PostingsEnum.NONE), |
| false); |
| } else { |
| this.verifyDocs( |
| term.docs, term.positions, termsEnum.postings(null, PostingsEnum.ALL), true); |
| } |
| } |
| |
| // Test seek to non-existent terms: |
| if (VERBOSE) { |
| System.out.println("TEST: seek non-exist terms"); |
| } |
| for (int i = 0; i < 100; i++) { |
| final String text2 = TestUtil.randomUnicodeString(random()) + "."; |
| status = termsEnum.seekCeil(new BytesRef(text2)); |
| assertTrue( |
| status == TermsEnum.SeekStatus.NOT_FOUND || status == TermsEnum.SeekStatus.END); |
| } |
| |
| // Seek to each term, backwards: |
| if (VERBOSE) { |
| System.out.println("TEST: seek terms backwards"); |
| } |
| for (int i = field.terms.length - 1; i >= 0; i--) { |
| assertEquals( |
| Thread.currentThread().getName() |
| + ": field=" |
| + field.fieldInfo.name |
| + " term=" |
| + field.terms[i].text2, |
| TermsEnum.SeekStatus.FOUND, |
| termsEnum.seekCeil(new BytesRef(field.terms[i].text2))); |
| assertEquals(field.terms[i].docs.length, termsEnum.docFreq()); |
| } |
| |
| // Seek to each term by ord, backwards |
| for (int i = field.terms.length - 1; i >= 0; i--) { |
| try { |
| termsEnum.seekExact(i); |
| assertEquals(field.terms[i].docs.length, termsEnum.docFreq()); |
| assertTrue(termsEnum.term().bytesEquals(new BytesRef(field.terms[i].text2))); |
| } catch ( |
| @SuppressWarnings("unused") |
| UnsupportedOperationException uoe) { |
| } |
| } |
| |
        // Seek to the empty-string term; randomUnicodeString may have produced "" as a real
        // term, so the status can be either FOUND or NOT_FOUND:
        status = termsEnum.seekCeil(new BytesRef(""));
        assertNotNull(status);
| |
| // Make sure we're now pointing to first term |
| assertTrue(termsEnum.term().bytesEquals(new BytesRef(field.terms[0].text2))); |
| |
| // Test docs enum |
| termsEnum.seekCeil(new BytesRef("")); |
| upto = 0; |
| do { |
| term = field.terms[upto]; |
| if (random().nextInt(3) == 1) { |
| final PostingsEnum postings; |
| if (!field.omitTF) { |
| // TODO: we should randomize which postings features are available, but |
| // need to coordinate this with the checks below that rely on such features |
| postings = termsEnum.postings(null, PostingsEnum.ALL); |
| } else { |
| postings = TestUtil.docs(random(), termsEnum, null, PostingsEnum.FREQS); |
| } |
| assertNotNull(postings); |
| int upto2 = -1; |
| boolean ended = false; |
| while (upto2 < term.docs.length - 1) { |
| // Maybe skip: |
| final int left = term.docs.length - upto2; |
| int doc; |
| if (random().nextInt(3) == 1 && left >= 1) { |
| final int inc = 1 + random().nextInt(left - 1); |
| upto2 += inc; |
| if (random().nextInt(2) == 1) { |
| doc = postings.advance(term.docs[upto2]); |
| assertEquals(term.docs[upto2], doc); |
| } else { |
| doc = postings.advance(1 + term.docs[upto2]); |
| if (doc == DocIdSetIterator.NO_MORE_DOCS) { |
| // skipped past last doc |
| assert upto2 == term.docs.length - 1; |
| ended = true; |
| break; |
| } else { |
| // skipped to next doc |
| assert upto2 < term.docs.length - 1; |
| if (doc >= term.docs[1 + upto2]) { |
| upto2++; |
| } |
| } |
| } |
| } else { |
| doc = postings.nextDoc(); |
              assertTrue(doc != DocIdSetIterator.NO_MORE_DOCS);
| upto2++; |
| } |
| assertEquals(term.docs[upto2], doc); |
| if (!field.omitTF) { |
| assertEquals(term.positions[upto2].length, postings.freq()); |
| if (random().nextInt(2) == 1) { |
| this.verifyPositions(term.positions[upto2], postings); |
| } |
| } |
| } |
| |
| if (!ended) { |
| assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc()); |
| } |
| } |
| upto++; |
| |
| } while (termsEnum.next() != null); |
| |
        assertEquals(field.terms.length, upto);
| } |
| } |
| } |
| |
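  /** In-memory {@link Fields} over the pre-sorted {@link FieldData} array; this is what we feed to the consumer. */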
| private static class DataFields extends Fields { |
| private final FieldData[] fields; |
| |
| public DataFields(FieldData[] fields) { |
| // already sorted: |
| this.fields = fields; |
| } |
| |
| @Override |
| public Iterator<String> iterator() { |
| return new Iterator<String>() { |
| int upto = -1; |
| |
| @Override |
| public boolean hasNext() { |
| return upto + 1 < fields.length; |
| } |
| |
| @Override |
| public String next() { |
| upto++; |
| return fields[upto].fieldInfo.name; |
| } |
| |
| @Override |
| public void remove() { |
| throw new UnsupportedOperationException(); |
| } |
| }; |
| } |
| |
| @Override |
| public Terms terms(String field) { |
| // Slow linear search: |
| for (FieldData fieldData : fields) { |
| if (fieldData.fieldInfo.name.equals(field)) { |
| return new DataTerms(fieldData); |
| } |
| } |
| return null; |
| } |
| |
| @Override |
| public int size() { |
| return fields.length; |
| } |
| } |
| |
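  /** {@link Terms} over a single {@link FieldData}; only the methods the codec's writer needs are implemented. */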
| private static class DataTerms extends Terms { |
| final FieldData fieldData; |
| |
| public DataTerms(FieldData fieldData) { |
| this.fieldData = fieldData; |
| } |
| |
| @Override |
| public TermsEnum iterator() { |
| return new DataTermsEnum(fieldData); |
| } |
| |
| @Override |
| public long size() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long getSumTotalTermFreq() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long getSumDocFreq() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public int getDocCount() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public boolean hasFreqs() { |
| return fieldData.fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; |
| } |
| |
| @Override |
| public boolean hasOffsets() { |
| return fieldData |
| .fieldInfo |
| .getIndexOptions() |
| .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) |
| >= 0; |
| } |
| |
| @Override |
| public boolean hasPositions() { |
| return fieldData |
| .fieldInfo |
| .getIndexOptions() |
| .compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) |
| >= 0; |
| } |
| |
| @Override |
| public boolean hasPayloads() { |
| return fieldData.fieldInfo.hasPayloads(); |
| } |
| } |
| |
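  /** {@link TermsEnum} over the pre-sorted in-memory terms; seekCeil is a simple linear scan. */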
| private static class DataTermsEnum extends BaseTermsEnum { |
| final FieldData fieldData; |
| private int upto = -1; |
| |
| public DataTermsEnum(FieldData fieldData) { |
| this.fieldData = fieldData; |
| } |
| |
| @Override |
| public BytesRef next() { |
| upto++; |
| if (upto == fieldData.terms.length) { |
| return null; |
| } |
| |
| return term(); |
| } |
| |
| @Override |
| public BytesRef term() { |
| return fieldData.terms[upto].text; |
| } |
| |
| @Override |
| public SeekStatus seekCeil(BytesRef text) { |
| // Stupid linear impl: |
| for (int i = 0; i < fieldData.terms.length; i++) { |
| int cmp = fieldData.terms[i].text.compareTo(text); |
| if (cmp == 0) { |
| upto = i; |
| return SeekStatus.FOUND; |
| } else if (cmp > 0) { |
| upto = i; |
| return SeekStatus.NOT_FOUND; |
| } |
| } |
| |
| return SeekStatus.END; |
| } |
| |
| @Override |
| public void seekExact(long ord) { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long ord() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public int docFreq() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public long totalTermFreq() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public PostingsEnum postings(PostingsEnum reuse, int flags) { |
| return new DataPostingsEnum(fieldData.terms[upto]); |
| } |
| |
| @Override |
| public ImpactsEnum impacts(int flags) throws IOException { |
| throw new UnsupportedOperationException(); |
| } |
| } |
| |
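  /** {@link PostingsEnum} over a single {@link TermData}'s docs, positions, and payloads. */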
| private static class DataPostingsEnum extends PostingsEnum { |
| final TermData termData; |
| int docUpto = -1; |
| int posUpto; |
| |
| public DataPostingsEnum(TermData termData) { |
| this.termData = termData; |
| } |
| |
| @Override |
| public long cost() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public int nextDoc() { |
| docUpto++; |
| if (docUpto == termData.docs.length) { |
| return NO_MORE_DOCS; |
| } |
| posUpto = -1; |
| return docID(); |
| } |
| |
| @Override |
| public int docID() { |
| return termData.docs[docUpto]; |
| } |
| |
    @Override
    public int advance(int target) {
      // Slow linear impl; relies on nextDoc() returning NO_MORE_DOCS once the docs are
      // exhausted, so we never index past the end of the docs array:
      int doc;
      do {
        doc = nextDoc();
      } while (doc < target);
      return doc;
    }
| |
| @Override |
| public int freq() { |
| return termData.positions[docUpto].length; |
| } |
| |
| @Override |
| public int nextPosition() { |
| posUpto++; |
| return termData.positions[docUpto][posUpto].pos; |
| } |
| |
| @Override |
| public BytesRef getPayload() { |
| return termData.positions[docUpto][posUpto].payload; |
| } |
| |
| @Override |
| public int startOffset() { |
| throw new UnsupportedOperationException(); |
| } |
| |
| @Override |
| public int endOffset() { |
| throw new UnsupportedOperationException(); |
| } |
| } |
| |
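  /**
   * Writes the given fields through the codec's {@link FieldsConsumer}, supplying a fake norms
   * producer that returns a constant norm of 1 for every document.
   */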
| private void write( |
| SegmentInfo si, final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields) |
| throws Throwable { |
| |
| final Codec codec = si.getCodec(); |
| final SegmentWriteState state = |
| new SegmentWriteState( |
| InfoStream.getDefault(), dir, si, fieldInfos, null, newIOContext(random())); |
| |
| Arrays.sort(fields); |
| FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(state); |
| NormsProducer fakeNorms = |
| new NormsProducer() { |
| |
| @Override |
| public void close() throws IOException {} |
| |
| @Override |
| public NumericDocValues getNorms(FieldInfo field) throws IOException { |
| return new NumericDocValues() { |
| |
| int doc = -1; |
| |
| @Override |
| public int nextDoc() throws IOException { |
| return advance(doc + 1); |
| } |
| |
| @Override |
| public int docID() { |
| return doc; |
| } |
| |
| @Override |
| public long cost() { |
| return si.maxDoc(); |
| } |
| |
| @Override |
| public int advance(int target) throws IOException { |
| if (target >= si.maxDoc()) { |
| return doc = NO_MORE_DOCS; |
| } else { |
| return doc = target; |
| } |
| } |
| |
| @Override |
| public boolean advanceExact(int target) throws IOException { |
| doc = target; |
| return true; |
| } |
| |
| @Override |
| public long longValue() throws IOException { |
| return 1; |
| } |
| }; |
| } |
| |
| @Override |
| public void checkIntegrity() throws IOException {} |
| }; |
| boolean success = false; |
| try { |
| consumer.write(new DataFields(fields), fakeNorms); |
| success = true; |
| } finally { |
| if (success) { |
| IOUtils.close(consumer); |
| } else { |
| IOUtils.closeWhileHandlingException(consumer); |
| } |
| } |
| } |
| |
| public void testDocsOnlyFreq() throws Exception { |
    // tests that when fields are indexed with IndexOptions.DOCS, the codec
    // returns freq=1 from PostingsEnum.freq()
| Directory dir = newDirectory(); |
| Random random = random(); |
| IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random))); |
    // we don't need many documents to assert this, but don't use just one document either
| int numDocs = atLeast(random, 50); |
| for (int i = 0; i < numDocs; i++) { |
| Document doc = new Document(); |
| doc.add(new StringField("f", "doc", Store.NO)); |
| writer.addDocument(doc); |
| } |
| writer.close(); |
| |
| Term term = new Term("f", new BytesRef("doc")); |
| DirectoryReader reader = DirectoryReader.open(dir); |
| for (LeafReaderContext ctx : reader.leaves()) { |
      PostingsEnum de = ctx.reader().postings(term);
      assertNotNull(de);
| while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { |
| assertEquals("wrong freq for doc " + de.docID(), 1, de.freq()); |
| } |
| } |
| reader.close(); |
| |
| dir.close(); |
| } |
| } |