| package org.apache.lucene.uninverting; |
| |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| import java.io.IOException; |
| import java.util.Arrays; |
| import java.util.List; |
| import java.util.ArrayList; |
| import java.util.HashSet; |
| import java.util.Set; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.codecs.Codec; |
| import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.document.Document; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.IntField; |
| import org.apache.lucene.document.LongField; |
| import org.apache.lucene.document.StringField; |
| import org.apache.lucene.index.AtomicReader; |
| import org.apache.lucene.index.AtomicReaderContext; |
| import org.apache.lucene.index.DirectoryReader; |
| import org.apache.lucene.index.DocValues; |
| import org.apache.lucene.index.IndexReader; |
| import org.apache.lucene.index.IndexWriter; |
| import org.apache.lucene.index.IndexWriterConfig; |
| import org.apache.lucene.index.MultiFields; |
| import org.apache.lucene.index.NumericDocValues; |
| import org.apache.lucene.index.RandomIndexWriter; |
| import org.apache.lucene.index.SlowCompositeReaderWrapper; |
| import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| import org.apache.lucene.index.TermsEnum.SeekStatus; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util.NumericUtils; |
| import org.apache.lucene.util.StringHelper; |
| import org.apache.lucene.util.TestUtil; |
| |
| // TODO: |
| // - test w/ del docs |
| // - test prefix |
| // - test w/ cutoff |
| // - crank docs way up so we get some merging sometimes |
| |
| public class TestDocTermOrds extends LuceneTestCase { |
| |
| public void testSimple() throws Exception { |
| Directory dir = newDirectory(); |
| final RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy())); |
| Document doc = new Document(); |
| Field field = newTextField("field", "", Field.Store.NO); |
| doc.add(field); |
| field.setStringValue("a b c"); |
| w.addDocument(doc); |
| |
| field.setStringValue("d e f"); |
| w.addDocument(doc); |
| |
| field.setStringValue("a f"); |
| w.addDocument(doc); |
| |
| final IndexReader r = w.getReader(); |
| w.shutdown(); |
| |
| final AtomicReader ar = SlowCompositeReaderWrapper.wrap(r); |
| final DocTermOrds dto = new DocTermOrds(ar, ar.getLiveDocs(), "field"); |
| SortedSetDocValues iter = dto.iterator(ar); |
| |
| iter.setDocument(0); |
| assertEquals(0, iter.nextOrd()); |
| assertEquals(1, iter.nextOrd()); |
| assertEquals(2, iter.nextOrd()); |
| assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd()); |
| |
| iter.setDocument(1); |
| assertEquals(3, iter.nextOrd()); |
| assertEquals(4, iter.nextOrd()); |
| assertEquals(5, iter.nextOrd()); |
| assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd()); |
| |
| iter.setDocument(2); |
| assertEquals(0, iter.nextOrd()); |
| assertEquals(5, iter.nextOrd()); |
| assertEquals(SortedSetDocValues.NO_MORE_ORDS, iter.nextOrd()); |
| |
| r.close(); |
| dir.close(); |
| } |
| |
| public void testRandom() throws Exception { |
| Directory dir = newDirectory(); |
| |
| final int NUM_TERMS = atLeast(20); |
| final Set<BytesRef> terms = new HashSet<>(); |
| while(terms.size() < NUM_TERMS) { |
| final String s = TestUtil.randomRealisticUnicodeString(random()); |
| //final String s = _TestUtil.randomSimpleString(random); |
| if (s.length() > 0) { |
| terms.add(new BytesRef(s)); |
| } |
| } |
| final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]); |
| Arrays.sort(termsArray); |
| |
| final int NUM_DOCS = atLeast(100); |
| |
| IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); |
| |
| // Sometimes swap in codec that impls ord(): |
| if (random().nextInt(10) == 7) { |
| // Make sure terms index has ords: |
| Codec codec = TestUtil.alwaysPostingsFormat(PostingsFormat.forName("Lucene41WithOrds")); |
| conf.setCodec(codec); |
| } |
| |
| final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf); |
| |
| final int[][] idToOrds = new int[NUM_DOCS][]; |
| final Set<Integer> ordsForDocSet = new HashSet<>(); |
| |
| for(int id=0;id<NUM_DOCS;id++) { |
| Document doc = new Document(); |
| |
| doc.add(new IntField("id", id, Field.Store.YES)); |
| |
| final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER); |
| while(ordsForDocSet.size() < termCount) { |
| ordsForDocSet.add(random().nextInt(termsArray.length)); |
| } |
| final int[] ordsForDoc = new int[termCount]; |
| int upto = 0; |
| if (VERBOSE) { |
| System.out.println("TEST: doc id=" + id); |
| } |
| for(int ord : ordsForDocSet) { |
| ordsForDoc[upto++] = ord; |
| Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO); |
| if (VERBOSE) { |
| System.out.println(" f=" + termsArray[ord].utf8ToString()); |
| } |
| doc.add(field); |
| } |
| ordsForDocSet.clear(); |
| Arrays.sort(ordsForDoc); |
| idToOrds[id] = ordsForDoc; |
| w.addDocument(doc); |
| } |
| |
| final DirectoryReader r = w.getReader(); |
| w.shutdown(); |
| |
| if (VERBOSE) { |
| System.out.println("TEST: reader=" + r); |
| } |
| |
| for(AtomicReaderContext ctx : r.leaves()) { |
| if (VERBOSE) { |
| System.out.println("\nTEST: sub=" + ctx.reader()); |
| } |
| verify(ctx.reader(), idToOrds, termsArray, null); |
| } |
| |
| // Also test top-level reader: its enum does not support |
| // ord, so this forces the OrdWrapper to run: |
| if (VERBOSE) { |
| System.out.println("TEST: top reader"); |
| } |
| AtomicReader slowR = SlowCompositeReaderWrapper.wrap(r); |
| verify(slowR, idToOrds, termsArray, null); |
| |
| FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey()); |
| |
| r.close(); |
| dir.close(); |
| } |
| |
| public void testRandomWithPrefix() throws Exception { |
| Directory dir = newDirectory(); |
| |
| final Set<String> prefixes = new HashSet<>(); |
| final int numPrefix = TestUtil.nextInt(random(), 2, 7); |
| if (VERBOSE) { |
| System.out.println("TEST: use " + numPrefix + " prefixes"); |
| } |
| while(prefixes.size() < numPrefix) { |
| prefixes.add(TestUtil.randomRealisticUnicodeString(random())); |
| //prefixes.add(_TestUtil.randomSimpleString(random)); |
| } |
| final String[] prefixesArray = prefixes.toArray(new String[prefixes.size()]); |
| |
| final int NUM_TERMS = atLeast(20); |
| final Set<BytesRef> terms = new HashSet<>(); |
| while(terms.size() < NUM_TERMS) { |
| final String s = prefixesArray[random().nextInt(prefixesArray.length)] + TestUtil.randomRealisticUnicodeString(random()); |
| //final String s = prefixesArray[random.nextInt(prefixesArray.length)] + _TestUtil.randomSimpleString(random); |
| if (s.length() > 0) { |
| terms.add(new BytesRef(s)); |
| } |
| } |
| final BytesRef[] termsArray = terms.toArray(new BytesRef[terms.size()]); |
| Arrays.sort(termsArray); |
| |
| final int NUM_DOCS = atLeast(100); |
| |
| IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); |
| |
| // Sometimes swap in codec that impls ord(): |
| if (random().nextInt(10) == 7) { |
| Codec codec = TestUtil.alwaysPostingsFormat(PostingsFormat.forName("Lucene41WithOrds")); |
| conf.setCodec(codec); |
| } |
| |
| final RandomIndexWriter w = new RandomIndexWriter(random(), dir, conf); |
| |
| final int[][] idToOrds = new int[NUM_DOCS][]; |
| final Set<Integer> ordsForDocSet = new HashSet<>(); |
| |
| for(int id=0;id<NUM_DOCS;id++) { |
| Document doc = new Document(); |
| |
| doc.add(new IntField("id", id, Field.Store.YES)); |
| |
| final int termCount = TestUtil.nextInt(random(), 0, 20 * RANDOM_MULTIPLIER); |
| while(ordsForDocSet.size() < termCount) { |
| ordsForDocSet.add(random().nextInt(termsArray.length)); |
| } |
| final int[] ordsForDoc = new int[termCount]; |
| int upto = 0; |
| if (VERBOSE) { |
| System.out.println("TEST: doc id=" + id); |
| } |
| for(int ord : ordsForDocSet) { |
| ordsForDoc[upto++] = ord; |
| Field field = newStringField("field", termsArray[ord].utf8ToString(), Field.Store.NO); |
| if (VERBOSE) { |
| System.out.println(" f=" + termsArray[ord].utf8ToString()); |
| } |
| doc.add(field); |
| } |
| ordsForDocSet.clear(); |
| Arrays.sort(ordsForDoc); |
| idToOrds[id] = ordsForDoc; |
| w.addDocument(doc); |
| } |
| |
| final DirectoryReader r = w.getReader(); |
| w.shutdown(); |
| |
| if (VERBOSE) { |
| System.out.println("TEST: reader=" + r); |
| } |
| |
| AtomicReader slowR = SlowCompositeReaderWrapper.wrap(r); |
| for(String prefix : prefixesArray) { |
| |
| final BytesRef prefixRef = prefix == null ? null : new BytesRef(prefix); |
| |
| final int[][] idToOrdsPrefix = new int[NUM_DOCS][]; |
| for(int id=0;id<NUM_DOCS;id++) { |
| final int[] docOrds = idToOrds[id]; |
| final List<Integer> newOrds = new ArrayList<>(); |
| for(int ord : idToOrds[id]) { |
| if (StringHelper.startsWith(termsArray[ord], prefixRef)) { |
| newOrds.add(ord); |
| } |
| } |
| final int[] newOrdsArray = new int[newOrds.size()]; |
| int upto = 0; |
| for(int ord : newOrds) { |
| newOrdsArray[upto++] = ord; |
| } |
| idToOrdsPrefix[id] = newOrdsArray; |
| } |
| |
| for(AtomicReaderContext ctx : r.leaves()) { |
| if (VERBOSE) { |
| System.out.println("\nTEST: sub=" + ctx.reader()); |
| } |
| verify(ctx.reader(), idToOrdsPrefix, termsArray, prefixRef); |
| } |
| |
| // Also test top-level reader: its enum does not support |
| // ord, so this forces the OrdWrapper to run: |
| if (VERBOSE) { |
| System.out.println("TEST: top reader"); |
| } |
| verify(slowR, idToOrdsPrefix, termsArray, prefixRef); |
| } |
| |
| FieldCache.DEFAULT.purgeByCacheKey(slowR.getCoreCacheKey()); |
| |
| r.close(); |
| dir.close(); |
| } |
| |
| private void verify(AtomicReader r, int[][] idToOrds, BytesRef[] termsArray, BytesRef prefixRef) throws Exception { |
| |
| final DocTermOrds dto = new DocTermOrds(r, r.getLiveDocs(), |
| "field", |
| prefixRef, |
| Integer.MAX_VALUE, |
| TestUtil.nextInt(random(), 2, 10)); |
| |
| |
| final NumericDocValues docIDToID = FieldCache.DEFAULT.getNumerics(r, "id", FieldCache.NUMERIC_UTILS_INT_PARSER, false); |
| /* |
| for(int docID=0;docID<subR.maxDoc();docID++) { |
| System.out.println(" docID=" + docID + " id=" + docIDToID[docID]); |
| } |
| */ |
| |
| if (VERBOSE) { |
| System.out.println("TEST: verify prefix=" + (prefixRef==null ? "null" : prefixRef.utf8ToString())); |
| System.out.println("TEST: all TERMS:"); |
| TermsEnum allTE = MultiFields.getTerms(r, "field").iterator(null); |
| int ord = 0; |
| while(allTE.next() != null) { |
| System.out.println(" ord=" + (ord++) + " term=" + allTE.term().utf8ToString()); |
| } |
| } |
| |
| //final TermsEnum te = subR.fields().terms("field").iterator(); |
| final TermsEnum te = dto.getOrdTermsEnum(r); |
| if (dto.numTerms() == 0) { |
| if (prefixRef == null) { |
| assertNull(MultiFields.getTerms(r, "field")); |
| } else { |
| Terms terms = MultiFields.getTerms(r, "field"); |
| if (terms != null) { |
| TermsEnum termsEnum = terms.iterator(null); |
| TermsEnum.SeekStatus result = termsEnum.seekCeil(prefixRef); |
| if (result != TermsEnum.SeekStatus.END) { |
| assertFalse("term=" + termsEnum.term().utf8ToString() + " matches prefix=" + prefixRef.utf8ToString(), StringHelper.startsWith(termsEnum.term(), prefixRef)); |
| } else { |
| // ok |
| } |
| } else { |
| // ok |
| } |
| } |
| return; |
| } |
| |
| if (VERBOSE) { |
| System.out.println("TEST: TERMS:"); |
| te.seekExact(0); |
| while(true) { |
| System.out.println(" ord=" + te.ord() + " term=" + te.term().utf8ToString()); |
| if (te.next() == null) { |
| break; |
| } |
| } |
| } |
| |
| SortedSetDocValues iter = dto.iterator(r); |
| for(int docID=0;docID<r.maxDoc();docID++) { |
| if (VERBOSE) { |
| System.out.println("TEST: docID=" + docID + " of " + r.maxDoc() + " (id=" + docIDToID.get(docID) + ")"); |
| } |
| iter.setDocument(docID); |
| final int[] answers = idToOrds[(int) docIDToID.get(docID)]; |
| int upto = 0; |
| long ord; |
| while ((ord = iter.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { |
| te.seekExact(ord); |
| final BytesRef expected = termsArray[answers[upto++]]; |
| if (VERBOSE) { |
| System.out.println(" exp=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString()); |
| } |
| assertEquals("expected=" + expected.utf8ToString() + " actual=" + te.term().utf8ToString() + " ord=" + ord, expected, te.term()); |
| } |
| assertEquals(answers.length, upto); |
| } |
| } |
| |
| public void testBackToTheFuture() throws Exception { |
| Directory dir = newDirectory(); |
| IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null)); |
| |
| Document doc = new Document(); |
| doc.add(newStringField("foo", "bar", Field.Store.NO)); |
| iw.addDocument(doc); |
| |
| doc = new Document(); |
| doc.add(newStringField("foo", "baz", Field.Store.NO)); |
| // we need a second value for a doc, or we don't actually test DocTermOrds! |
| doc.add(newStringField("foo", "car", Field.Store.NO)); |
| iw.addDocument(doc); |
| |
| DirectoryReader r1 = DirectoryReader.open(iw, true); |
| |
| iw.deleteDocuments(new Term("foo", "baz")); |
| DirectoryReader r2 = DirectoryReader.open(iw, true); |
| |
| FieldCache.DEFAULT.getDocTermOrds(getOnlySegmentReader(r2), "foo", null); |
| |
| SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(getOnlySegmentReader(r1), "foo", null); |
| assertEquals(3, v.getValueCount()); |
| v.setDocument(1); |
| assertEquals(1, v.nextOrd()); |
| |
| iw.shutdown(); |
| r1.close(); |
| r2.close(); |
| dir.close(); |
| } |
| |
| public void testNumericEncoded32() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null)); |
| |
| Document doc = new Document(); |
| doc.add(new IntField("foo", 5, Field.Store.NO)); |
| iw.addDocument(doc); |
| |
| doc = new Document(); |
| doc.add(new IntField("foo", 5, Field.Store.NO)); |
| doc.add(new IntField("foo", -3, Field.Store.NO)); |
| iw.addDocument(doc); |
| |
| iw.forceMerge(1); |
| iw.shutdown(); |
| |
| DirectoryReader ir = DirectoryReader.open(dir); |
| AtomicReader ar = getOnlySegmentReader(ir); |
| |
| SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT32_TERM_PREFIX); |
| assertEquals(2, v.getValueCount()); |
| |
| v.setDocument(0); |
| assertEquals(1, v.nextOrd()); |
| assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); |
| |
| v.setDocument(1); |
| assertEquals(0, v.nextOrd()); |
| assertEquals(1, v.nextOrd()); |
| assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); |
| |
| BytesRef value = new BytesRef(); |
| v.lookupOrd(0, value); |
| assertEquals(-3, NumericUtils.prefixCodedToInt(value)); |
| |
| v.lookupOrd(1, value); |
| assertEquals(5, NumericUtils.prefixCodedToInt(value)); |
| |
| ir.close(); |
| dir.close(); |
| } |
| |
| public void testNumericEncoded64() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null)); |
| |
| Document doc = new Document(); |
| doc.add(new LongField("foo", 5, Field.Store.NO)); |
| iw.addDocument(doc); |
| |
| doc = new Document(); |
| doc.add(new LongField("foo", 5, Field.Store.NO)); |
| doc.add(new LongField("foo", -3, Field.Store.NO)); |
| iw.addDocument(doc); |
| |
| iw.forceMerge(1); |
| iw.shutdown(); |
| |
| DirectoryReader ir = DirectoryReader.open(dir); |
| AtomicReader ar = getOnlySegmentReader(ir); |
| |
| SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", FieldCache.INT64_TERM_PREFIX); |
| assertEquals(2, v.getValueCount()); |
| |
| v.setDocument(0); |
| assertEquals(1, v.nextOrd()); |
| assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); |
| |
| v.setDocument(1); |
| assertEquals(0, v.nextOrd()); |
| assertEquals(1, v.nextOrd()); |
| assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); |
| |
| BytesRef value = new BytesRef(); |
| v.lookupOrd(0, value); |
| assertEquals(-3, NumericUtils.prefixCodedToLong(value)); |
| |
| v.lookupOrd(1, value); |
| assertEquals(5, NumericUtils.prefixCodedToLong(value)); |
| |
| ir.close(); |
| dir.close(); |
| } |
| |
| public void testSortedTermsEnum() throws IOException { |
| Directory directory = newDirectory(); |
| Analyzer analyzer = new MockAnalyzer(random()); |
| IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer); |
| iwconfig.setMergePolicy(newLogMergePolicy()); |
| RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig); |
| |
| Document doc = new Document(); |
| doc.add(new StringField("field", "hello", Field.Store.NO)); |
| iwriter.addDocument(doc); |
| |
| doc = new Document(); |
| doc.add(new StringField("field", "world", Field.Store.NO)); |
| // we need a second value for a doc, or we don't actually test DocTermOrds! |
| doc.add(new StringField("field", "hello", Field.Store.NO)); |
| iwriter.addDocument(doc); |
| |
| doc = new Document(); |
| doc.add(new StringField("field", "beer", Field.Store.NO)); |
| iwriter.addDocument(doc); |
| iwriter.forceMerge(1); |
| |
| DirectoryReader ireader = iwriter.getReader(); |
| iwriter.shutdown(); |
| |
| AtomicReader ar = getOnlySegmentReader(ireader); |
| SortedSetDocValues dv = FieldCache.DEFAULT.getDocTermOrds(ar, "field", null); |
| assertEquals(3, dv.getValueCount()); |
| |
| TermsEnum termsEnum = dv.termsEnum(); |
| |
| // next() |
| assertEquals("beer", termsEnum.next().utf8ToString()); |
| assertEquals(0, termsEnum.ord()); |
| assertEquals("hello", termsEnum.next().utf8ToString()); |
| assertEquals(1, termsEnum.ord()); |
| assertEquals("world", termsEnum.next().utf8ToString()); |
| assertEquals(2, termsEnum.ord()); |
| |
| // seekCeil() |
| assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!"))); |
| assertEquals("hello", termsEnum.term().utf8ToString()); |
| assertEquals(1, termsEnum.ord()); |
| assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer"))); |
| assertEquals("beer", termsEnum.term().utf8ToString()); |
| assertEquals(0, termsEnum.ord()); |
| assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz"))); |
| |
| // seekExact() |
| assertTrue(termsEnum.seekExact(new BytesRef("beer"))); |
| assertEquals("beer", termsEnum.term().utf8ToString()); |
| assertEquals(0, termsEnum.ord()); |
| assertTrue(termsEnum.seekExact(new BytesRef("hello"))); |
| assertEquals("hello", termsEnum.term().utf8ToString()); |
| assertEquals(1, termsEnum.ord()); |
| assertTrue(termsEnum.seekExact(new BytesRef("world"))); |
| assertEquals("world", termsEnum.term().utf8ToString()); |
| assertEquals(2, termsEnum.ord()); |
| assertFalse(termsEnum.seekExact(new BytesRef("bogus"))); |
| |
| // seek(ord) |
| termsEnum.seekExact(0); |
| assertEquals("beer", termsEnum.term().utf8ToString()); |
| assertEquals(0, termsEnum.ord()); |
| termsEnum.seekExact(1); |
| assertEquals("hello", termsEnum.term().utf8ToString()); |
| assertEquals(1, termsEnum.ord()); |
| termsEnum.seekExact(2); |
| assertEquals("world", termsEnum.term().utf8ToString()); |
| assertEquals(2, termsEnum.ord()); |
| ireader.close(); |
| directory.close(); |
| } |
| |
| public void testActuallySingleValued() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwconfig = newIndexWriterConfig(TEST_VERSION_CURRENT, null); |
| iwconfig.setMergePolicy(newLogMergePolicy()); |
| IndexWriter iw = new IndexWriter(dir, iwconfig); |
| |
| Document doc = new Document(); |
| doc.add(new StringField("foo", "bar", Field.Store.NO)); |
| iw.addDocument(doc); |
| |
| doc = new Document(); |
| doc.add(new StringField("foo", "baz", Field.Store.NO)); |
| iw.addDocument(doc); |
| |
| doc = new Document(); |
| iw.addDocument(doc); |
| |
| doc = new Document(); |
| doc.add(new StringField("foo", "baz", Field.Store.NO)); |
| doc.add(new StringField("foo", "baz", Field.Store.NO)); |
| iw.addDocument(doc); |
| |
| iw.forceMerge(1); |
| iw.shutdown(); |
| |
| DirectoryReader ir = DirectoryReader.open(dir); |
| AtomicReader ar = getOnlySegmentReader(ir); |
| |
| SortedSetDocValues v = FieldCache.DEFAULT.getDocTermOrds(ar, "foo", null); |
| assertNotNull(DocValues.unwrapSingleton(v)); // actually a single-valued field |
| assertEquals(2, v.getValueCount()); |
| |
| v.setDocument(0); |
| assertEquals(0, v.nextOrd()); |
| assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); |
| |
| v.setDocument(1); |
| assertEquals(1, v.nextOrd()); |
| assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); |
| |
| v.setDocument(2); |
| assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); |
| |
| v.setDocument(3); |
| assertEquals(1, v.nextOrd()); |
| assertEquals(SortedSetDocValues.NO_MORE_ORDS, v.nextOrd()); |
| |
| BytesRef value = new BytesRef(); |
| v.lookupOrd(0, value); |
| assertEquals("bar", value.utf8ToString()); |
| |
| v.lookupOrd(1, value); |
| assertEquals("baz", value.utf8ToString()); |
| |
| ir.close(); |
| dir.close(); |
| } |
| } |