| Index: lucene/CHANGES.txt |
| =================================================================== |
| --- lucene/CHANGES.txt (revision 1655572) |
| +++ lucene/CHANGES.txt (working copy) |
| @@ -56,6 +56,12 @@ |
| * LUCENE-6193: Collapse identical catch branches in try-catch statements. |
| (shalin) |
| |
| +Bug fixes |
| + |
| +* LUCENE-6207: Fixed consumption of several terms enums on the same |
| + sorted (set) doc values instance at the same time. |
| + (Tom Shally, Robert Muir, Adrien Grand) |
| + |
| ======================= Lucene 5.0.0 ======================= |
| |
| New Features |
| Index: lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java (revision 1655572) |
| +++ lucene/core/src/java/org/apache/lucene/index/SortedDocValuesTermsEnum.java (working copy) |
| @@ -29,7 +29,6 @@ |
| class SortedDocValuesTermsEnum extends TermsEnum { |
| private final SortedDocValues values; |
| private int currentOrd = -1; |
| - private BytesRef term; |
| private final BytesRefBuilder scratch; |
| |
| /** Creates a new TermsEnum over the provided values */ |
| @@ -44,7 +43,6 @@ |
| if (ord >= 0) { |
| currentOrd = ord; |
| scratch.copyBytes(text); |
| - term = scratch.get(); |
| return SeekStatus.FOUND; |
| } else { |
| currentOrd = -ord-1; |
| @@ -52,7 +50,7 @@ |
| return SeekStatus.END; |
| } else { |
| // TODO: hmm can we avoid this "extra" lookup?: |
| - term = values.lookupOrd(currentOrd); |
| + scratch.copyBytes(values.lookupOrd(currentOrd)); |
| return SeekStatus.NOT_FOUND; |
| } |
| } |
| @@ -64,7 +62,6 @@ |
| if (ord >= 0) { |
| currentOrd = ord; |
| scratch.copyBytes(text); |
| - term = scratch.get(); |
| return true; |
| } else { |
| return false; |
| @@ -75,7 +72,7 @@ |
| public void seekExact(long ord) throws IOException { |
| assert ord >= 0 && ord < values.getValueCount(); |
| currentOrd = (int) ord; |
| - term = values.lookupOrd(currentOrd); |
| + scratch.copyBytes(values.lookupOrd(currentOrd)); |
| } |
| |
| @Override |
| @@ -84,13 +81,13 @@ |
| if (currentOrd >= values.getValueCount()) { |
| return null; |
| } |
| - term = values.lookupOrd(currentOrd); |
| - return term; |
| + scratch.copyBytes(values.lookupOrd(currentOrd)); |
| + return scratch.get(); |
| } |
| |
| @Override |
| public BytesRef term() throws IOException { |
| - return term; |
| + return scratch.get(); |
| } |
| |
| @Override |
| Index: lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java |
| =================================================================== |
| --- lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java (revision 1655572) |
| +++ lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java (working copy) |
| @@ -29,7 +29,6 @@ |
| class SortedSetDocValuesTermsEnum extends TermsEnum { |
| private final SortedSetDocValues values; |
| private long currentOrd = -1; |
| - private BytesRef term; |
| private final BytesRefBuilder scratch; |
| |
| /** Creates a new TermsEnum over the provided values */ |
| @@ -44,7 +43,6 @@ |
| if (ord >= 0) { |
| currentOrd = ord; |
| scratch.copyBytes(text); |
| - term = scratch.get(); |
| return SeekStatus.FOUND; |
| } else { |
| currentOrd = -ord-1; |
| @@ -52,7 +50,7 @@ |
| return SeekStatus.END; |
| } else { |
| // TODO: hmm can we avoid this "extra" lookup?: |
| - term = values.lookupOrd(currentOrd); |
| + scratch.copyBytes(values.lookupOrd(currentOrd)); |
| return SeekStatus.NOT_FOUND; |
| } |
| } |
| @@ -64,7 +62,6 @@ |
| if (ord >= 0) { |
| currentOrd = ord; |
| scratch.copyBytes(text); |
| - term = scratch.get(); |
| return true; |
| } else { |
| return false; |
| @@ -75,7 +72,7 @@ |
| public void seekExact(long ord) throws IOException { |
| assert ord >= 0 && ord < values.getValueCount(); |
| currentOrd = (int) ord; |
| - term = values.lookupOrd(currentOrd); |
| + scratch.copyBytes(values.lookupOrd(currentOrd)); |
| } |
| |
| @Override |
| @@ -84,13 +81,13 @@ |
| if (currentOrd >= values.getValueCount()) { |
| return null; |
| } |
| - term = values.lookupOrd(currentOrd); |
| - return term; |
| + scratch.copyBytes(values.lookupOrd(currentOrd)); |
| + return scratch.get(); |
| } |
| |
| @Override |
| public BytesRef term() throws IOException { |
| - return term; |
| + return scratch.get(); |
| } |
| |
| @Override |
| Index: lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50DocValuesFormat.java |
| =================================================================== |
| --- lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50DocValuesFormat.java (revision 1655572) |
| +++ lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50DocValuesFormat.java (working copy) |
| @@ -37,6 +37,7 @@ |
| import org.apache.lucene.index.IndexWriterConfig; |
| import org.apache.lucene.index.RandomIndexWriter; |
| import org.apache.lucene.index.SerialMergeScheduler; |
| +import org.apache.lucene.index.SortedSetDocValues; |
| import org.apache.lucene.index.Term; |
| import org.apache.lucene.index.Terms; |
| import org.apache.lucene.index.TermsEnum; |
| @@ -184,10 +185,13 @@ |
| LeafReader r = context.reader(); |
| Terms terms = r.terms("indexed"); |
| if (terms != null) { |
| - assertEquals(terms.size(), r.getSortedSetDocValues("dv").getValueCount()); |
| + SortedSetDocValues ssdv = r.getSortedSetDocValues("dv"); |
| + assertEquals(terms.size(), ssdv.getValueCount()); |
| TermsEnum expected = terms.iterator(null); |
| TermsEnum actual = r.getSortedSetDocValues("dv").termsEnum(); |
| assertEquals(terms.size(), expected, actual); |
| + |
| + doTestSortedSetEnumAdvanceIndependently(ssdv); |
| } |
| } |
| ir.close(); |
| Index: lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java |
| =================================================================== |
| --- lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java (revision 1655572) |
| +++ lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java (working copy) |
| @@ -26,6 +26,7 @@ |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.HashMap; |
| +import java.util.List; |
| import java.util.Map; |
| import java.util.Map.Entry; |
| import java.util.Set; |
| @@ -57,6 +58,7 @@ |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.Bits; |
| import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.BytesRefBuilder; |
| import org.apache.lucene.util.BytesRefHash; |
| import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.TestUtil; |
| @@ -2945,6 +2947,142 @@ |
| directory.close(); |
| } |
| |
| + public void testSortedEnumAdvanceIndependently() throws IOException { |
| + Directory directory = newDirectory(); |
| + Analyzer analyzer = new MockAnalyzer(random()); |
| + IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); |
| + iwconfig.setMergePolicy(newLogMergePolicy()); |
| + RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig); |
| + |
| + Document doc = new Document(); |
| + SortedDocValuesField field = new SortedDocValuesField("field", new BytesRef("2")); |
| + doc.add(field); |
| + iwriter.addDocument(doc); |
| + field.setBytesValue(new BytesRef("1")); |
| + iwriter.addDocument(doc); |
| + field.setBytesValue(new BytesRef("3")); |
| + iwriter.addDocument(doc); |
| + |
| + iwriter.commit(); |
| + iwriter.forceMerge(1); |
| + |
| + DirectoryReader ireader = iwriter.getReader(); |
| + iwriter.close(); |
| + |
| + SortedDocValues dv = getOnlySegmentReader(ireader).getSortedDocValues("field"); |
| + doTestSortedSetEnumAdvanceIndependently(DocValues.singleton(dv)); |
| + |
| + ireader.close(); |
| + directory.close(); |
| + } |
| + |
| + public void testSortedSetEnumAdvanceIndependently() throws IOException { |
| + Directory directory = newDirectory(); |
| + Analyzer analyzer = new MockAnalyzer(random()); |
| + IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer); |
| + iwconfig.setMergePolicy(newLogMergePolicy()); |
| + RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig); |
| + |
| + Document doc = new Document(); |
| + SortedSetDocValuesField field1 = new SortedSetDocValuesField("field", new BytesRef("2")); |
| + SortedSetDocValuesField field2 = new SortedSetDocValuesField("field", new BytesRef("3")); |
| + doc.add(field1); |
| + doc.add(field2); |
| + iwriter.addDocument(doc); |
| + field1.setBytesValue(new BytesRef("1")); |
| + iwriter.addDocument(doc); |
| + field2.setBytesValue(new BytesRef("2")); |
| + iwriter.addDocument(doc); |
| + |
| + iwriter.commit(); |
| + iwriter.forceMerge(1); |
| + |
| + DirectoryReader ireader = iwriter.getReader(); |
| + iwriter.close(); |
| + |
| + SortedSetDocValues dv = getOnlySegmentReader(ireader).getSortedSetDocValues("field"); |
| + doTestSortedSetEnumAdvanceIndependently(dv); |
| + |
| + ireader.close(); |
| + directory.close(); |
| + } |
| + |
| + protected void doTestSortedSetEnumAdvanceIndependently(SortedSetDocValues dv) throws IOException { |
| + if (dv.getValueCount() < 2) { |
| + return; |
| + } |
| + List<BytesRef> terms = new ArrayList<>(); |
| + TermsEnum te = dv.termsEnum(); |
| + terms.add(BytesRef.deepCopyOf(te.next())); |
| + terms.add(BytesRef.deepCopyOf(te.next())); |
| + |
| + // Make sure that calls to next() does not modify the term of the other enum |
| + TermsEnum enum1 = dv.termsEnum(); |
| + TermsEnum enum2 = dv.termsEnum(); |
| + BytesRefBuilder term1 = new BytesRefBuilder(); |
| + BytesRefBuilder term2 = new BytesRefBuilder(); |
| + |
| + term1.copyBytes(enum1.next()); |
| + term2.copyBytes(enum2.next()); |
| + term1.copyBytes(enum1.next()); |
| + |
| + assertEquals(term1.get(), enum1.term()); |
| + assertEquals(term2.get(), enum2.term()); |
| + |
| + // Same for seekCeil |
| + enum1 = dv.termsEnum(); |
| + enum2 = dv.termsEnum(); |
| + term1 = new BytesRefBuilder(); |
| + term2 = new BytesRefBuilder(); |
| + |
| + term2.copyBytes(enum2.next()); |
| + BytesRefBuilder seekTerm = new BytesRefBuilder(); |
| + seekTerm.append(terms.get(0)); |
| + seekTerm.append((byte) 0); |
| + enum1.seekCeil(seekTerm.get()); |
| + term1.copyBytes(enum1.term()); |
| + |
| + assertEquals(term1.get(), enum1.term()); |
| + assertEquals(term2.get(), enum2.term()); |
| + |
| + // Same for seekCeil on an exact value |
| + enum1 = dv.termsEnum(); |
| + enum2 = dv.termsEnum(); |
| + term1 = new BytesRefBuilder(); |
| + term2 = new BytesRefBuilder(); |
| + |
| + term2.copyBytes(enum2.next()); |
| + enum1.seekCeil(terms.get(1)); |
| + term1.copyBytes(enum1.term()); |
| + |
| + assertEquals(term1.get(), enum1.term()); |
| + assertEquals(term2.get(), enum2.term()); |
| + |
| + // Same for seekExact |
| + enum1 = dv.termsEnum(); |
| + enum2 = dv.termsEnum(); |
| + term1 = new BytesRefBuilder(); |
| + term2 = new BytesRefBuilder(); |
| + |
| + term2.copyBytes(enum2.next()); |
| + final boolean found = enum1.seekExact(terms.get(1)); |
| + assertTrue(found); |
| + term1.copyBytes(enum1.term()); |
| + |
| + // Same for seek by ord |
| + enum1 = dv.termsEnum(); |
| + enum2 = dv.termsEnum(); |
| + term1 = new BytesRefBuilder(); |
| + term2 = new BytesRefBuilder(); |
| + |
| + term2.copyBytes(enum2.next()); |
| + enum1.seekExact(1); |
| + term1.copyBytes(enum1.term()); |
| + |
| + assertEquals(term1.get(), enum1.term()); |
| + assertEquals(term2.get(), enum2.term()); |
| + } |
| + |
| protected boolean codecAcceptsHugeBinaryValues(String field) { |
| return true; |
| } |
| @@ -2964,4 +3102,5 @@ |
| protected boolean codecSupportsSortedNumeric() { |
| return true; |
| } |
| + |
| } |