| diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java |
| index e2d7dfd..087e487 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java |
| @@ -20,6 +20,7 @@ package org.apache.lucene.index; |
| import java.io.IOException; |
| |
| import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.automaton.CompiledAutomaton; |
| |
| /** |
| * A per-document byte[] with presorted values. This is fundamentally an |
| @@ -110,4 +111,25 @@ public abstract class SortedDocValues extends BinaryDocValues { |
| return new SortedDocValuesTermsEnum(this); |
| } |
| |
| + /** |
| + * Intersects the values with a {@link CompiledAutomaton}: the returned {@link TermsEnum} |
| + * is filtered by that automaton. The returned enum supports {@link TermsEnum#ord()}. |
| + */ |
| + public TermsEnum intersect(CompiledAutomaton automaton) throws IOException { |
| + // Obtained before dispatching on the automaton type, so it is created for every type. |
| + final TermsEnum allValues = termsEnum(); |
| + switch (automaton.type) { |
| + case ALL: |
| + return allValues; // handed back as-is, without a filtering wrapper |
| + case NONE: |
| + return TermsEnum.EMPTY; // the unfiltered enum is not used for this type |
| + case SINGLE: |
| + return new SingleTermsEnum(allValues, automaton.term); |
| + case NORMAL: |
| + return new AutomatonTermsEnum(allValues, automaton); |
| + default: |
| + throw new RuntimeException("unhandled case"); // unreachable |
| + } |
| + } |
| + |
| } |
| diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java |
| index 6d02c25..9e1c6a3 100644 |
| --- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java |
| +++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java |
| @@ -20,6 +20,7 @@ package org.apache.lucene.index; |
| import java.io.IOException; |
| |
| import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.automaton.CompiledAutomaton; |
| |
| /** |
| * A multi-valued version of {@link SortedDocValues}. |
| @@ -102,4 +103,25 @@ public abstract class SortedSetDocValues extends DocValuesIterator { |
| public TermsEnum termsEnum() throws IOException { |
| return new SortedSetDocValuesTermsEnum(this); |
| } |
| + |
| + /** |
| + * Intersects the values with a {@link CompiledAutomaton}: the returned {@link TermsEnum} |
| + * is filtered by that automaton. The returned enum supports {@link TermsEnum#ord()}. |
| + */ |
| + public TermsEnum intersect(CompiledAutomaton automaton) throws IOException { |
| + // Obtained before dispatching on the automaton type, so it is created for every type. |
| + final TermsEnum allValues = termsEnum(); |
| + switch (automaton.type) { |
| + case ALL: |
| + return allValues; // handed back as-is, without a filtering wrapper |
| + case NONE: |
| + return TermsEnum.EMPTY; // the unfiltered enum is not used for this type |
| + case SINGLE: |
| + return new SingleTermsEnum(allValues, automaton.term); |
| + case NORMAL: |
| + return new AutomatonTermsEnum(allValues, automaton); |
| + default: |
| + throw new RuntimeException("unhandled case"); // unreachable |
| + } |
| + } |
| } |
| diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java |
| index d55f212..8cb6665 100644 |
| --- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java |
| +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java |
| @@ -67,6 +67,8 @@ import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.TestUtil; |
| |
| import com.carrotsearch.randomizedtesting.generators.RandomPicks; |
| +import org.apache.lucene.util.automaton.CompiledAutomaton; |
| +import org.apache.lucene.util.automaton.RegExp; |
| |
| import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS; |
| import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; |
| @@ -906,6 +908,21 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes |
| termsEnum.seekExact(2); |
| assertEquals("world", termsEnum.term().utf8ToString()); |
| assertEquals(2, termsEnum.ord()); |
| + |
| + // NORMAL automaton |
| + termsEnum = dv.intersect(new CompiledAutomaton(new RegExp(".*l.*").toAutomaton())); |
| + assertEquals("hello", termsEnum.next().utf8ToString()); |
| + assertEquals(1, termsEnum.ord()); |
| + assertEquals("world", termsEnum.next().utf8ToString()); |
| + assertEquals(2, termsEnum.ord()); |
| + assertNull(termsEnum.next()); |
| + |
| + // SINGLE automaton |
| + termsEnum = dv.intersect(new CompiledAutomaton(new RegExp("hello").toAutomaton())); |
| + assertEquals("hello", termsEnum.next().utf8ToString()); |
| + assertEquals(1, termsEnum.ord()); |
| + assertNull(termsEnum.next()); |
| + |
| ireader.close(); |
| directory.close(); |
| } |
| @@ -2057,6 +2074,21 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes |
| termsEnum.seekExact(2); |
| assertEquals("world", termsEnum.term().utf8ToString()); |
| assertEquals(2, termsEnum.ord()); |
| + |
| + // NORMAL automaton |
| + termsEnum = dv.intersect(new CompiledAutomaton(new RegExp(".*l.*").toAutomaton())); |
| + assertEquals("hello", termsEnum.next().utf8ToString()); |
| + assertEquals(1, termsEnum.ord()); |
| + assertEquals("world", termsEnum.next().utf8ToString()); |
| + assertEquals(2, termsEnum.ord()); |
| + assertNull(termsEnum.next()); |
| + |
| + // SINGLE automaton |
| + termsEnum = dv.intersect(new CompiledAutomaton(new RegExp("hello").toAutomaton())); |
| + assertEquals("hello", termsEnum.next().utf8ToString()); |
| + assertEquals(1, termsEnum.ord()); |
| + assertNull(termsEnum.next()); |
| + |
| ireader.close(); |
| directory.close(); |
| } |