blob: 37505d0cb9d1a439dc730fcff533d61dc7b7b6ff [file] [log] [blame]
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
index e2d7dfd..087e487 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValues.java
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
/**
* A per-document byte[] with presorted values. This is fundamentally an
@@ -110,4 +111,25 @@ public abstract class SortedDocValues extends BinaryDocValues {
return new SortedDocValuesTermsEnum(this);
}
+ /**
+ * Returns a {@link TermsEnum} over the values, filtered by a {@link CompiledAutomaton}.
+ * The returned enum supports {@link TermsEnum#ord()}.
+ */
+ public TermsEnum intersect(CompiledAutomaton automaton) throws IOException {
+ TermsEnum in = termsEnum();
+ switch (automaton.type) {
+ case NONE:
+ return TermsEnum.EMPTY;
+ case ALL:
+ return in;
+ case SINGLE:
+ return new SingleTermsEnum(in, automaton.term);
+ case NORMAL:
+ return new AutomatonTermsEnum(in, automaton);
+ default:
+ // unreachable as long as the cases above cover every automaton.type value
+ throw new RuntimeException("unhandled case");
+ }
+ }
+
}
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
index 6d02c25..9e1c6a3 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValues.java
@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
/**
* A multi-valued version of {@link SortedDocValues}.
@@ -102,4 +103,25 @@ public abstract class SortedSetDocValues extends DocValuesIterator {
public TermsEnum termsEnum() throws IOException {
return new SortedSetDocValuesTermsEnum(this);
}
+
+ /**
+ * Returns a {@link TermsEnum} over the values, filtered by a {@link CompiledAutomaton}.
+ * The returned enum supports {@link TermsEnum#ord()}.
+ */
+ public TermsEnum intersect(CompiledAutomaton automaton) throws IOException {
+ TermsEnum in = termsEnum();
+ switch (automaton.type) {
+ case NONE:
+ return TermsEnum.EMPTY;
+ case ALL:
+ return in;
+ case SINGLE:
+ return new SingleTermsEnum(in, automaton.term);
+ case NORMAL:
+ return new AutomatonTermsEnum(in, automaton);
+ default:
+ // unreachable as long as the cases above cover every automaton.type value
+ throw new RuntimeException("unhandled case");
+ }
+ }
}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
index d55f212..8cb6665 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java
@@ -67,6 +67,8 @@ import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TestUtil;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
+import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
import static org.apache.lucene.index.SortedSetDocValues.NO_MORE_ORDS;
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
@@ -906,6 +908,21 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
termsEnum.seekExact(2);
assertEquals("world", termsEnum.term().utf8ToString());
assertEquals(2, termsEnum.ord());
+
+ // NORMAL automaton
+ termsEnum = dv.intersect(new CompiledAutomaton(new RegExp(".*l.*").toAutomaton()));
+ assertEquals("hello", termsEnum.next().utf8ToString());
+ assertEquals(1, termsEnum.ord());
+ assertEquals("world", termsEnum.next().utf8ToString());
+ assertEquals(2, termsEnum.ord());
+ assertNull(termsEnum.next());
+
+ // SINGLE automaton
+ termsEnum = dv.intersect(new CompiledAutomaton(new RegExp("hello").toAutomaton()));
+ assertEquals("hello", termsEnum.next().utf8ToString());
+ assertEquals(1, termsEnum.ord());
+ assertNull(termsEnum.next());
+
ireader.close();
directory.close();
}
@@ -2057,6 +2074,21 @@ public abstract class BaseDocValuesFormatTestCase extends BaseIndexFileFormatTes
termsEnum.seekExact(2);
assertEquals("world", termsEnum.term().utf8ToString());
assertEquals(2, termsEnum.ord());
+
+ // NORMAL automaton
+ termsEnum = dv.intersect(new CompiledAutomaton(new RegExp(".*l.*").toAutomaton()));
+ assertEquals("hello", termsEnum.next().utf8ToString());
+ assertEquals(1, termsEnum.ord());
+ assertEquals("world", termsEnum.next().utf8ToString());
+ assertEquals(2, termsEnum.ord());
+ assertNull(termsEnum.next());
+
+ // SINGLE automaton
+ termsEnum = dv.intersect(new CompiledAutomaton(new RegExp("hello").toAutomaton()));
+ assertEquals("hello", termsEnum.next().utf8ToString());
+ assertEquals(1, termsEnum.ord());
+ assertNull(termsEnum.next());
+
ireader.close();
directory.close();
}