| diff --git lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java |
| index 0150ed1..1985258 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/spell/HighFrequencyDictionary.java |
| @@ -109,5 +109,15 @@ public class HighFrequencyDictionary implements Dictionary { |
| public boolean hasPayloads() { |
| return false; |
| } |
| + |
| + @Override |
| + public BytesRefIterator contexts() { |
| + return BytesRefIterator.EMPTY; |
| + } |
| + |
| + @Override |
| + public boolean hasContexts() { |
| + return false; |
| + } |
| } |
| } |
| diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java |
| index 96c7cf8..d8caf3d 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/BufferedInputIterator.java |
| @@ -18,10 +18,13 @@ package org.apache.lucene.search.suggest; |
| */ |
| |
| import java.io.IOException; |
| +import java.util.ArrayList; |
| +import java.util.List; |
| |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| import org.apache.lucene.util.BytesRefArray; |
| +import org.apache.lucene.util.BytesRefIterator; |
| import org.apache.lucene.util.Counter; |
| |
| /** |
| @@ -34,6 +37,8 @@ public class BufferedInputIterator implements InputIterator { |
| protected BytesRefArray entries = new BytesRefArray(Counter.newCounter()); |
| /** buffered payload entries */ |
| protected BytesRefArray payloads = new BytesRefArray(Counter.newCounter()); |
| + /** buffered context set entries */ |
| + protected List<BytesRefIterator> contextSets = new ArrayList<>(); |
| /** current buffer position */ |
| protected int curPos = -1; |
| /** buffered weights, parallel with {@link #entries} */ |
| @@ -41,17 +46,22 @@ public class BufferedInputIterator implements InputIterator { |
| private final BytesRef spare = new BytesRef(); |
| private final BytesRef payloadSpare = new BytesRef(); |
| private final boolean hasPayloads; |
| + private final boolean hasContexts; |
| |
| /** Creates a new iterator, buffering entries from the specified iterator */ |
| public BufferedInputIterator(InputIterator source) throws IOException { |
| BytesRef spare; |
| int freqIndex = 0; |
| hasPayloads = source.hasPayloads(); |
| + hasContexts = source.hasContexts(); |
| while((spare = source.next()) != null) { |
| entries.append(spare); |
| if (hasPayloads) { |
| payloads.append(source.payload()); |
| } |
| + if (hasContexts) { |
| + contextSets.add(source.contexts()); |
| + } |
| if (freqIndex >= freqs.length) { |
| freqs = ArrayUtil.grow(freqs, freqs.length+1); |
| } |
| @@ -86,4 +96,17 @@ public class BufferedInputIterator implements InputIterator { |
| public boolean hasPayloads() { |
| return hasPayloads; |
| } |
| + |
| + @Override |
| + public BytesRefIterator contexts() { |
| + if (hasContexts && curPos < contextSets.size()) { |
| + return contextSets.get(curPos); |
| + } |
| + return BytesRefIterator.EMPTY; |
| + } |
| + |
| + @Override |
| + public boolean hasContexts() { |
| + return hasContexts; |
| + } |
| } |
| diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java |
| index 91fdf24..d219108 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentDictionary.java |
| @@ -33,8 +33,8 @@ import org.apache.lucene.util.BytesRefIterator; |
| |
| /** |
| * <p> |
| - * Dictionary with terms, weights and optionally payload information |
| - * taken from stored/indexed fields in a Lucene index. |
| + * Dictionary with terms, weights, payload (optional) and contexts (optional) |
| + * information taken from stored/indexed fields in a Lucene index. |
| * </p> |
| * <b>NOTE:</b> |
| * <ul> |
| @@ -60,6 +60,8 @@ public class DocumentDictionary implements Dictionary { |
| |
| /** Field to read payload from */ |
| protected final String payloadField; |
| + /** Field to read contexts from */ |
| + protected final String contextsField; |
| private final String field; |
| private final String weightField; |
| |
| @@ -79,15 +81,26 @@ public class DocumentDictionary implements Dictionary { |
| * for the entry. |
| */ |
| public DocumentDictionary(IndexReader reader, String field, String weightField, String payloadField) { |
| + this(reader, field, weightField, payloadField, null); |
| + } |
| + |
| + /** |
| + * Creates a new dictionary with the contents of the fields named <code>field</code> |
| + * for the terms, <code>weightField</code> for the weights that will be used for the |
| + * corresponding terms, <code>payloadField</code> for the corresponding payloads |
| + * for the entry and <code>contextsField</code> for associated contexts. |
| + */ |
| + public DocumentDictionary(IndexReader reader, String field, String weightField, String payloadField, String contextsField) { |
| this.reader = reader; |
| this.field = field; |
| this.weightField = weightField; |
| this.payloadField = payloadField; |
| + this.contextsField = contextsField; |
| } |
| |
| @Override |
| public InputIterator getEntryIterator() throws IOException { |
| - return new DocumentInputIterator(payloadField!=null); |
| + return new DocumentInputIterator(payloadField!=null, contextsField!=null); |
| } |
| |
| /** Implements {@link InputIterator} from stored fields. */ |
| @@ -96,10 +109,12 @@ public class DocumentDictionary implements Dictionary { |
| private final int docCount; |
| private final Set<String> relevantFields; |
| private final boolean hasPayloads; |
| + private final boolean hasContexts; |
| private final Bits liveDocs; |
| private int currentDocId = -1; |
| private long currentWeight = 0; |
| private BytesRef currentPayload = null; |
| + private BytesRefIterator currentContexts = BytesRefIterator.EMPTY; |
| private final NumericDocValues weightValues; |
| |
| /** |
| @@ -107,12 +122,13 @@ public class DocumentDictionary implements Dictionary { |
| * index. setting <code>withPayload</code> to false, implies an iterator |
| * over only term and weight. |
| */ |
| - public DocumentInputIterator(boolean hasPayloads) throws IOException { |
| + public DocumentInputIterator(boolean hasPayloads, boolean hasContexts) throws IOException { |
| this.hasPayloads = hasPayloads; |
| + this.hasContexts = hasContexts; |
| docCount = reader.maxDoc() - 1; |
| weightValues = (weightField != null) ? MultiDocValues.getNumericValues(reader, weightField) : null; |
| liveDocs = (reader.leaves().size() > 0) ? MultiFields.getLiveDocs(reader) : null; |
| - relevantFields = getRelevantFields(new String [] {field, weightField, payloadField}); |
| + relevantFields = getRelevantFields(new String [] {field, weightField, payloadField, contextsField}); |
| } |
| |
| @Override |
| @@ -132,6 +148,7 @@ public class DocumentDictionary implements Dictionary { |
| |
| BytesRef tempPayload = null; |
| BytesRef tempTerm = null; |
| + BytesRefIterator tempContexts = BytesRefIterator.EMPTY; |
| |
| if (hasPayloads) { |
| StorableField payload = doc.getField(payloadField); |
| @@ -141,6 +158,27 @@ public class DocumentDictionary implements Dictionary { |
| tempPayload = (payload.binaryValue() != null) ? payload.binaryValue() : new BytesRef(payload.stringValue()); |
| } |
| |
| + if (hasContexts) { |
| + final StorableField[] contextFields = doc.getFields(contextsField); |
| + if (contextFields.length != 0) { |
| + tempContexts = new BytesRefIterator() { |
| + int idx = 0; |
| + @Override |
| + public BytesRef next() throws IOException { |
| + while (idx < contextFields.length) { |
| + StorableField context = contextFields[idx++]; |
| + if (context.binaryValue() == null && context.stringValue() == null) { |
| + continue; |
| + } else { |
| + return (context.binaryValue() != null) ? context.binaryValue() : new BytesRef(context.stringValue()); |
| + } |
| + } |
| + return null; |
| + } |
| + }; |
| + } |
| + } |
| + |
| StorableField fieldVal = doc.getField(field); |
| if (fieldVal == null || (fieldVal.binaryValue() == null && fieldVal.stringValue() == null)) { |
| continue; |
| @@ -148,6 +186,7 @@ public class DocumentDictionary implements Dictionary { |
| tempTerm = (fieldVal.stringValue() != null) ? new BytesRef(fieldVal.stringValue()) : fieldVal.binaryValue(); |
| |
| currentPayload = tempPayload; |
| + currentContexts = tempContexts; |
| currentWeight = getWeight(doc, currentDocId); |
| |
| return tempTerm; |
| @@ -191,5 +230,18 @@ public class DocumentDictionary implements Dictionary { |
| } |
| return relevantFields; |
| } |
| + |
| + @Override |
| + public BytesRefIterator contexts() { |
| + if (hasContexts) { |
| + return currentContexts; |
| + } |
| + return BytesRefIterator.EMPTY; |
| + } |
| + |
| + @Override |
| + public boolean hasContexts() { |
| + return hasContexts; |
| + } |
| } |
| } |
| diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java |
| index d5f720e..83647f4 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/DocumentValueSourceDictionary.java |
| @@ -70,6 +70,17 @@ public class DocumentValueSourceDictionary extends DocumentDictionary { |
| |
| /** |
| * Creates a new dictionary with the contents of the fields named <code>field</code> |
| + * for the terms, <code>payload</code> for the corresponding payloads, <code>contexts</code> |
| + * for the associated contexts and uses the <code>weightsValueSource</code> supplied |
| + * to determine the score. |
| + */ |
| + public DocumentValueSourceDictionary(IndexReader reader, String field, |
| + ValueSource weightsValueSource, String payload, String contexts) { |
| + super(reader, field, null, payload, contexts); |
| + this.weightsValueSource = weightsValueSource; |
| + } |
| + /** |
| + * Creates a new dictionary with the contents of the fields named <code>field</code> |
| * for the terms, <code>payloadField</code> for the corresponding payloads |
| * and uses the <code>weightsValueSource</code> supplied to determine the |
| * score. |
| @@ -93,7 +104,7 @@ public class DocumentValueSourceDictionary extends DocumentDictionary { |
| |
| @Override |
| public InputIterator getEntryIterator() throws IOException { |
| - return new DocumentValueSourceInputIterator(payloadField!=null); |
| + return new DocumentValueSourceInputIterator(payloadField!=null, contextsField!=null); |
| } |
| |
| final class DocumentValueSourceInputIterator extends DocumentDictionary.DocumentInputIterator { |
| @@ -106,9 +117,9 @@ public class DocumentValueSourceDictionary extends DocumentDictionary { |
| /** current leave index */ |
| private int currentLeafIndex = 0; |
| |
| - public DocumentValueSourceInputIterator(boolean hasPayloads) |
| + public DocumentValueSourceInputIterator(boolean hasPayloads, boolean hasContexts) |
| throws IOException { |
| - super(hasPayloads); |
| + super(hasPayloads, hasContexts); |
| leaves = reader.leaves(); |
| starts = new int[leaves.size() + 1]; |
| for (int i = 0; i < leaves.size(); i++) { |
| diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java |
| index 28921be..99672ce 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/FileDictionary.java |
| @@ -19,9 +19,11 @@ package org.apache.lucene.search.suggest; |
| |
| |
| import java.io.*; |
| +import java.util.Set; |
| |
| import org.apache.lucene.search.spell.Dictionary; |
| import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.BytesRefIterator; |
| import org.apache.lucene.util.IOUtils; |
| |
| |
| @@ -209,5 +211,15 @@ public class FileDictionary implements Dictionary { |
| curWeight = (long)Double.parseDouble(weight); |
| } |
| } |
| + |
| + @Override |
| + public BytesRefIterator contexts() { |
| + return BytesRefIterator.EMPTY; |
| + } |
| + |
| + @Override |
| + public boolean hasContexts() { |
| + return false; |
| + } |
| } |
| } |
| diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java |
| index c98825d..a1929f7 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/InputIterator.java |
| @@ -44,6 +44,12 @@ public interface InputIterator extends BytesRefIterator { |
| /** Returns true if the iterator has payloads */ |
| public boolean hasPayloads(); |
| |
| + /** Returns a set of contexts the suggestion is associated with */ |
| + public BytesRefIterator contexts(); |
| + |
| + /** Returns true if the iterator has context sets */ |
| + public boolean hasContexts(); |
| + |
| /** Singleton InputIterator that iterates over 0 BytesRefs. */ |
| public static final InputIterator EMPTY = new InputIteratorWrapper(BytesRefIterator.EMPTY); |
| |
| @@ -82,5 +88,15 @@ public interface InputIterator extends BytesRefIterator { |
| public boolean hasPayloads() { |
| return false; |
| } |
| + |
| + @Override |
| + public BytesRefIterator contexts() { |
| + return BytesRefIterator.EMPTY; |
| + } |
| + |
| + @Override |
| + public boolean hasContexts() { |
| + return false; |
| + } |
| } |
| } |
| diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java |
| index 4a749e1..020a447 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/Lookup.java |
| @@ -22,6 +22,7 @@ import java.io.InputStream; |
| import java.io.OutputStream; |
| import java.util.Comparator; |
| import java.util.List; |
| +import java.util.Set; |
| |
| import org.apache.lucene.search.spell.Dictionary; |
| import org.apache.lucene.store.DataInput; |
| @@ -55,31 +56,53 @@ public abstract class Lookup { |
| /** the key's payload (null if not present) */ |
| public final BytesRef payload; |
| |
| + /** the key's contexts (null if not present) */ |
| + public final Set<BytesRef> contexts; |
| + |
| /** |
| * Create a new result from a key+weight pair. |
| */ |
| public LookupResult(CharSequence key, long value) { |
| - this(key, value, null); |
| + this(key, null, value, null, null); |
| } |
| |
| /** |
| * Create a new result from a key+weight+payload triple. |
| */ |
| public LookupResult(CharSequence key, long value, BytesRef payload) { |
| - this.key = key; |
| - this.highlightKey = null; |
| - this.value = value; |
| - this.payload = payload; |
| + this(key, null, value, payload, null); |
| } |
| - |
| + |
| /** |
| * Create a new result from a key+highlightKey+weight+payload triple. |
| */ |
| public LookupResult(CharSequence key, Object highlightKey, long value, BytesRef payload) { |
| + this(key, highlightKey, value, payload, null); |
| + } |
| + |
| + /** |
| + * Create a new result from a key+weight+payload+contexts tuple. |
| + */ |
| + public LookupResult(CharSequence key, long value, BytesRef payload, Set<BytesRef> contexts) { |
| + this(key, null, value, payload, contexts); |
| + } |
| + |
| + /** |
| + * Create a new result from a key+weight+contexts triple. |
| + */ |
| + public LookupResult(CharSequence key, long value, Set<BytesRef> contexts) { |
| + this(key, null, value, null, contexts); |
| + } |
| + |
| + /** |
| + * Create a new result from a key+highlightKey+weight+payload+contexts tuple. |
| + */ |
| + public LookupResult(CharSequence key, Object highlightKey, long value, BytesRef payload, Set<BytesRef> contexts) { |
| this.key = key; |
| this.highlightKey = highlightKey; |
| this.value = value; |
| this.payload = payload; |
| + this.contexts = contexts; |
| } |
| |
| @Override |
| @@ -235,4 +258,5 @@ public abstract class Lookup { |
| * @return ram size of the lookup implementation in bytes |
| */ |
| public abstract long sizeInBytes(); |
| + |
| } |
| diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java |
| index d7011d4..8871c8f 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/SortedInputIterator.java |
| @@ -19,12 +19,17 @@ package org.apache.lucene.search.suggest; |
| |
| import java.io.File; |
| import java.io.IOException; |
| +import java.util.ArrayList; |
| import java.util.Comparator; |
| +import java.util.HashSet; |
| +import java.util.List; |
| +import java.util.Set; |
| |
| import org.apache.lucene.store.ByteArrayDataInput; |
| import org.apache.lucene.store.ByteArrayDataOutput; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.BytesRefIterator; |
| import org.apache.lucene.util.IOUtils; |
| import org.apache.lucene.util.OfflineSorter; |
| import org.apache.lucene.util.OfflineSorter.ByteSequencesReader; |
| @@ -42,11 +47,13 @@ public class SortedInputIterator implements InputIterator { |
| private final ByteSequencesReader reader; |
| private final Comparator<BytesRef> comparator; |
| private final boolean hasPayloads; |
| + private final boolean hasContexts; |
| private boolean done = false; |
| |
| private long weight; |
| private final BytesRef scratch = new BytesRef(); |
| private BytesRef payload = new BytesRef(); |
| + private BytesRefIterator contexts = BytesRefIterator.EMPTY; |
| |
| /** |
| * Creates a new sorted wrapper, using {@link |
| @@ -62,6 +69,7 @@ public class SortedInputIterator implements InputIterator { |
| */ |
| public SortedInputIterator(InputIterator source, Comparator<BytesRef> comparator) throws IOException { |
| this.hasPayloads = source.hasPayloads(); |
| + this.hasContexts = source.hasContexts(); |
| this.source = source; |
| this.comparator = comparator; |
| this.reader = sort(); |
| @@ -80,6 +88,9 @@ public class SortedInputIterator implements InputIterator { |
| if (hasPayloads) { |
| payload = decodePayload(scratch, input); |
| } |
| + if (hasContexts) { |
| + contexts = decodeContexts(scratch, input); |
| + } |
| success = true; |
| return scratch; |
| } |
| @@ -111,6 +122,16 @@ public class SortedInputIterator implements InputIterator { |
| public boolean hasPayloads() { |
| return hasPayloads; |
| } |
| + |
| + @Override |
| + public BytesRefIterator contexts() { |
| + return (hasContexts) ? contexts : BytesRefIterator.EMPTY; |
| + } |
| + |
| + @Override |
| + public boolean hasContexts() { |
| + return hasContexts; |
| + } |
| |
| /** Sortes by BytesRef (ascending) then cost (ascending). */ |
| private final Comparator<BytesRef> tieBreakByCostComparator = new Comparator<BytesRef>() { |
| @@ -134,6 +155,10 @@ public class SortedInputIterator implements InputIterator { |
| decodePayload(leftScratch, input); |
| decodePayload(rightScratch, input); |
| } |
| + if (hasContexts) { |
| + decodeContexts(leftScratch, input); |
| + decodeContexts(rightScratch, input); |
| + } |
| int cmp = comparator.compare(leftScratch, rightScratch); |
| if (cmp != 0) { |
| return cmp; |
| @@ -156,7 +181,7 @@ public class SortedInputIterator implements InputIterator { |
| ByteArrayDataOutput output = new ByteArrayDataOutput(buffer); |
| |
| while ((spare = source.next()) != null) { |
| - encode(writer, output, buffer, spare, source.payload(), source.weight()); |
| + encode(writer, output, buffer, spare, source.payload(), source.contexts(), source.weight()); |
| } |
| writer.close(); |
| new OfflineSorter(tieBreakByCostComparator).sort(tempInput, tempSorted); |
| @@ -187,9 +212,18 @@ public class SortedInputIterator implements InputIterator { |
| } |
| } |
| |
| - /** encodes an entry (bytes+(payload)+weight) to the provided writer */ |
| - protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException { |
| + /** encodes an entry (bytes+(payload)+(contexts)+weight) to the provided writer */ |
| + protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, BytesRefIterator contexts, long weight) throws IOException { |
| int requiredLength = spare.length + 8 + ((hasPayloads) ? 2 + payload.length : 0); |
| + Set<BytesRef> contextSet = new HashSet<>(); |
| + if(hasContexts) { |
| + BytesRef ctxSpare; |
| + while((ctxSpare = contexts.next()) != null) { |
| + contextSet.add(ctxSpare); |
| + requiredLength += 2 + ctxSpare.length; |
| + } |
| + requiredLength += 2; // for length of contexts |
| + } |
| if (requiredLength >= buffer.length) { |
| buffer = ArrayUtil.grow(buffer, requiredLength); |
| } |
| @@ -199,6 +233,13 @@ public class SortedInputIterator implements InputIterator { |
| output.writeBytes(payload.bytes, payload.offset, payload.length); |
| output.writeShort((short) payload.length); |
| } |
| + if (hasContexts) { |
| + for (BytesRef ctx : contextSet) { |
| + output.writeBytes(ctx.bytes, ctx.offset, ctx.length); |
| + output.writeShort((short) ctx.length); |
| + } |
| + output.writeShort((short) contextSet.size()); |
| + } |
| output.writeLong(weight); |
| writer.write(buffer, 0, output.getPosition()); |
| } |
| @@ -211,6 +252,36 @@ public class SortedInputIterator implements InputIterator { |
| return tmpInput.readLong(); |
| } |
| |
| + /** decodes the contexts at the current position */ |
| + protected BytesRefIterator decodeContexts(BytesRef scratch, ByteArrayDataInput tmpInput) { |
| + tmpInput.reset(scratch.bytes); |
| + tmpInput.skipBytes(scratch.length - 2); //skip to context set size |
| + short ctxSetSize = tmpInput.readShort(); |
| + scratch.length -= 2; |
| + final List<BytesRef> contextSet = new ArrayList<>(); |
| + for (short i = 0; i < ctxSetSize; i++) { |
| + tmpInput.setPosition(scratch.length - 2); |
| + short curContextLength = tmpInput.readShort(); |
| + scratch.length -= 2; |
| + tmpInput.setPosition(scratch.length - curContextLength); |
| + BytesRef contextSpare = new BytesRef(curContextLength); |
| + tmpInput.readBytes(contextSpare.bytes, 0, curContextLength); |
| + contextSpare.length = curContextLength; |
| + contextSet.add(contextSpare); |
| + scratch.length -= curContextLength; |
| + } |
| + return new BytesRefIterator() { |
| + int idx = 0; |
| + @Override |
| + public BytesRef next() throws IOException { |
| + if(idx < contextSet.size()) { |
| + return contextSet.get(idx++); |
| + } |
| + return null; |
| + } |
| + }; |
| + } |
| + |
| /** decodes the payload at the current position */ |
| protected BytesRef decodePayload(BytesRef scratch, ByteArrayDataInput tmpInput) { |
| tmpInput.reset(scratch.bytes); |
| diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedInputIterator.java lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedInputIterator.java |
| index 4403fc1..d1afd42 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedInputIterator.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/UnsortedInputIterator.java |
| @@ -21,6 +21,7 @@ import java.io.IOException; |
| import java.util.Random; |
| |
| import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.BytesRefIterator; |
| |
| /** |
| * This wrapper buffers the incoming elements and makes sure they are in |
| @@ -75,4 +76,13 @@ public class UnsortedInputIterator extends BufferedInputIterator { |
| } |
| return null; |
| } |
| + |
| + @Override |
| + public BytesRefIterator contexts() { |
| + if (hasContexts() && curPos < contextSets.size()) { |
| + assert currentOrd == ords[curPos]; |
| + return contextSets.get(currentOrd); |
| + } |
| + return BytesRefIterator.EMPTY; |
| + } |
| } |
| diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java |
| index 9333f7a..b0518fd 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java |
| @@ -189,6 +189,10 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { |
| @Override |
| public void build(InputIterator iter) throws IOException { |
| |
| + if(iter.hasContexts()) { |
| + throw new IllegalArgumentException("this suggester doesn't support contexts"); |
| + } |
| + |
| if (searcherMgr != null) { |
| searcherMgr.close(); |
| searcherMgr = null; |
| diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java |
| index 680d9cd..1de2d05 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java |
| @@ -380,6 +380,9 @@ public class AnalyzingSuggester extends Lookup { |
| |
| @Override |
| public void build(InputIterator iterator) throws IOException { |
| + if(iterator.hasContexts()) { |
| + throw new IllegalArgumentException("this suggester doesn't support contexts"); |
| + } |
| String prefix = getClass().getSimpleName(); |
| File directory = OfflineSorter.defaultTempDir(); |
| File tempInput = File.createTempFile(prefix, ".input", directory); |
| diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java |
| index f9b367c..0ec153e 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java |
| @@ -286,7 +286,10 @@ public class FreeTextSuggester extends Lookup { |
| * the weights for the suggestions are ignored. */ |
| public void build(InputIterator iterator, double ramBufferSizeMB) throws IOException { |
| if (iterator.hasPayloads()) { |
| - throw new IllegalArgumentException("payloads are not supported"); |
| + throw new IllegalArgumentException("this suggester doesn't support payloads"); |
| + } |
| + if(iterator.hasContexts()) { |
| + throw new IllegalArgumentException("this suggester doesn't support contexts"); |
| } |
| |
| String prefix = getClass().getSimpleName(); |
| diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java |
| index a24d316..4cb0cee 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionLookup.java |
| @@ -150,6 +150,9 @@ public class FSTCompletionLookup extends Lookup { |
| if (iterator.hasPayloads()) { |
| throw new IllegalArgumentException("this suggester doesn't support payloads"); |
| } |
| + if(iterator.hasContexts()) { |
| + throw new IllegalArgumentException("this suggester doesn't support contexts"); |
| + } |
| File tempInput = File.createTempFile( |
| FSTCompletionLookup.class.getSimpleName(), ".input", OfflineSorter.defaultTempDir()); |
| File tempSorted = File.createTempFile( |
| diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java |
| index 35a8051..4335af5 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/WFSTCompletionLookup.java |
| @@ -26,6 +26,7 @@ import org.apache.lucene.store.DataInput; |
| import org.apache.lucene.store.DataOutput; |
| import org.apache.lucene.util.ArrayUtil; |
| import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.BytesRefIterator; |
| import org.apache.lucene.util.CharsRef; |
| import org.apache.lucene.util.IntsRef; |
| import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter; |
| @@ -97,6 +98,9 @@ public class WFSTCompletionLookup extends Lookup { |
| if (iterator.hasPayloads()) { |
| throw new IllegalArgumentException("this suggester doesn't support payloads"); |
| } |
| + if(iterator.hasContexts()) { |
| + throw new IllegalArgumentException("this suggester doesn't support contexts"); |
| + } |
| count = 0; |
| BytesRef scratch = new BytesRef(); |
| InputIterator iter = new WFSTInputIterator(iterator); |
| @@ -260,7 +264,7 @@ public class WFSTCompletionLookup extends Lookup { |
| } |
| |
| @Override |
| - protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, long weight) throws IOException { |
| + protected void encode(ByteSequencesWriter writer, ByteArrayDataOutput output, byte[] buffer, BytesRef spare, BytesRef payload, BytesRefIterator contexts, long weight) throws IOException { |
| if (spare.length + 4 >= buffer.length) { |
| buffer = ArrayUtil.grow(buffer, spare.length + 4); |
| } |
| diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java |
| index 28cc39a..945fbde 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/jaspell/JaspellLookup.java |
| @@ -56,6 +56,9 @@ public class JaspellLookup extends Lookup { |
| if (iterator.hasPayloads()) { |
| throw new IllegalArgumentException("this suggester doesn't support payloads"); |
| } |
| + if(iterator.hasContexts()) { |
| + throw new IllegalArgumentException("this suggester doesn't support contexts"); |
| + } |
| count = 0; |
| trie = new JaspellTernarySearchTrie(); |
| trie.setMatchAlmostDiff(editDistance); |
| diff --git lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java |
| index 4b4d61e..bc66314 100644 |
| --- lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java |
| +++ lucene/suggest/src/java/org/apache/lucene/search/suggest/tst/TSTLookup.java |
| @@ -55,6 +55,9 @@ public class TSTLookup extends Lookup { |
| if (iterator.hasPayloads()) { |
| throw new IllegalArgumentException("this suggester doesn't support payloads"); |
| } |
| + if(iterator.hasContexts()) { |
| + throw new IllegalArgumentException("this suggester doesn't support contexts"); |
| + } |
| root = new TernaryTreeNode(); |
| |
| // make sure it's sorted and the comparator uses UTF16 sort order |
| diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java |
| index b78c856..1415d82 100644 |
| --- lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java |
| +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentDictionaryTest.java |
| @@ -24,6 +24,7 @@ import org.apache.lucene.search.spell.Dictionary; |
| import org.apache.lucene.search.suggest.DocumentDictionary; |
| import org.apache.lucene.store.Directory; |
| import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.BytesRefIterator; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.junit.Test; |
| |
| @@ -49,9 +50,10 @@ public class DocumentDictionaryTest extends LuceneTestCase { |
| static final String FIELD_NAME = "f1"; |
| static final String WEIGHT_FIELD_NAME = "w1"; |
| static final String PAYLOAD_FIELD_NAME = "p1"; |
| + static final String CONTEXT_FIELD_NAME = "c1"; |
| |
| /** Returns Pair(list of invalid document terms, Map of document term -> document) */ |
| - private Map.Entry<List<String>, Map<String, Document>> generateIndexDocuments(int ndocs, boolean requiresPayload) { |
| + private Map.Entry<List<String>, Map<String, Document>> generateIndexDocuments(int ndocs, boolean requiresPayload, boolean requiresContexts) { |
| Map<String, Document> docs = new HashMap<>(); |
| List<String> invalidDocTerms = new ArrayList<>(); |
| for(int i = 0; i < ndocs ; i++) { |
| @@ -77,6 +79,15 @@ public class DocumentDictionaryTest extends LuceneTestCase { |
| } |
| } |
| |
| + if (requiresContexts || usually()) { |
| + if (usually()) { |
| + for (int j = 0; j < atLeast(2); j++) { |
| + doc.add(new StoredField(CONTEXT_FIELD_NAME, new BytesRef("context_" + i + "_"+ j))); |
| + } |
| + } |
| + // we should allow entries without context |
| + } |
| + |
| // usually have valid weight field in document |
| if (usually()) { |
| Field weight = (rarely()) ? |
| @@ -125,7 +136,7 @@ public class DocumentDictionaryTest extends LuceneTestCase { |
| IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); |
| iwc.setMergePolicy(newLogMergePolicy()); |
| RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); |
| - Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), true); |
| + Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), true, false); |
| Map<String, Document> docs = res.getValue(); |
| List<String> invalidDocTerms = res.getKey(); |
| for(Document doc: docs.values()) { |
| @@ -160,7 +171,7 @@ public class DocumentDictionaryTest extends LuceneTestCase { |
| IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); |
| iwc.setMergePolicy(newLogMergePolicy()); |
| RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); |
| - Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false); |
| + Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false, false); |
| Map<String, Document> docs = res.getValue(); |
| List<String> invalidDocTerms = res.getKey(); |
| for(Document doc: docs.values()) { |
| @@ -191,12 +202,59 @@ public class DocumentDictionaryTest extends LuceneTestCase { |
| } |
| |
| @Test |
| + public void testWithContexts() throws IOException { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); |
| + iwc.setMergePolicy(newLogMergePolicy()); |
| + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); |
| + Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), true, true); |
| + Map<String, Document> docs = res.getValue(); |
| + List<String> invalidDocTerms = res.getKey(); |
| + for(Document doc: docs.values()) { |
| + writer.addDocument(doc); |
| + } |
| + writer.commit(); |
| + writer.close(); |
| + IndexReader ir = DirectoryReader.open(dir); |
| + Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME); |
| + InputIterator inputIterator = dictionary.getEntryIterator(); |
| + BytesRef f; |
| + while((f = inputIterator.next())!=null) { |
| + Document doc = docs.remove(f.utf8ToString()); |
| + assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); |
| + Field weightField = doc.getField(WEIGHT_FIELD_NAME); |
| + assertEquals(inputIterator.weight(), (weightField != null) ? weightField.numericValue().longValue() : 0); |
| + assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue())); |
| + List<BytesRef> oriCtxs = new ArrayList<>(); |
| + for (StorableField ctxf : doc.getFields(CONTEXT_FIELD_NAME)) { |
| + oriCtxs.add(ctxf.binaryValue()); |
| + } |
| + BytesRef ctx; |
| + BytesRefIterator ctxIterator = inputIterator.contexts(); |
| + int ctxCount = 0; |
| + while((ctx = ctxIterator.next()) != null) { |
| + assertTrue(oriCtxs.contains(ctx)); |
| + ctxCount++; |
| + } |
| + assertEquals(oriCtxs.size(), ctxCount); |
| + } |
| + |
| + for (String invalidTerm : invalidDocTerms) { |
| + assertNotNull(docs.remove(invalidTerm)); |
| + } |
| + assertTrue(docs.isEmpty()); |
| + |
| + ir.close(); |
| + dir.close(); |
| + } |
| + |
| + @Test |
| public void testWithDeletions() throws IOException { |
| Directory dir = newDirectory(); |
| IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); |
| iwc.setMergePolicy(newLogMergePolicy()); |
| RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); |
| - Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false); |
| + Map.Entry<List<String>, Map<String, Document>> res = generateIndexDocuments(atLeast(1000), false, false); |
| Map<String, Document> docs = res.getValue(); |
| List<String> invalidDocTerms = res.getKey(); |
| Random rand = random(); |
| diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java |
| index 0a59e01..c1fc97b 100644 |
| --- lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java |
| +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/DocumentValueSourceDictionaryTest.java |
| @@ -20,9 +20,11 @@ package org.apache.lucene.search.suggest; |
| import java.io.IOException; |
| import java.util.ArrayList; |
| import java.util.HashMap; |
| +import java.util.HashSet; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Random; |
| +import java.util.Set; |
| |
| import org.apache.lucene.analysis.MockAnalyzer; |
| import org.apache.lucene.document.Document; |
| @@ -52,6 +54,7 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase { |
| static final String WEIGHT_FIELD_NAME_2 = "w2"; |
| static final String WEIGHT_FIELD_NAME_3 = "w3"; |
| static final String PAYLOAD_FIELD_NAME = "p1"; |
| + static final String CONTEXTS_FIELD_NAME = "c1"; |
| |
| private Map<String, Document> generateIndexDocuments(int ndocs) { |
| Map<String, Document> docs = new HashMap<>(); |
| @@ -61,12 +64,18 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase { |
| Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i); |
| Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i); |
| Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i); |
| + Field contexts = new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_" + i + "_0")); |
| Document doc = new Document(); |
| doc.add(field); |
| doc.add(payload); |
| doc.add(weight1); |
| doc.add(weight2); |
| doc.add(weight3); |
| +      doc.add(contexts); |
| +      for(int j = 1, numCtxs = atLeast(3); j < numCtxs; j++) { |
| +        // a fresh Field per context: re-adding one mutated instance would leave N references that all hold the last value |
| +        doc.add(new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_" + i + "_" + j))); |
| +      } |
| docs.put(field.stringValue(), doc); |
| } |
| return docs; |
| @@ -124,6 +133,49 @@ public class DocumentValueSourceDictionaryTest extends LuceneTestCase { |
| ir.close(); |
| dir.close(); |
| } |
| + |
| + @Test |
| + public void testWithContext() throws IOException { |
| + Directory dir = newDirectory(); |
| + IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); |
| + iwc.setMergePolicy(newLogMergePolicy()); |
| + RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); |
| + Map<String, Document> docs = generateIndexDocuments(atLeast(100)); |
| + for(Document doc: docs.values()) { |
| + writer.addDocument(doc); |
| + } |
| + writer.commit(); |
| + writer.close(); |
| + |
| + IndexReader ir = DirectoryReader.open(dir); |
| + ValueSource[] toAdd = new ValueSource[] {new LongFieldSource(WEIGHT_FIELD_NAME_1), new LongFieldSource(WEIGHT_FIELD_NAME_2), new LongFieldSource(WEIGHT_FIELD_NAME_3)}; |
| + Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, new SumFloatFunction(toAdd), PAYLOAD_FIELD_NAME, CONTEXTS_FIELD_NAME); |
| + InputIterator inputIterator = dictionary.getEntryIterator(); |
| + BytesRef f; |
| + while((f = inputIterator.next())!=null) { |
| + Document doc = docs.remove(f.utf8ToString()); |
| + long w1 = doc.getField(WEIGHT_FIELD_NAME_1).numericValue().longValue(); |
| + long w2 = doc.getField(WEIGHT_FIELD_NAME_2).numericValue().longValue(); |
| + long w3 = doc.getField(WEIGHT_FIELD_NAME_3).numericValue().longValue(); |
| + assertTrue(f.equals(new BytesRef(doc.get(FIELD_NAME)))); |
| + assertEquals(inputIterator.weight(), (w1 + w2 + w3)); |
| + assertTrue(inputIterator.payload().equals(doc.getField(PAYLOAD_FIELD_NAME).binaryValue())); |
| +      List<BytesRef> originalCtxs = new ArrayList<>(); |
| +      for (Field ctxf: doc.getFields(CONTEXTS_FIELD_NAME)) { |
| +        originalCtxs.add(ctxf.binaryValue()); |
| +      } |
| +      org.apache.lucene.util.BytesRefIterator ctxIterator = inputIterator.contexts(); // fetched once per entry, not once per loop pass |
| +      int ctxCount = 0; |
| +      for (BytesRef ctx = ctxIterator.next(); ctx != null; ctx = ctxIterator.next()) { |
| +        assertTrue(originalCtxs.contains(ctx)); |
| +        ctxCount++; |
| +      } |
| +      assertEquals(originalCtxs.size(), ctxCount); |
| + } |
| + assertTrue(docs.isEmpty()); |
| + ir.close(); |
| + dir.close(); |
| + } |
| |
| @Test |
| public void testWithoutPayload() throws IOException { |
| diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/Input.java lucene/suggest/src/test/org/apache/lucene/search/suggest/Input.java |
| index 009f80c..2ea44bd 100644 |
| --- lucene/suggest/src/test/org/apache/lucene/search/suggest/Input.java |
| +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/Input.java |
| @@ -17,6 +17,8 @@ package org.apache.lucene.search.suggest; |
| * limitations under the License. |
| */ |
| |
| +import java.util.Set; |
| + |
| import org.apache.lucene.util.BytesRef; |
| |
| /** corresponds to {@link InputIterator}'s entries */ |
| @@ -25,28 +27,55 @@ public final class Input { |
| public final long v; |
| public final BytesRef payload; |
| public final boolean hasPayloads; |
| + public final Set<BytesRef> contexts; |
| + public final boolean hasContexts; |
| |
| public Input(BytesRef term, long v, BytesRef payload) { |
| - this(term, v, payload, true); |
| + this(term, v, payload, true, null, false); |
| } |
| |
| public Input(String term, long v, BytesRef payload) { |
| - this(new BytesRef(term), v, payload, true); |
| + this(new BytesRef(term), v, payload); |
| + } |
| + |
| + public Input(BytesRef term, long v, Set<BytesRef> contexts) { |
| + this(term, v, null, false, contexts, true); |
| + } |
| + |
| + public Input(String term, long v, Set<BytesRef> contexts) { |
| + this(new BytesRef(term), v, null, false, contexts, true); |
| } |
| |
| public Input(BytesRef term, long v) { |
| - this(term, v, null, false); |
| + this(term, v, null, false, null, false); |
| } |
| |
| public Input(String term, long v) { |
| - this(new BytesRef(term), v, null, false); |
| + this(new BytesRef(term), v, null, false, null, false); |
| } |
| |
| - public Input(BytesRef term, long v, BytesRef payload, boolean hasPayloads) { |
| +  public Input(String term, long v, BytesRef payload, Set<BytesRef> contexts) { |
| +    this(new BytesRef(term), v, payload, true, contexts, true); |
| +  } |
| + |
| + public Input(BytesRef term, long v, BytesRef payload, Set<BytesRef> contexts) { |
| + this(term, v, payload, true, contexts, true); |
| + } |
| + |
| + |
| + |
| + public Input(BytesRef term, long v, BytesRef payload, boolean hasPayloads, Set<BytesRef> contexts, |
| + boolean hasContexts) { |
| this.term = term; |
| this.v = v; |
| this.payload = payload; |
| this.hasPayloads = hasPayloads; |
| + this.contexts = contexts; |
| + this.hasContexts = hasContexts; |
| + } |
| + |
| + public boolean hasContexts() { |
| + return hasContexts; |
| } |
| |
| public boolean hasPayloads() { |
| diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java |
| index edebb37..27e5b48 100644 |
| --- lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java |
| +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/InputArrayIterator.java |
| @@ -17,10 +17,13 @@ package org.apache.lucene.search.suggest; |
| * limitations under the License. |
| */ |
| |
| +import java.io.IOException; |
| import java.util.Arrays; |
| import java.util.Iterator; |
| +import java.util.Set; |
| |
| import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.BytesRefIterator; |
| |
| /** |
| * A {@link InputIterator} over a sequence of {@link Input}s. |
| @@ -28,6 +31,7 @@ import org.apache.lucene.util.BytesRef; |
| public final class InputArrayIterator implements InputIterator { |
| private final Iterator<Input> i; |
| private final boolean hasPayloads; |
| + private final boolean hasContexts; |
| private boolean first; |
| private Input current; |
| private final BytesRef spare = new BytesRef(); |
| @@ -38,8 +42,10 @@ public final class InputArrayIterator implements InputIterator { |
| current = i.next(); |
| first = true; |
| this.hasPayloads = current.hasPayloads; |
| + this.hasContexts = current.hasContexts; |
| } else { |
| this.hasPayloads = false; |
| + this.hasContexts = false; |
| } |
| } |
| |
| @@ -78,4 +84,26 @@ public final class InputArrayIterator implements InputIterator { |
| public boolean hasPayloads() { |
| return hasPayloads; |
| } |
| + |
| +  @Override |
| +  public BytesRefIterator contexts() { // contexts of the current entry; note that every call builds a NEW iterator |
| +    if (current.contexts != null) { |
| +      final Iterator<BytesRef> bytesRefIter = current.contexts.iterator(); |
| +      return new BytesRefIterator() { // adapts the java.util.Iterator over the entry's context set |
| +        @Override |
| +        public BytesRef next() throws IOException { |
| +          if (bytesRefIter.hasNext()) { |
| +            return bytesRefIter.next(); |
| +          } |
| +          return null; // underlying set iterator is exhausted |
| +        } |
| +      }; |
| +    } |
| +    return BytesRefIterator.EMPTY; // current entry carries no context set |
| +  } |
| + |
| + @Override |
| + public boolean hasContexts() { |
| + return hasContexts; |
| + } |
| } |
| \ No newline at end of file |
| diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java |
| index ae09978..8d9298f 100644 |
| --- lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java |
| +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/TestInputIterator.java |
| @@ -19,9 +19,11 @@ package org.apache.lucene.search.suggest; |
| |
| import java.util.AbstractMap.SimpleEntry; |
| import java.util.Comparator; |
| +import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.Map; |
| import java.util.Random; |
| +import java.util.Set; |
| import java.util.TreeMap; |
| |
| import org.apache.lucene.util.BytesRef; |
| @@ -45,21 +47,29 @@ public class TestInputIterator extends LuceneTestCase { |
| Comparator<BytesRef> comparator = random.nextBoolean() ? BytesRef.getUTF8SortedAsUnicodeComparator() : BytesRef.getUTF8SortedAsUTF16Comparator(); |
| TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> sorted = new TreeMap<>(comparator); |
| TreeMap<BytesRef, Long> sortedWithoutPayload = new TreeMap<>(comparator); |
| + TreeMap<BytesRef, SimpleEntry<Long, Set<BytesRef>>> sortedWithContext = new TreeMap<>(comparator); |
| Input[] unsorted = new Input[num]; |
| Input[] unsortedWithoutPayload = new Input[num]; |
| - |
| + Input[] unsortedWithContexts = new Input[num]; |
| + Set<BytesRef> ctxs; |
| for (int i = 0; i < num; i++) { |
| BytesRef key; |
| BytesRef payload; |
| + ctxs = new HashSet<>(); |
| do { |
| key = new BytesRef(TestUtil.randomUnicodeString(random)); |
| payload = new BytesRef(TestUtil.randomUnicodeString(random)); |
| + for(int j = 0; j < atLeast(2); j++) { |
| + ctxs.add(new BytesRef(TestUtil.randomUnicodeString(random))); |
| + } |
| } while (sorted.containsKey(key)); |
| long value = random.nextLong(); |
| sortedWithoutPayload.put(key, value); |
| sorted.put(key, new SimpleEntry<>(value, payload)); |
| + sortedWithContext.put(key, new SimpleEntry<>(value, ctxs)); |
| unsorted[i] = new Input(key, value, payload); |
| unsortedWithoutPayload[i] = new Input(key, value); |
| + unsortedWithContexts[i] = new Input(key, value, ctxs); |
| } |
| |
| // test the sorted iterator wrapper with payloads |
| @@ -74,6 +84,25 @@ public class TestInputIterator extends LuceneTestCase { |
| } |
| assertNull(wrapper.next()); |
| |
| + // test the sorted iterator wrapper with contexts |
| + wrapper = new SortedInputIterator(new InputArrayIterator(unsortedWithContexts), comparator); |
| + Iterator<Map.Entry<BytesRef, SimpleEntry<Long, Set<BytesRef>>>> actualEntries = sortedWithContext.entrySet().iterator(); |
| + while (actualEntries.hasNext()) { |
| + Map.Entry<BytesRef, SimpleEntry<Long, Set<BytesRef>>> entry = actualEntries.next(); |
| + |
| + assertEquals(entry.getKey(), wrapper.next()); |
| + assertEquals(entry.getValue().getKey().longValue(), wrapper.weight()); |
| +      Set<BytesRef> actualCtxs = entry.getValue().getValue(); |
| +      org.apache.lucene.util.BytesRefIterator ctxIterator = wrapper.contexts(); // fetched once per entry, not once per loop pass |
| +      int ctxCount = 0; |
| +      for (BytesRef ctx = ctxIterator.next(); ctx != null; ctx = ctxIterator.next()) { |
| +        assertTrue(actualCtxs.contains(ctx)); |
| +        ctxCount++; |
| +      } |
| +      assertEquals(actualCtxs.size(), ctxCount); |
| + } |
| + assertNull(wrapper.next()); |
| + |
| // test the unsorted iterator wrapper with payloads |
| wrapper = new UnsortedInputIterator(new InputArrayIterator(unsorted)); |
| TreeMap<BytesRef, SimpleEntry<Long, BytesRef>> actual = new TreeMap<>(); |
| diff --git lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java |
| index 151d465..12017b3 100644 |
| --- lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java |
| +++ lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java |
| @@ -45,6 +45,7 @@ import org.apache.lucene.search.suggest.Input; |
| import org.apache.lucene.search.suggest.InputArrayIterator; |
| import org.apache.lucene.search.suggest.InputIterator; |
| import org.apache.lucene.util.BytesRef; |
| +import org.apache.lucene.util.BytesRefIterator; |
| import org.apache.lucene.util.LineFileDocs; |
| import org.apache.lucene.util.LuceneTestCase; |
| import org.apache.lucene.util.TestUtil; |
| @@ -173,6 +174,16 @@ public class TestFreeTextSuggester extends LuceneTestCase { |
| return false; |
| } |
| |
| + @Override |
| + public BytesRefIterator contexts() { |
| + return BytesRefIterator.EMPTY; |
| + } |
| + |
| + @Override |
| + public boolean hasContexts() { |
| + return false; |
| + } |
| + |
| }); |
| if (VERBOSE) { |
| System.out.println(sug.sizeInBytes() + " bytes"); |
| @@ -364,6 +375,16 @@ public class TestFreeTextSuggester extends LuceneTestCase { |
| public boolean hasPayloads() { |
| return false; |
| } |
| + |
| + @Override |
| + public BytesRefIterator contexts() { |
| + return BytesRefIterator.EMPTY; |
| + } |
| + |
| + @Override |
| + public boolean hasContexts() { |
| + return false; |
| + } |
| }); |
| |
| // Build inefficient but hopefully correct model: |