| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search.suggest.document; |
| |
| import java.io.IOException; |
| import java.util.Collections; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.Set; |
| |
| import org.apache.lucene.analysis.TokenFilter; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
| import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
| |
| /** |
| * {@link SuggestField} which additionally takes in a set of |
| * contexts. Example usage of adding a suggestion with contexts is as follows: |
| * |
| * <pre class="prettyprint"> |
| * document.add( |
| * new ContextSuggestField(name, "suggestion", Arrays.asList("context1", "context2"), 4)); |
| * </pre> |
| * |
| * Use {@link ContextQuery} to boost and/or filter suggestions |
| * at query-time. Use {@link PrefixCompletionQuery}, {@link RegexCompletionQuery} |
| * or {@link FuzzyCompletionQuery} if context boost/filtering |
| * are not needed. |
| * |
| * @lucene.experimental |
| */ |
| public class ContextSuggestField extends SuggestField { |
| |
| /** |
| * Separator used between context value and the suggest field value |
| */ |
| public static final int CONTEXT_SEPARATOR = '\u001D'; |
| static final byte TYPE = 1; |
| |
| private final Set<CharSequence> contexts; |
| |
| /** |
| * Creates a context-enabled suggest field |
| * |
| * @param name field name |
| * @param value field value to get suggestion on |
| * @param weight field weight |
| * @param contexts associated contexts |
| * |
| * @throws IllegalArgumentException if either the name or value is null, |
| * if value is an empty string, if the weight is negative, if value or |
| * contexts contains any reserved characters |
| */ |
| public ContextSuggestField(String name, String value, int weight, CharSequence... contexts) { |
| super(name, value, weight); |
| validate(value); |
| this.contexts = new HashSet<>((contexts != null) ? contexts.length : 0); |
| if (contexts != null) { |
| Collections.addAll(this.contexts, contexts); |
| } |
| } |
| |
| /** |
| * Expert: Sub-classes can inject contexts at |
| * index-time |
| */ |
| protected Iterable<CharSequence> contexts() { |
| return contexts; |
| } |
| |
| @Override |
| protected CompletionTokenStream wrapTokenStream(TokenStream stream) { |
| final Iterable<CharSequence> contexts = contexts(); |
| for (CharSequence context : contexts) { |
| validate(context); |
| } |
| CompletionTokenStream completionTokenStream; |
| if (stream instanceof CompletionTokenStream) { |
| //TODO this is awkward; is there a better way avoiding re-creating the chain? |
| completionTokenStream = (CompletionTokenStream) stream; |
| PrefixTokenFilter prefixTokenFilter = new PrefixTokenFilter(completionTokenStream.inputTokenStream, (char) CONTEXT_SEPARATOR, contexts); |
| completionTokenStream = new CompletionTokenStream(prefixTokenFilter, |
| completionTokenStream.preserveSep, |
| completionTokenStream.preservePositionIncrements, |
| completionTokenStream.maxGraphExpansions); |
| } else { |
| completionTokenStream = new CompletionTokenStream(new PrefixTokenFilter(stream, (char) CONTEXT_SEPARATOR, contexts)); |
| } |
| return completionTokenStream; |
| } |
| |
| @Override |
| protected byte type() { |
| return TYPE; |
| } |
| |
| /** |
| * The {@link PrefixTokenFilter} wraps a {@link TokenStream} and adds a set |
| * prefixes ahead. The position attribute will not be incremented for the prefixes. |
| */ |
| private static final class PrefixTokenFilter extends TokenFilter { |
| |
| private final char separator; |
| private final CharTermAttribute termAttr = addAttribute(CharTermAttribute.class); |
| private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class); |
| private final Iterable<CharSequence> prefixes; |
| |
| private Iterator<CharSequence> currentPrefix; |
| |
| /** |
| * Create a new {@link PrefixTokenFilter} |
| * |
| * @param input {@link TokenStream} to wrap |
| * @param separator Character used separate prefixes from other tokens |
| * @param prefixes {@link Iterable} of {@link CharSequence} which keeps all prefixes |
| */ |
| public PrefixTokenFilter(TokenStream input, char separator, Iterable<CharSequence> prefixes) { |
| super(input); |
| this.prefixes = prefixes; |
| this.currentPrefix = null; |
| this.separator = separator; |
| } |
| |
| @Override |
| public boolean incrementToken() throws IOException { |
| if (currentPrefix != null) { |
| if (!currentPrefix.hasNext()) { |
| return input.incrementToken(); |
| } else { |
| posAttr.setPositionIncrement(0); |
| } |
| } else { |
| currentPrefix = prefixes.iterator(); |
| termAttr.setEmpty(); |
| posAttr.setPositionIncrement(1); |
| } |
| termAttr.setEmpty(); |
| if (currentPrefix.hasNext()) { |
| termAttr.append(currentPrefix.next()); |
| } |
| termAttr.append(separator); |
| return true; |
| } |
| |
| @Override |
| public void reset() throws IOException { |
| super.reset(); |
| currentPrefix = null; |
| } |
| } |
| |
| private void validate(final CharSequence value) { |
| for (int i = 0; i < value.length(); i++) { |
| if (CONTEXT_SEPARATOR == value.charAt(i)) { |
| throw new IllegalArgumentException("Illegal value [" + value + "] UTF-16 codepoint [0x" |
| + Integer.toHexString((int) value.charAt(i))+ "] at position " + i + " is a reserved character"); |
| } |
| } |
| } |
| } |