| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search.suggest.document; |
| |
| import java.io.ByteArrayOutputStream; |
| import java.io.IOException; |
| |
| import org.apache.lucene.analysis.Analyzer; |
| import org.apache.lucene.analysis.TokenStream; |
| import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter; |
| import org.apache.lucene.document.Field; |
| import org.apache.lucene.document.FieldType; |
| import org.apache.lucene.index.IndexOptions; |
| import org.apache.lucene.store.OutputStreamDataOutput; |
| import org.apache.lucene.util.BytesRef; |
| |
| /** |
| * <p> |
| * Field that indexes a string value and a weight as a weighted completion |
| * against a named suggester. |
| * Field is tokenized, not stored and stores documents, frequencies and positions. |
| * Field can be used to provide near real time document suggestions. |
| * </p> |
| * <p> |
| * Besides the usual {@link org.apache.lucene.analysis.Analyzer}s, |
| * {@link CompletionAnalyzer} |
| * can be used to tune suggest field only parameters |
| * (e.g. preserving token separators, preserving position increments |
| * when converting the token stream to an automaton) |
| * </p> |
| * <p> |
| * Example indexing usage: |
| * <pre class="prettyprint"> |
| * document.add(new SuggestField(name, "suggestion", 4)); |
| * </pre> |
| * To perform document suggestions based on the this field, use |
| * {@link SuggestIndexSearcher#suggest(CompletionQuery, int, boolean)} |
| * |
| * @lucene.experimental |
| */ |
| public class SuggestField extends Field { |
| |
| /** Default field type for suggest field */ |
| public static final FieldType FIELD_TYPE = new FieldType(); |
| static { |
| FIELD_TYPE.setTokenized(true); |
| FIELD_TYPE.setStored(false); |
| FIELD_TYPE.setStoreTermVectors(false); |
| FIELD_TYPE.setOmitNorms(false); |
| FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); |
| FIELD_TYPE.freeze(); |
| } |
| |
| static final byte TYPE = 0; |
| |
| private final BytesRef surfaceForm; |
| private final int weight; |
| |
| /** |
| * Creates a {@link SuggestField} |
| * |
| * @param name field name |
| * @param value field value to get suggestions on |
| * @param weight field weight |
| * |
| * @throws IllegalArgumentException if either the name or value is null, |
| * if value is an empty string, if the weight is negative, if value contains |
| * any reserved characters |
| */ |
| public SuggestField(String name, String value, int weight) { |
| super(name, value, FIELD_TYPE); |
| if (weight < 0) { |
| throw new IllegalArgumentException("weight must be >= 0"); |
| } |
| if (value.length() == 0) { |
| throw new IllegalArgumentException("value must have a length > 0"); |
| } |
| for (int i = 0; i < value.length(); i++) { |
| if (isReserved(value.charAt(i))) { |
| throw new IllegalArgumentException("Illegal input [" + value + "] UTF-16 codepoint [0x" |
| + Integer.toHexString((int) value.charAt(i))+ "] at position " + i + " is a reserved character"); |
| } |
| } |
| this.surfaceForm = new BytesRef(value); |
| this.weight = weight; |
| } |
| |
| @Override |
| public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) { |
| CompletionTokenStream completionStream = wrapTokenStream(super.tokenStream(analyzer, reuse)); |
| completionStream.setPayload(buildSuggestPayload()); |
| return completionStream; |
| } |
| |
| /** |
| * Wraps a <code>stream</code> with a CompletionTokenStream. |
| * |
| * Subclasses can override this method to change the indexing pipeline. |
| */ |
| protected CompletionTokenStream wrapTokenStream(TokenStream stream) { |
| if (stream instanceof CompletionTokenStream) { |
| return (CompletionTokenStream) stream; |
| } else { |
| return new CompletionTokenStream(stream); |
| } |
| } |
| |
| /** |
| * Returns a byte to denote the type of the field |
| */ |
| protected byte type() { |
| return TYPE; |
| } |
| |
| private BytesRef buildSuggestPayload() { |
| ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); |
| try (OutputStreamDataOutput output = new OutputStreamDataOutput(byteArrayOutputStream)) { |
| output.writeVInt(surfaceForm.length); |
| output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length); |
| output.writeVInt(weight + 1); |
| output.writeByte(type()); |
| } catch (IOException e) { |
| throw new RuntimeException(e); // not possible, it's a ByteArrayOutputStream! |
| } |
| return new BytesRef(byteArrayOutputStream.toByteArray()); |
| } |
| |
| private boolean isReserved(char c) { |
| switch (c) { |
| case ConcatenateGraphFilter.SEP_LABEL: |
| case CompletionAnalyzer.HOLE_CHARACTER: |
| case NRTSuggesterBuilder.END_BYTE: |
| return true; |
| default: |
| return false; |
| } |
| } |
| } |