| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.lucene.search.suggest.document; |
| |
| import java.io.IOException; |
| |
| import org.apache.lucene.codecs.CodecUtil; |
| import org.apache.lucene.codecs.FieldsConsumer; |
| import org.apache.lucene.codecs.FieldsProducer; |
| import org.apache.lucene.codecs.PostingsFormat; |
| import org.apache.lucene.index.FieldInfos; |
| import org.apache.lucene.index.SegmentReadState; |
| import org.apache.lucene.index.SegmentWriteState; |
| import org.apache.lucene.store.DataOutput; |
| import org.apache.lucene.util.fst.FST; |
| |
| /** |
| * <p> |
| * A {@link PostingsFormat} which supports document suggestion based on |
| * indexed {@link SuggestField}s. |
| * Document suggestion is based on a weighted FST which maps analyzed |
| * terms of a {@link SuggestField} to its surface form and document id. |
| * </p> |
| * <p> |
| * Files: |
| * <ul> |
| * <li><tt>.lkp</tt>: <a href="#Completiondictionary">Completion Dictionary</a></li> |
| * <li><tt>.cmp</tt>: <a href="#Completionindex">Completion Index</a></li> |
| * </ul> |
| * <p> |
| * <a name="Completiondictionary"></a> |
| * <h3>Completion Dictionary</h3> |
| * <p>The .lkp file contains an FST for each suggest field |
| * </p> |
| * <ul> |
| * <li>CompletionDict (.lkp) --> Header, FST<sup>NumSuggestFields</sup>, Footer</li> |
| * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li> |
| * <!-- TODO: should the FST output be mentioned at all? --> |
| * <li>FST --> {@link FST FST&lt;Long, BytesRef&gt;}</li> |
| * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * <p>Notes:</p> |
| * <ul> |
| * <li>Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information |
| * for the Completion implementation.</li> |
| * <li>FST maps all analyzed forms to surface forms of a SuggestField</li> |
| * </ul> |
| * <a name="Completionindex"></a> |
| * <h3>Completion Index</h3> |
| * <p>The .cmp file contains an index into the completion dictionary, so that it can be |
| * accessed randomly.</p> |
| * <ul> |
| * <li>CompletionIndex (.cmp) --> Header, NumSuggestFields, Entry<sup>NumSuggestFields</sup>, Footer</li> |
| * <li>Header --> {@link CodecUtil#writeHeader CodecHeader}</li> |
| * <li>NumSuggestFields --> {@link DataOutput#writeVInt Uint32}</li> |
| * <li>Entry --> FieldNumber, CompletionDictionaryOffset, MinWeight, MaxWeight, Type</li> |
| * <li>FieldNumber --> {@link DataOutput#writeVInt Uint32}</li> |
| * <li>CompletionDictionaryOffset --> {@link DataOutput#writeVLong Uint64}</li> |
| * <li>MinWeight --> {@link DataOutput#writeVLong Uint64}</li> |
| * <li>MaxWeight --> {@link DataOutput#writeVLong Uint64}</li> |
| * <li>Type --> {@link DataOutput#writeByte Byte}</li> |
| * <li>Footer --> {@link CodecUtil#writeFooter CodecFooter}</li> |
| * </ul> |
| * <p>Notes:</p> |
| * <ul> |
| * <li>Header is a {@link CodecUtil#writeHeader CodecHeader} storing the version information |
| * for the Completion implementation.</li> |
| * <li>NumSuggestFields is the number of suggest fields indexed</li> |
| * <li>FieldNumber is the field's number from {@link FieldInfos} (.fnm)</li> |
| * <li>CompletionDictionaryOffset is the file offset of a field's FST in CompletionDictionary (.lkp)</li> |
| * <li>MinWeight and MaxWeight are the global minimum and maximum weight for the field</li> |
| * <li>Type indicates if the suggester has context or not</li> |
| * </ul> |
| * |
| * @lucene.experimental |
| */ |
| public abstract class CompletionPostingsFormat extends PostingsFormat { |
| |
| static final int COMPLETION_CODEC_VERSION = 1; |
| static final int COMPLETION_VERSION_CURRENT = COMPLETION_CODEC_VERSION; |
| static final String INDEX_EXTENSION = "cmp"; |
| static final String DICT_EXTENSION = "lkp"; |
| |
| /** |
| * An enum that allows to control if suggester FSTs are loaded into memory or read off-heap |
| */ |
| public enum FSTLoadMode { |
| /** |
| * Always read FSTs from disk. |
| * NOTE: If this option is used the FST will be read off-heap even if buffered directory implementations |
| * are used. |
| */ |
| OFF_HEAP, |
| /** |
| * Never read FSTs from disk ie. all suggest fields FSTs are loaded into memory |
| */ |
| ON_HEAP, |
| /** |
| * Automatically make the decision if FSTs are read from disk depending if the segment read from an MMAPDirectory |
| */ |
| AUTO |
| } |
| |
| private final FSTLoadMode fstLoadMode; |
| |
| /** |
| * Used only by core Lucene at read-time via Service Provider instantiation |
| */ |
| public CompletionPostingsFormat(String name) { |
| this(name, FSTLoadMode.ON_HEAP); |
| } |
| |
| /** |
| * Creates a {@link CompletionPostingsFormat} that will |
| * use the provided <code>fstLoadMode</code> to determine |
| * if the completion FST should be loaded on or off heap. |
| */ |
| public CompletionPostingsFormat(String name, FSTLoadMode fstLoadMode) { |
| super(name); |
| this.fstLoadMode = fstLoadMode; |
| } |
| |
| /** |
| * Concrete implementation should specify the delegating postings format |
| */ |
| protected abstract PostingsFormat delegatePostingsFormat(); |
| |
| @Override |
| public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException { |
| PostingsFormat delegatePostingsFormat = delegatePostingsFormat(); |
| if (delegatePostingsFormat == null) { |
| throw new UnsupportedOperationException("Error - " + getClass().getName() |
| + " has been constructed without a choice of PostingsFormat"); |
| } |
| return new CompletionFieldsConsumer(getName(), delegatePostingsFormat, state); |
| } |
| |
| @Override |
| public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException { |
| return new CompletionFieldsProducer(getName(), state, fstLoadMode); |
| } |
| } |