blob: c5cd82fbc64a6a37e67c9a3e2c5429e9a1a37e0e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.spelling.suggest;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.lang.invoke.MethodHandles;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.HighFrequencyDictionary;
import org.apache.lucene.search.spell.SuggestMode;
import org.apache.lucene.search.suggest.FileDictionary;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
import org.apache.lucene.search.suggest.fst.WFSTCompletionLookup;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.CloseHook;
import org.apache.solr.core.SolrCore;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.spelling.SolrSpellChecker;
import org.apache.solr.spelling.SpellingOptions;
import org.apache.solr.spelling.SpellingResult;
import org.apache.solr.spelling.Token;
import org.apache.solr.spelling.suggest.fst.FSTLookupFactory;
import org.apache.solr.spelling.suggest.jaspell.JaspellLookupFactory;
import org.apache.solr.spelling.suggest.tst.TSTLookupFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class Suggester extends SolrSpellChecker {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
/** Location of the source data - either a path to a file, or null for the
* current IndexReader.
*/
public static final String LOCATION = "sourceLocation";
/** Fully-qualified class of the {@link Lookup} implementation. */
public static final String LOOKUP_IMPL = "lookupImpl";
/**
* Minimum frequency of terms to consider when building the dictionary.
*/
public static final String THRESHOLD_TOKEN_FREQUENCY = "threshold";
/**
* Name of the location where to persist the dictionary. If this location
* is relative then the data will be stored under the core's dataDir. If this
* is null the storing will be disabled.
*/
public static final String STORE_DIR = "storeDir";
protected String sourceLocation;
protected File storeDir;
protected float threshold;
protected Dictionary dictionary;
protected IndexReader reader;
protected Lookup lookup;
protected String lookupImpl;
protected SolrCore core;
private LookupFactory factory;
@Override
public String init(@SuppressWarnings({"rawtypes"})NamedList config, SolrCore core) {
log.info("init: {}", config);
String name = super.init(config, core);
threshold = config.get(THRESHOLD_TOKEN_FREQUENCY) == null ? 0.0f
: (Float)config.get(THRESHOLD_TOKEN_FREQUENCY);
sourceLocation = (String) config.get(LOCATION);
lookupImpl = (String)config.get(LOOKUP_IMPL);
// support the old classnames without -Factory for config file backwards compatibility.
if (lookupImpl == null || "org.apache.solr.spelling.suggest.jaspell.JaspellLookup".equals(lookupImpl)) {
lookupImpl = JaspellLookupFactory.class.getName();
} else if ("org.apache.solr.spelling.suggest.tst.TSTLookup".equals(lookupImpl)) {
lookupImpl = TSTLookupFactory.class.getName();
} else if ("org.apache.solr.spelling.suggest.fst.FSTLookup".equals(lookupImpl)) {
lookupImpl = FSTLookupFactory.class.getName();
}
factory = core.getResourceLoader().newInstance(lookupImpl, LookupFactory.class);
lookup = factory.create(config, core);
core.addCloseHook(new CloseHook() {
@Override
public void preClose(SolrCore core) {
if (lookup != null && lookup instanceof Closeable) {
try {
((Closeable) lookup).close();
} catch (IOException e) {
log.warn("Could not close the suggester lookup.", e);
}
}
}
@Override
public void postClose(SolrCore core) {}
});
String store = (String)config.get(STORE_DIR);
if (store != null) {
storeDir = new File(store);
if (!storeDir.isAbsolute()) {
storeDir = new File(core.getDataDir() + File.separator + storeDir);
}
if (!storeDir.exists()) {
storeDir.mkdirs();
} else {
// attempt reload of the stored lookup
try {
lookup.load(new FileInputStream(new File(storeDir, factory.storeFileName())));
} catch (IOException e) {
log.warn("Loading stored lookup data failed", e);
}
}
}
return name;
}
@Override
public void build(SolrCore core, SolrIndexSearcher searcher) throws IOException {
log.info("build()");
if (sourceLocation == null) {
reader = searcher.getIndexReader();
dictionary = new HighFrequencyDictionary(reader, field, threshold);
} else {
try {
dictionary = new FileDictionary(new InputStreamReader(
core.getResourceLoader().openResource(sourceLocation), StandardCharsets.UTF_8));
} catch (UnsupportedEncodingException e) {
// should not happen
log.error("should not happen", e);
}
}
lookup.build(dictionary);
if (storeDir != null) {
File target = new File(storeDir, factory.storeFileName());
if(!lookup.store(new FileOutputStream(target))) {
if (sourceLocation == null) {
assert reader != null && field != null;
log.error("Store Lookup build from index on field: {} failed reader has: {} docs", field, reader.maxDoc());
} else {
log.error("Store Lookup build from sourceloaction: {} failed", sourceLocation);
}
} else {
if (log.isInfoEnabled()) {
log.info("Stored suggest data to: {}", target.getAbsolutePath());
}
}
}
}
@Override
public void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException {
log.info("reload()");
if (dictionary == null && storeDir != null) {
// this may be a firstSearcher event, try loading it
FileInputStream is = new FileInputStream(new File(storeDir, factory.storeFileName()));
try {
if (lookup.load(is)) {
return; // loaded ok
}
} finally {
IOUtils.closeWhileHandlingException(is);
}
log.debug("load failed, need to build Lookup again");
}
// loading was unsuccessful - build it again
build(core, searcher);
}
static SpellingResult EMPTY_RESULT = new SpellingResult();
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
log.debug("getSuggestions: {}", options.tokens);
if (lookup == null) {
log.info("Lookup is null - invoke spellchecker.build first");
return EMPTY_RESULT;
}
SpellingResult res = new SpellingResult();
CharsRef scratch = new CharsRef();
for (Token t : options.tokens) {
scratch.chars = t.buffer();
scratch.offset = 0;
scratch.length = t.length();
boolean onlyMorePopular = (options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR) &&
!(lookup instanceof WFSTCompletionLookup) &&
!(lookup instanceof AnalyzingSuggester);
List<LookupResult> suggestions = lookup.lookup(scratch, onlyMorePopular, options.count);
if (suggestions == null) {
continue;
}
if (options.suggestMode != SuggestMode.SUGGEST_MORE_POPULAR) {
Collections.sort(suggestions);
}
for (LookupResult lr : suggestions) {
res.add(t, lr.key.toString(), (int)lr.value);
}
}
return res;
}
}