blob: 9f9190a577333a40694d6ac2141abc56481085e7 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.suggest;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.lang.reflect.Constructor;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
import java.util.Random;
import java.util.concurrent.Callable;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.search.suggest.analyzing.AnalyzingSuggester;
import org.apache.lucene.search.suggest.analyzing.BlendedInfixSuggester;
import org.apache.lucene.search.suggest.analyzing.FreeTextSuggester;
import org.apache.lucene.search.suggest.analyzing.FuzzySuggester;
import org.apache.lucene.search.suggest.fst.FSTCompletionLookup;
import org.apache.lucene.search.suggest.fst.WFSTCompletionLookup;
import org.apache.lucene.search.suggest.jaspell.JaspellLookup;
import org.apache.lucene.search.suggest.tst.TSTLookup;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.*;
import org.junit.BeforeClass;
import org.junit.Ignore;
/**
* Benchmarks tests for implementations of {@link Lookup} interface.
*/
@Ignore("COMMENT ME TO RUN BENCHMARKS!")
public class LookupBenchmarkTest extends LuceneTestCase {
@SuppressWarnings("unchecked")
private final List<Class<? extends Lookup>> benchmarkClasses = Arrays.asList(
FuzzySuggester.class,
AnalyzingSuggester.class,
AnalyzingInfixSuggester.class,
JaspellLookup.class,
TSTLookup.class,
FSTCompletionLookup.class,
WFSTCompletionLookup.class,
BlendedInfixSuggester.class,
FreeTextSuggester.class
);
private final static int rounds = 15;
private final static int warmup = 5;
private final int num = 7;
private final boolean onlyMorePopular = false;
private final static Random random = new Random(0xdeadbeef);
/**
* Input term/weight pairs.
*/
private static Input [] dictionaryInput;
/**
* Benchmark term/weight pairs (randomized order).
*/
private static List<Input> benchmarkInput;
/**
* Loads terms and frequencies from Wikipedia (cached).
*/
@BeforeClass
public static void setup() throws Exception {
assert false : "disable assertions before running benchmarks!";
List<Input> input = readTop50KWiki();
Collections.shuffle(input, random);
LookupBenchmarkTest.dictionaryInput = input.toArray(new Input [input.size()]);
Collections.shuffle(input, random);
LookupBenchmarkTest.benchmarkInput = input;
}
static final Charset UTF_8 = StandardCharsets.UTF_8;
/**
* Collect the multilingual input for benchmarks/ tests.
*/
public static List<Input> readTop50KWiki() throws Exception {
List<Input> input = new ArrayList<>();
URL resource = LookupBenchmarkTest.class.getResource("Top50KWiki.utf8");
assert resource != null : "Resource missing: Top50KWiki.utf8";
String line = null;
BufferedReader br = new BufferedReader(new InputStreamReader(resource.openStream(), UTF_8));
while ((line = br.readLine()) != null) {
int tab = line.indexOf('|');
assertTrue("No | separator?: " + line, tab >= 0);
int weight = Integer.parseInt(line.substring(tab + 1));
String key = line.substring(0, tab);
input.add(new Input(key, weight));
}
br.close();
return input;
}
/**
* Test construction time.
*/
public void testConstructionTime() throws Exception {
System.err.println("-- construction time");
for (final Class<? extends Lookup> cls : benchmarkClasses) {
BenchmarkResult result = measure(new Callable<Integer>() {
@Override
public Integer call() throws Exception {
final Lookup lookup = buildLookup(cls, dictionaryInput);
return lookup.hashCode();
}
});
System.err.println(
String.format(Locale.ROOT, "%-15s input: %d, time[ms]: %s",
cls.getSimpleName(),
dictionaryInput.length,
result.average.toString()));
}
}
/**
* Test memory required for the storage.
*/
public void testStorageNeeds() throws Exception {
System.err.println("-- RAM consumption");
for (Class<? extends Lookup> cls : benchmarkClasses) {
Lookup lookup = buildLookup(cls, dictionaryInput);
long sizeInBytes = lookup.ramBytesUsed();
System.err.println(
String.format(Locale.ROOT, "%-15s size[B]:%,13d",
lookup.getClass().getSimpleName(),
sizeInBytes));
}
}
/**
* Create {@link Lookup} instance and populate it.
*/
private Lookup buildLookup(Class<? extends Lookup> cls, Input[] input) throws Exception {
Lookup lookup = null;
if (cls == TSTLookup.class || cls == FSTCompletionLookup.class || cls == WFSTCompletionLookup.class) {
Constructor<? extends Lookup> ctor = cls.getConstructor(Directory.class, String.class);
lookup = ctor.newInstance(FSDirectory.open(createTempDir("LookupBenchmarkTest")), "test");
} else {
try {
lookup = cls.getConstructor().newInstance();
} catch (InstantiationException | NoSuchMethodException e) {
Analyzer a = new MockAnalyzer(random, MockTokenizer.KEYWORD, false);
if (cls == AnalyzingInfixSuggester.class || cls == BlendedInfixSuggester.class) {
Constructor<? extends Lookup> ctor = cls.getConstructor(Directory.class, Analyzer.class);
lookup = ctor.newInstance(FSDirectory.open(createTempDir("LookupBenchmarkTest")), a);
} else if (cls == AnalyzingSuggester.class) {
lookup = new AnalyzingSuggester(FSDirectory.open(createTempDir("LookupBenchmarkTest")), "test", a);
} else if (cls == FuzzySuggester.class) {
lookup = new FuzzySuggester(FSDirectory.open(createTempDir("LookupBenchmarkTest")), "test", a);
} else {
Constructor<? extends Lookup> ctor = cls.getConstructor(Analyzer.class);
lookup = ctor.newInstance(a);
}
}
}
lookup.build(new InputArrayIterator(input));
return lookup;
}
/**
* Test performance of lookup on full hits.
*/
public void testPerformanceOnFullHits() throws Exception {
final int minPrefixLen = 100;
final int maxPrefixLen = 200;
runPerformanceTest(minPrefixLen, maxPrefixLen, num, onlyMorePopular);
}
/**
* Test performance of lookup on longer term prefixes (6-9 letters or shorter).
*/
public void testPerformanceOnPrefixes6_9() throws Exception {
final int minPrefixLen = 6;
final int maxPrefixLen = 9;
runPerformanceTest(minPrefixLen, maxPrefixLen, num, onlyMorePopular);
}
/**
* Test performance of lookup on short term prefixes (2-4 letters or shorter).
*/
public void testPerformanceOnPrefixes2_4() throws Exception {
final int minPrefixLen = 2;
final int maxPrefixLen = 4;
runPerformanceTest(minPrefixLen, maxPrefixLen, num, onlyMorePopular);
}
/**
* Run the actual benchmark.
*/
public void runPerformanceTest(final int minPrefixLen, final int maxPrefixLen,
final int num, final boolean onlyMorePopular) throws Exception {
System.err.println(String.format(Locale.ROOT,
"-- prefixes: %d-%d, num: %d, onlyMorePopular: %s",
minPrefixLen, maxPrefixLen, num, onlyMorePopular));
for (Class<? extends Lookup> cls : benchmarkClasses) {
final Lookup lookup = buildLookup(cls, dictionaryInput);
final List<String> input = new ArrayList<>(benchmarkInput.size());
for (Input tf : benchmarkInput) {
String s = tf.term.utf8ToString();
String sub = s.substring(0, Math.min(s.length(),
minPrefixLen + random.nextInt(maxPrefixLen - minPrefixLen + 1)));
input.add(sub);
}
BenchmarkResult result = measure(new Callable<Integer>() {
@Override
public Integer call() throws Exception {
int v = 0;
for (String term : input) {
v += lookup.lookup(term, onlyMorePopular, num).size();
}
return v;
}
});
System.err.println(
String.format(Locale.ROOT, "%-15s queries: %d, time[ms]: %s, ~kQPS: %.0f",
lookup.getClass().getSimpleName(),
input.size(),
result.average.toString(),
input.size() / result.average.avg));
}
}
/**
* Do the measurements.
*/
private BenchmarkResult measure(Callable<Integer> callable) {
final double NANOS_PER_MS = 1000000;
try {
List<Double> times = new ArrayList<>();
for (int i = 0; i < warmup + rounds; i++) {
final long start = System.nanoTime();
guard = callable.call().intValue();
times.add((System.nanoTime() - start) / NANOS_PER_MS);
}
return new BenchmarkResult(times, warmup, rounds);
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
/** Guard against opts. */
@SuppressWarnings("unused")
private static volatile int guard;
private static class BenchmarkResult {
/** Average time per round (ms). */
public final Average average;
public BenchmarkResult(List<Double> times, int warmup, int rounds) {
this.average = Average.from(times.subList(warmup, times.size()));
}
}
}