blob: a9fb2ce7d35e365931989f37cfea21ce38cd8114 [file] [log] [blame]
package org.apache.lucene.util;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.PForDeltaDocIdSet;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.WAH8DocIdSet;
import org.apache.lucene.util.packed.EliasFanoDocIdSet;
public class TestDocIdSetBenchmark extends LuceneTestCase {
private static class ScoresRegister {
private final Map<Object, Map<Float, Map<Class<? extends DocIdSet>, Float>>> scores;
ScoresRegister() {
scores = new LinkedHashMap<>();
}
public void registerScore(Object key, Float loadFactor, DocIdSet set, float score) {
Map<Float, Map<Class<? extends DocIdSet>, Float>> m1 = scores.get(key);
if (m1 == null) {
m1 = new LinkedHashMap<>();
scores.put(key, m1);
}
Map<Class<? extends DocIdSet>, Float> m2 = m1.get(loadFactor);
if (m2 == null) {
m2 = new LinkedHashMap<>();
m1.put(loadFactor, m2);
}
Float s = m2.get(set.getClass());
if (s != null) {
score = Math.max(s, score);
}
m2.put(set.getClass(), score);
}
public void printChartsTables() {
for (Map.Entry<Object, Map<Float, Map<Class<? extends DocIdSet>, Float>>> entry : scores.entrySet()) {
System.out.println("#### " + entry.getKey());
Map<Float, Map<Class<? extends DocIdSet>, Float>> m2 = entry.getValue();
System.out.print("['log10(loadFactor)', '" + FixedBitSet.class.getSimpleName() + "'");
for (Class<?> cls : m2.values().iterator().next().keySet()) {
if (cls != FixedBitSet.class) {
System.out.print(", '" + cls.getSimpleName() + "'");
}
}
System.out.println("],");
for (Map.Entry<Float, Map<Class<? extends DocIdSet>, Float>> entry2 : m2.entrySet()) {
final float loadFactor = entry2.getKey();
final Map<Class<? extends DocIdSet>, Float> scores = entry2.getValue();
final double fbsScore = scores.get(FixedBitSet.class);
System.out.print("[" + Math.log10(loadFactor) + ",0");
for (Map.Entry<Class<? extends DocIdSet>, Float> score : scores.entrySet()) {
if (score.getKey() != FixedBitSet.class) {
System.out.print("," + Math.log(score.getValue() / fbsScore) / Math.log(2));
}
}
System.out.println("],");
}
}
}
}
private static int DUMMY; // to prevent from JVM optimizations
private static final long SECOND = 1000L * 1000L * 1000L; // in ns
private static final Random RANDOM = new Random();
private static DocIdSetFactory[] FACTORIES = new DocIdSetFactory[] {
new DocIdSetFactory() {
@Override
public DocIdSet copyOf(FixedBitSet set) throws IOException {
return new WAH8DocIdSet.Builder().add(set.iterator()).build();
}
},
new DocIdSetFactory() {
@Override
public DocIdSet copyOf(FixedBitSet set) throws IOException {
return new PForDeltaDocIdSet.Builder().add(set.iterator()).build();
}
},
new DocIdSetFactory() {
@Override
public DocIdSet copyOf(FixedBitSet set) throws IOException {
int numValues = set.cardinality();
int upperBound = set.prevSetBit(set.length() - 1);
if (true || EliasFanoDocIdSet.sufficientlySmallerThanBitSet(numValues, upperBound)) {
final EliasFanoDocIdSet copy = new EliasFanoDocIdSet(numValues, upperBound);
copy.encodeFromDisi(set.iterator());
return copy;
} else {
return set;
}
}
}
};
private static final int MAX_DOC = 1 << 28;
//private static final int MAX_DOC = 1 << 24;
//private static final int MAX_DOC = 1 << 22;
//private static float[] LOAD_FACTORS = new float[] {0.00001f, 0.0001f, 0.001f, 0.01f, 0.1f, 0.5f, 0.9f, 1f};
private static float[] LOAD_FACTORS = new float[] {0.000001f, 0.00001f, 0.0001f, 0.001f, 0.01f, 0.1f, 0.14f, 0.15f, 0.9f, 1f};
//private static float[] LOAD_FACTORS = new float[] {0.1f, 1.0f};
private static abstract class DocIdSetFactory {
public abstract DocIdSet copyOf(FixedBitSet set) throws IOException;
}
protected static FixedBitSet randomSet(int numBits, int numBitsSet) {
assert numBitsSet <= numBits;
final FixedBitSet set = new FixedBitSet(numBits);
if (numBitsSet == numBits) {
set.set(0, numBits);
} else {
for (int i = 0; i < numBitsSet; ++i) {
while (true) {
final int o = RANDOM.nextInt(numBits);
if (!set.get(o)) {
set.set(o);
break;
}
}
}
}
return set;
}
protected static FixedBitSet randomSet(int numBits, float percentSet) {
return randomSet(numBits, (int) (percentSet * numBits));
}
private static int exhaustIterator(DocIdSet set) throws IOException {
int dummy = 0;
final DocIdSetIterator it = set.iterator();
// System.out.println("exhaustIterator set " + set.toString());
// assert it != null : "exhaustIterator set " + set.toString();
if (it != null) {
for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
dummy += doc;
}
}
return dummy;
}
private static int exhaustIterator(DocIdSet set, int increment) throws IOException {
int dummy = 0;
final DocIdSetIterator it = set.iterator();
if (it != null) {
for (int doc = -1; doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.advance(doc + increment)) {
dummy += doc;
}
}
return dummy;
}
private static Collection<DocIdSet> sets(int numBits, float load) throws IOException {
final FixedBitSet fixedSet = randomSet(numBits, load);
final List<DocIdSet> sets = new ArrayList<DocIdSet>();
sets.add(fixedSet);
for (DocIdSetFactory factory : FACTORIES) {
sets.add(factory.copyOf(fixedSet));
}
return sets;
}
public static float score(DocIdSet set) throws IOException {
final long start = System.nanoTime();
int dummy = 0;
long score = 0;
while (System.nanoTime() - start < SECOND) {
dummy += exhaustIterator(set);
++score;
}
DUMMY += dummy;
return ((float)score) / (System.nanoTime() - start);
}
public static float score(DocIdSet set, int inc) throws IOException {
final long start = System.nanoTime();
int dummy = 0;
long score = 0;
while (System.nanoTime() - start < SECOND) {
dummy += exhaustIterator(set, inc);
++score;
}
DUMMY += dummy;
return ((float)score) / (System.nanoTime() - start);
}
public static void main(String[] args) throws IOException, InterruptedException {
ScoresRegister reg = new ScoresRegister();
for (float loadFactor : LOAD_FACTORS) {
for (DocIdSet set : sets(MAX_DOC, loadFactor)) {
final long memSize = RamUsageEstimator.sizeOf(set);
reg.registerScore("memory", loadFactor, set, memSize);
System.out.println(set.getClass().getSimpleName() + "\t" + loadFactor + "\t" + memSize);
}
}
System.out.println();
System.out.println("JVM warm-up");
long start = System.nanoTime();
while (System.nanoTime() - start < 30 * SECOND) {
//while (System.nanoTime() - start < 3 * SECOND) {
//final int numBits = 1 << 18;
final int numBits = 1 << 14;
for (float loadFactor : LOAD_FACTORS) {
for (DocIdSet set : sets(numBits, loadFactor)) {
score(set);
score(set, 1);
score(set, 701);
}
}
}
// Start the test
System.out.println("LoadFactor\tBenchmark\tImplementation\tScore");
//for (int i = 0; i < 5; ++i) {
for (int i = 0; i < 3; ++i) {
for (float load : LOAD_FACTORS) {
final Collection<DocIdSet> sets = sets(MAX_DOC, load);
// Free memory so that GC doesn't kick in while the benchmark is running
System.gc();
Thread.sleep(5 * 1000);
for (DocIdSet set : sets) {
final float score = score(set);
reg.registerScore("nextDoc", load, set, score);
System.out.println(load + "\tnextDoc()\t" + set.getClass().getSimpleName() + "\t" + score(set));
}
//for (int inc : new int[] {1, 31, 313, 3571, 33533, 319993}) { // primes
// for (int inc : new int[] {3571, 33533, 319993}) { // primes
for (int inc : new int[] {1, 31, 313, 3571, 33533, 319993}) { // primes
final String key = "advance(" + inc + ")";
for (DocIdSet set : sets) {
final float score = score(set, inc);
reg.registerScore(key, load, set, score);
System.out.println(load + "\t" + key + "\t" + set.getClass().getSimpleName() + "\t" + score);
}
}
}
}
System.out.println("DONE " + DUMMY);
System.out.println("Tables for Google charts:");
reg.printChartsTables();
}
public void testPerf() throws IOException, InterruptedException {
main(null);
}
}