// blob: 0925aee3ac670af753e986f03df4e866c3aa8f28 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.similarities;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
/**
* Similarity implementation that randomizes Similarity implementations
* per-field.
* <p>
* The choices are 'sticky', so the selected algorithm is always used
* for the same field.
*/
public class RandomSimilarity extends PerFieldSimilarityWrapper {
  /** A BM25 instance held by this object; nothing else in this class reads it. */
  final BM25Similarity defaultSim = new BM25Similarity();

  /** This instance's private, shuffled copy of {@link #allSims}. */
  final List<Similarity> knownSims;

  /** Remembers which similarity each field was given, so the choice stays sticky. */
  Map<String,Similarity> previousMappings = new HashMap<>();

  /** Random value mixed with a field name's hash code to pick that field's similarity. */
  final int perFieldSeed;

  /** Random flag; within this class it is only reported by {@link #toString()}. */
  final boolean shouldQueryNorm;

  /**
   * Creates a wrapper whose per-field choices are driven by {@code random}.
   * <p>
   * The generator is consumed in a fixed order: one {@code int} for the per-field seed,
   * one {@code boolean} for the query-norm flag, and then whatever the shuffle of the
   * similarity pool requires.
   *
   * @param random source of randomness for the seed, the flag and the shuffle
   */
  public RandomSimilarity(Random random) {
    this.perFieldSeed = random.nextInt();
    this.shouldQueryNorm = random.nextBoolean();
    this.knownSims = new ArrayList<>(allSims);
    Collections.shuffle(this.knownSims, random);
  }

  /**
   * Returns the similarity assigned to {@code field}, selecting and recording one the
   * first time the field is requested.
   *
   * @param field name of the field; must not be {@code null}
   * @return the similarity remembered for this field
   */
  @Override
  public synchronized Similarity get(String field) {
    assert field != null;
    final Similarity remembered = previousMappings.get(field);
    if (remembered != null) {
      return remembered;
    }
    final int mixed = perFieldSeed ^ field.hashCode();
    // Math.abs(Integer.MIN_VALUE) is still negative, so clamp to zero before taking the remainder.
    final int slot = Math.max(0, Math.abs(mixed)) % knownSims.size();
    final Similarity chosen = knownSims.get(slot);
    previousMappings.put(field, chosen);
    return chosen;
  }

  /**
   * Describes this instance by its query-norm flag followed by the field-to-similarity
   * assignments recorded so far.
   */
  @Override
  public synchronized String toString() {
    final StringBuilder description = new StringBuilder();
    description.append("RandomSimilarity(queryNorm=");
    description.append(shouldQueryNorm);
    description.append("): ");
    description.append(previousMappings.toString());
    return description.toString();
  }

  // Building blocks for the pool of similarities that instances choose from.

  /** The DFR basic models to test. */
  static BasicModel[] BASIC_MODELS = {
    new BasicModelG(),
    new BasicModelIF(),
    new BasicModelIn(),
    new BasicModelIne(),
  };

  /** The DFR aftereffects to test. */
  static AfterEffect[] AFTER_EFFECTS = {
    new AfterEffectB(),
    new AfterEffectL()
  };

  /** The DFR normalizations to test. */
  static Normalization[] NORMALIZATIONS = {
    new NormalizationH1(),
    new NormalizationH2(),
    new NormalizationH3(),
    new NormalizationZ()
    // TODO: if we enable NoNormalization, we have to deal with
    // a couple tests (e.g. TestDocBoost, TestSort) that expect length normalization
    // new Normalization.NoNormalization()
  };

  /** The distributions for IB. */
  static Distribution[] DISTRIBUTIONS = {
    new DistributionLL(),
    new DistributionSPL()
  };

  /** Lambdas for IB. */
  static Lambda[] LAMBDAS = {
    new LambdaDF(),
    new LambdaTTF()
  };

  /** Independence measures for DFI. */
  static Independence[] INDEPENDENCE_MEASURES = {
    new IndependenceStandardized(),
    new IndependenceSaturated(),
    new IndependenceChiSquared()
  };

  /** Every similarity an instance may assign to a field, in a fixed construction order. */
  static List<Similarity> allSims;

  static {
    final List<Similarity> pool = new ArrayList<>();

    // Stand-alone similarities that need no component combinations.
    Collections.addAll(pool,
        new ClassicSimilarity(),
        new BM25Similarity(),
        new AxiomaticF1EXP(),
        new AxiomaticF1LOG(),
        new AxiomaticF2EXP(),
        new AxiomaticF2LOG(),
        new BooleanSimilarity());

    // DFR: every (basic model, aftereffect, normalization) combination.
    for (BasicModel model : BASIC_MODELS) {
      for (AfterEffect effect : AFTER_EFFECTS) {
        for (Normalization norm : NORMALIZATIONS) {
          pool.add(new DFRSimilarity(model, effect, norm));
        }
      }
    }

    // IB: every (distribution, lambda, normalization) combination.
    for (Distribution dist : DISTRIBUTIONS) {
      for (Lambda lam : LAMBDAS) {
        for (Normalization norm : NORMALIZATIONS) {
          pool.add(new IBSimilarity(dist, lam, norm));
        }
      }
    }

    // Language models: Dirichlet, then Jelinek-Mercer constructed with 0.1f and with 0.7f.
    pool.add(new LMDirichletSimilarity());
    pool.add(new LMJelinekMercerSimilarity(0.1f));
    pool.add(new LMJelinekMercerSimilarity(0.7f));

    // DFI: one similarity per independence measure.
    for (Independence measure : INDEPENDENCE_MEASURES) {
      pool.add(new DFISimilarity(measure));
    }

    allSims = pool;
  }
}