blob: 464b593da80bb78f48b97bcc05ac1dc856d73ae5 [file] [log] [blame]
package org.apache.lucene.facet.sampling;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Parameters for sampling, dictating whether sampling is to take place and how.
 * <p>
 * Every setting starts out at its corresponding {@code DEFAULT_*} constant and
 * can be changed through the setters. The setters store the given value as is;
 * call {@link #validate()} to check that the combination of settings is
 * consistent.
 *
 * @lucene.experimental
 */
public class SamplingParams {

  /**
   * Default factor by which more results are requested over the sample set.
   * @see SamplingParams#getOversampleFactor()
   */
  public static final double DEFAULT_OVERSAMPLE_FACTOR = 1d;

  /**
   * Default ratio between size of sample to original size of document set.
   * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
   */
  public static final double DEFAULT_SAMPLE_RATIO = 0.01;

  /**
   * Default maximum size of sample.
   * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
   */
  public static final int DEFAULT_MAX_SAMPLE_SIZE = 10000;

  /**
   * Default minimum size of sample.
   * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
   */
  public static final int DEFAULT_MIN_SAMPLE_SIZE = 100;

  /**
   * Default sampling threshold, if number of results is less than this number - no sampling will take place
   * @see SamplingParams#getSamplingThreshold()
   */
  public static final int DEFAULT_SAMPLING_THRESHOLD = 75000;

  private int maxSampleSize = DEFAULT_MAX_SAMPLE_SIZE;
  private int minSampleSize = DEFAULT_MIN_SAMPLE_SIZE;
  private double sampleRatio = DEFAULT_SAMPLE_RATIO;
  private int samplingThreshold = DEFAULT_SAMPLING_THRESHOLD;
  private double oversampleFactor = DEFAULT_OVERSAMPLE_FACTOR;
  private SampleFixer sampleFixer = null;

  /**
   * Return the maxSampleSize.
   * In no case should the resulting sample size exceed this value.
   * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
   */
  public final int getMaxSampleSize() {
    return maxSampleSize;
  }

  /**
   * Return the minSampleSize.
   * In no case should the resulting sample size be smaller than this value.
   * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
   */
  public final int getMinSampleSize() {
    return minSampleSize;
  }

  /**
   * @return the sampleRatio, i.e. the ratio between the size of the sample
   *         and the size of the original document set
   * @see Sampler#getSampleSet(org.apache.lucene.facet.search.ScoredDocIDs)
   */
  public final double getSampleRatio() {
    return sampleRatio;
  }

  /**
   * Return the samplingThreshold.
   * Sampling would be performed only for document sets larger than this.
   */
  public final int getSamplingThreshold() {
    return samplingThreshold;
  }

  /**
   * @param maxSampleSize
   *          the maxSampleSize to set; stored without any range check
   * @see #getMaxSampleSize()
   * @see #validate()
   */
  public void setMaxSampleSize(int maxSampleSize) {
    this.maxSampleSize = maxSampleSize;
  }

  /**
   * @param minSampleSize
   *          the minSampleSize to set; stored without any range check
   * @see #getMinSampleSize()
   * @see #validate()
   */
  public void setMinSampleSize(int minSampleSize) {
    this.minSampleSize = minSampleSize;
  }

  /**
   * @param sampleRatio
   *          the sampleRatio to set; stored without any range check
   * @see #getSampleRatio()
   * @see #validate()
   */
  public void setSampleRatio(double sampleRatio) {
    this.sampleRatio = sampleRatio;
  }

  /**
   * Set a sampling-threshold. The value is stored without any range check.
   * @see #getSamplingThreshold()
   * @see #validate()
   */
  public void setSamplingThreshold(int samplingThreshold) {
    this.samplingThreshold = samplingThreshold;
  }

  /**
   * Check validity of sampling settings, making sure that
   * <ul>
   * <li>{@code minSampleSize <= maxSampleSize <= samplingThreshold}</li>
   * <li>{@code 0 < sampleRatio <= 1}</li>
   * </ul>
   * A {@code NaN} sample ratio fails both ratio comparisons and is therefore
   * reported as invalid.
   *
   * @return true if valid, false otherwise
   */
  public boolean validate() {
    boolean sizesOrdered =
        minSampleSize <= maxSampleSize && maxSampleSize <= samplingThreshold;
    // The upper bound is inclusive, as documented above: a ratio of exactly 1
    // is a legal setting.
    boolean ratioInRange = sampleRatio > 0 && sampleRatio <= 1;
    return sizesOrdered && ratioInRange;
  }

  /**
   * Return the oversampleFactor. When sampling, we would collect that much more
   * results, so that later, when selecting top out of these, chances are higher
   * to get actual best results. Note that having this value larger than 1 only
   * makes sense when using a SampleFixer which finds accurate results, such as
   * <code>TakmiSampleFixer</code>. When this value is not greater than 1,
   * {@link #shouldOverSample()} reports that no oversampling should take place.
   */
  public final double getOversampleFactor() {
    return oversampleFactor;
  }

  /**
   * @param oversampleFactor the oversampleFactor to set
   * @see #getOversampleFactor()
   */
  public void setOversampleFactor(double oversampleFactor) {
    this.oversampleFactor = oversampleFactor;
  }

  /**
   * @return {@link SampleFixer} to be used while fixing the sampled results, if
   *         <code>null</code> no fixing will be performed
   */
  public SampleFixer getSampleFixer() {
    return sampleFixer;
  }

  /**
   * Set a {@link SampleFixer} to be used while fixing the sampled results.
   * {@code null} means no fixing will be performed
   */
  public void setSampleFixer(SampleFixer sampleFixer) {
    this.sampleFixer = sampleFixer;
  }

  /**
   * Returns whether over-sampling should be done. By default returns
   * {@code true} when {@link #getSampleFixer()} is not {@code null} and
   * {@link #getOversampleFactor()} &gt; 1, {@code false} otherwise.
   */
  public boolean shouldOverSample() {
    return sampleFixer != null && oversampleFactor > 1d;
  }
}