diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java
index d93594d..a5e0a20 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/BaseSimilarityTestCase.java
@@ -193,21 +193,47 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
lowerBound = SmallFloat.byte4ToInt((byte) norm);
}
final long maxDoc;
- if (random.nextBoolean()) {
- // small collection
- maxDoc = TestUtil.nextLong(random, 1, 100000);
- } else {
- // yuge collection
- maxDoc = TestUtil.nextLong(random, 1, MAXDOC_FORTESTING);
+ switch (random.nextInt(6)) {
+ case 0:
+ // 1 doc collection
+ maxDoc = 1;
+ break;
+ case 1:
+ // 2 doc collection
+ maxDoc = 2;
+ break;
+ case 2:
+ // tiny collection
+ maxDoc = TestUtil.nextLong(random, 3, 16);
+ break;
+ case 3:
+ // small collection
+ maxDoc = TestUtil.nextLong(random, 16, 100000);
+ break;
+ case 4:
+ // big collection
+ maxDoc = TestUtil.nextLong(random, 100000, MAXDOC_FORTESTING);
+ break;
+ default:
+ // yuge collection
+ maxDoc = MAXDOC_FORTESTING;
+ break;
}
// TODO: make this a mandatory statistic, or test it with -1
final long docCount;
- if (random.nextBoolean()) {
- // sparse field
- docCount = TestUtil.nextLong(random, 1, maxDoc);
- } else {
- // fully populated
- docCount = maxDoc;
+ switch (random.nextInt(3)) {
+ case 0:
+ // sparsest field
+ docCount = 1;
+ break;
+ case 1:
+ // sparse field
+ docCount = TestUtil.nextLong(random, 1, maxDoc);
+ break;
+ default:
+ // fully populated
+ docCount = maxDoc;
+ break;
}
// random docsize: but can't require docs to have > 2B tokens
long upperBound;
@@ -218,15 +244,22 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
}
// TODO: make this a mandatory statistic, or test it with -1
final long sumDocFreq;
- if (random.nextBoolean()) {
- // shortest possible docs
- sumDocFreq = docCount;
- } else {
- // random docsize
- sumDocFreq = TestUtil.nextLong(random, docCount, upperBound + 1 - lowerBound);
+ switch (random.nextInt(3)) {
+ case 0:
+ // shortest possible docs
+ sumDocFreq = docCount;
+ break;
+ case 1:
+ // biggest possible docs
+ sumDocFreq = upperBound + 1 - lowerBound;
+ break;
+ default:
+ // random docsize
+ sumDocFreq = TestUtil.nextLong(random, docCount, upperBound + 1 - lowerBound);
+ break;
}
final long sumTotalTermFreq;
- switch (random.nextInt(3)) {
+ switch (random.nextInt(4)) {
case 0:
// unsupported (e.g. omitTF)
sumTotalTermFreq = -1;
@@ -235,6 +268,10 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
// no repetition of terms (except to satisfy this norm)
sumTotalTermFreq = sumDocFreq - 1 + lowerBound;
break;
+ case 2:
+ // maximum repetition of terms
+ sumTotalTermFreq = upperBound;
+ break;
default:
// random repetition
assert sumDocFreq - 1 + lowerBound <= upperBound;
@@ -251,29 +288,46 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
*/
static TermStatistics newTerm(Random random, CollectionStatistics corpus) {
final long docFreq;
- if (random.nextBoolean()) {
- // rare term
- docFreq = 1;
- } else {
- // random specificity
- docFreq = TestUtil.nextLong(random, 1, corpus.docCount());
+ switch (random.nextInt(3)) {
+ case 0:
+ // rare term
+ docFreq = 1;
+ break;
+ case 1:
+ // common term
+ docFreq = corpus.docCount();
+ break;
+ default:
+ // random specificity
+ docFreq = TestUtil.nextLong(random, 1, corpus.docCount());
+ break;
}
final long totalTermFreq;
+ // can't require docs to have > 2B tokens
+ long upperBound;
+ try {
+ upperBound = Math.min(corpus.sumTotalTermFreq(), Math.multiplyExact(docFreq, Integer.MAX_VALUE));
+ } catch (ArithmeticException overflow) {
+ upperBound = corpus.sumTotalTermFreq();
+ }
if (corpus.sumTotalTermFreq() == -1) {
// omitTF
totalTermFreq = -1;
- } else if (random.nextBoolean()) {
- // no repetition
- totalTermFreq = docFreq;
} else {
- // random repetition: but can't require docs to have > 2B tokens
- long upperBound;
- try {
- upperBound = Math.min(corpus.sumTotalTermFreq(), Math.multiplyExact(docFreq, Integer.MAX_VALUE));
- } catch (ArithmeticException overflow) {
- upperBound = corpus.sumTotalTermFreq();
+ switch (random.nextInt(3)) {
+ case 0:
+ // no repetition
+ totalTermFreq = docFreq;
+ break;
+ case 1:
+ // maximum repetition
+ totalTermFreq = upperBound;
+ break;
+ default:
+ // random repetition
+ totalTermFreq = TestUtil.nextLong(random, docFreq, upperBound);
+ break;
}
- totalTermFreq = TestUtil.nextLong(random, docFreq, upperBound);
}
return new TermStatistics(TERM, docFreq, totalTermFreq);
}
@@ -317,9 +371,34 @@ public abstract class BaseSimilarityTestCase extends LuceneTestCase {
// there is at least one other document, and those must have at least 1 instance each.
int upperBound = Math.toIntExact(Math.min(term.totalTermFreq() - term.docFreq() + 1, Integer.MAX_VALUE));
if (random.nextBoolean()) {
- freq = TestUtil.nextInt(random, 1, upperBound);
+ // integer freq
+ switch (random.nextInt(3)) {
+ case 0:
+ // smallest freq
+ freq = 1;
+ break;
+ case 1:
+ // largest freq
+ freq = upperBound;
+ break;
+ default:
+ // random freq
+ freq = TestUtil.nextInt(random, 1, upperBound);
+ break;
+ }
} else {
- float freqCandidate = upperBound * random.nextFloat();
+ // float freq
+ float freqCandidate;
+ switch (random.nextInt(2)) {
+ case 0:
+ // smallest freq
+ freqCandidate = Float.MIN_VALUE;
+ break;
+ default:
+ // random freq
+ freqCandidate = upperBound * random.nextFloat();
+ break;
+ }
// we need to be 2nd float value at a minimum, the pairwise test will check MIN_VALUE in this case.
// this avoids testing frequencies of 0 which seem wrong to allow (we should enforce computeSlopFactor etc)
if (freqCandidate <= Float.MIN_VALUE) {