blob: fca64552ae4f58a68e281b034c24211324e9cb4a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.codecs;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;
import org.apache.lucene.index.Impact;
/**
* This class accumulates the (freq, norm) pairs that may produce competitive scores.
*/
public final class CompetitiveImpactAccumulator {
// We speed up accumulation for common norm values with this array that maps
// norm values in -128..127 to the maximum frequency observed for these norm
// values
private final int[] maxFreqs;
// This TreeSet stores competitive (freq,norm) pairs for norm values that fall
// outside of -128..127. It is always empty with the default similarity, which
// encodes norms as bytes.
private final TreeSet<Impact> otherFreqNormPairs;
/** Sole constructor. */
public CompetitiveImpactAccumulator() {
maxFreqs = new int[256];
Comparator<Impact> comparator = new Comparator<Impact>() {
@Override
public int compare(Impact o1, Impact o2) {
// greater freqs compare greater
int cmp = Integer.compare(o1.freq, o2.freq);
if (cmp == 0) {
// greater norms compare lower
cmp = Long.compareUnsigned(o2.norm, o1.norm);
}
return cmp;
}
};
otherFreqNormPairs = new TreeSet<>(comparator);
}
/** Reset to the same state it was in after creation. */
public void clear() {
Arrays.fill(maxFreqs, 0);
otherFreqNormPairs.clear();
assert assertConsistent();
}
/** Accumulate a (freq,norm) pair, updating this structure if there is no
* equivalent or more competitive entry already. */
public void add(int freq, long norm) {
if (norm >= Byte.MIN_VALUE && norm <= Byte.MAX_VALUE) {
int index = Byte.toUnsignedInt((byte) norm);
maxFreqs[index] = Math.max(maxFreqs[index], freq);
} else {
add(new Impact(freq, norm), otherFreqNormPairs);
}
assert assertConsistent();
}
/** Merge {@code acc} into this. */
public void addAll(CompetitiveImpactAccumulator acc) {
int[] maxFreqs = this.maxFreqs;
int[] otherMaxFreqs = acc.maxFreqs;
for (int i = 0; i < maxFreqs.length; ++i) {
maxFreqs[i] = Math.max(maxFreqs[i], otherMaxFreqs[i]);
}
for (Impact entry : acc.otherFreqNormPairs) {
add(entry, otherFreqNormPairs);
}
assert assertConsistent();
}
/** Get the set of competitive freq and norm pairs, ordered by increasing freq and norm. */
public Collection<Impact> getCompetitiveFreqNormPairs() {
List<Impact> impacts = new ArrayList<>();
int maxFreqForLowerNorms = 0;
for (int i = 0; i < maxFreqs.length; ++i) {
int maxFreq = maxFreqs[i];
if (maxFreq > maxFreqForLowerNorms) {
impacts.add(new Impact(maxFreq, (byte) i));
maxFreqForLowerNorms = maxFreq;
}
}
if (otherFreqNormPairs.isEmpty()) {
// Common case: all norms are bytes
return impacts;
}
TreeSet<Impact> freqNormPairs = new TreeSet<>(this.otherFreqNormPairs);
for (Impact impact : impacts) {
add(impact, freqNormPairs);
}
return Collections.unmodifiableSet(freqNormPairs);
}
private void add(Impact newEntry, TreeSet<Impact> freqNormPairs) {
Impact next = freqNormPairs.ceiling(newEntry);
if (next == null) {
// nothing is more competitive
freqNormPairs.add(newEntry);
} else if (Long.compareUnsigned(next.norm, newEntry.norm) <= 0) {
// we already have this entry or more competitive entries in the tree
return;
} else {
// some entries have a greater freq but a less competitive norm, so we
// don't know which one will trigger greater scores, still add to the tree
freqNormPairs.add(newEntry);
}
for (Iterator<Impact> it = freqNormPairs.headSet(newEntry, false).descendingIterator(); it.hasNext(); ) {
Impact entry = it.next();
if (Long.compareUnsigned(entry.norm, newEntry.norm) >= 0) {
// less competitive
it.remove();
} else {
// lesser freq but better norm, further entries are not comparable
break;
}
}
}
@Override
public String toString() {
return new ArrayList<>(getCompetitiveFreqNormPairs()).toString();
}
// Only called by assertions
private boolean assertConsistent() {
for (int freq : maxFreqs) {
assert freq >= 0;
}
int previousFreq = 0;
long previousNorm = 0;
for (Impact impact : otherFreqNormPairs) {
assert impact.norm < Byte.MIN_VALUE || impact.norm > Byte.MAX_VALUE;
assert previousFreq < impact.freq;
assert Long.compareUnsigned(previousNorm, impact.norm) < 0;
previousFreq = impact.freq;
previousNorm = impact.norm;
}
return true;
}
}