blob: 0b13e66e0d56d85d495138708da2bcb54cff50d5 [file] [log] [blame]
use crate::docset::{DocSet, TERMINATED};
use crate::query::Scorer;
use crate::{DocId, Score};
/// Returns `true` if `docset` contains `doc`.
///
/// If `docset` is currently positioned at or before `doc`, it is sought to
/// `doc` as a side effect. If it is already positioned after `doc`, it is
/// left untouched and the answer is `false`.
#[inline]
fn is_within<TDocSetExclude>(docset: &mut TDocSetExclude, doc: DocId) -> bool
where
    TDocSetExclude: DocSet,
{
    if docset.doc() > doc {
        // Already positioned after `doc`: no seek is performed.
        return false;
    }
    docset.seek(doc) == doc
}
/// Filters a given `DocSet` by removing the docs that also appear in another
/// `DocSet`.
///
/// Iteration and scoring are delegated to the underlying docset; the
/// excluding docset is only used to decide which docs get skipped, so it
/// has no impact on scoring.
pub struct Exclude<TDocSet, TDocSetExclude> {
/// The docset whose docs are emitted (and scored, when it is a `Scorer`).
underlying_docset: TDocSet,
/// The docset whose docs are removed from `underlying_docset`.
excluding_docset: TDocSetExclude,
}
impl<TDocSet: DocSet, TDocSetExclude: DocSet> Exclude<TDocSet, TDocSetExclude> {
    /// Creates a new `Exclude`.
    ///
    /// The underlying docset is advanced past any leading docs that are
    /// present in `excluding_docset`, so that the returned value is already
    /// positioned on its first non-excluded doc (or on `TERMINATED` if there
    /// is none).
    pub fn new(
        mut underlying_docset: TDocSet,
        mut excluding_docset: TDocSetExclude,
    ) -> Exclude<TDocSet, TDocSetExclude> {
        loop {
            let head = underlying_docset.doc();
            // Stop on exhaustion, or on the first doc that is not excluded.
            if head == TERMINATED || !is_within(&mut excluding_docset, head) {
                break;
            }
            underlying_docset.advance();
        }
        Self {
            underlying_docset,
            excluding_docset,
        }
    }
}
impl<TDocSet: DocSet, TDocSetExclude: DocSet> DocSet for Exclude<TDocSet, TDocSetExclude> {
    /// Moves to the next doc of the underlying docset that is not present in
    /// the excluding docset, and returns it (`TERMINATED` once exhausted).
    fn advance(&mut self) -> DocId {
        let mut doc = self.underlying_docset.advance();
        // Keep stepping for as long as we land on an excluded doc.
        while doc != TERMINATED && is_within(&mut self.excluding_docset, doc) {
            doc = self.underlying_docset.advance();
        }
        doc
    }

    /// Seeks the underlying docset to `target`, then skips forward over any
    /// excluded docs. Returns the resulting doc, or `TERMINATED`.
    fn seek(&mut self, target: DocId) -> DocId {
        let doc = self.underlying_docset.seek(target);
        if doc != TERMINATED && is_within(&mut self.excluding_docset, doc) {
            // The doc we landed on is excluded: fall back to a regular advance.
            self.advance()
        } else {
            doc
        }
    }

    /// Returns the current doc of the underlying docset.
    fn doc(&self) -> DocId {
        self.underlying_docset.doc()
    }

    /// `.size_hint()` directly returns the size hint of the underlying
    /// docset; it does not account for the docs that get excluded.
    fn size_hint(&self) -> u32 {
        self.underlying_docset.size_hint()
    }
}
impl<TScorer: Scorer, TDocSetExclude: DocSet + 'static> Scorer
    for Exclude<TScorer, TDocSetExclude>
{
    /// Returns the score computed by the underlying scorer; the excluding
    /// docset is not consulted.
    fn score(&mut self) -> Score {
        let scorer = &mut self.underlying_docset;
        scorer.score()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::postings::tests::test_skip_against_unoptimized;
    use crate::query::VecDocSet;
    use crate::tests::sample_with_seed;

    /// Iterating an `Exclude` yields only the docs that are absent from the
    /// excluding docset.
    #[test]
    fn test_exclude() {
        let mut scorer = Exclude::new(
            VecDocSet::from(vec![1, 2, 5, 8, 10, 15, 24]),
            VecDocSet::from(vec![1, 2, 3, 10, 16, 24]),
        );
        let mut collected = Vec::new();
        loop {
            let doc = scorer.doc();
            if doc == TERMINATED {
                break;
            }
            collected.push(doc);
            scorer.advance();
        }
        assert_eq!(collected, vec![5, 8, 15]);
    }

    /// Runs the shared skip-checking helper on a small hand-written fixture.
    #[test]
    fn test_exclude_skip() {
        test_skip_against_unoptimized(
            || {
                Box::new(Exclude::new(
                    VecDocSet::from(vec![1, 2, 5, 8, 10, 15, 24]),
                    VecDocSet::from(vec![1, 2, 3, 10, 16, 24]),
                ))
            },
            vec![5, 8, 10, 15, 24],
        );
    }

    /// Same check as `test_exclude_skip`, on seeded pseudo-random samples.
    #[test]
    fn test_exclude_skip_random() {
        let included_docs = sample_with_seed(10_000, 0.1, 1);
        let excluded_docs = sample_with_seed(10_000, 0.05, 2);
        let skip_targets = sample_with_seed(10_000, 0.005, 3);
        test_skip_against_unoptimized(
            || {
                Box::new(Exclude::new(
                    VecDocSet::from(included_docs.clone()),
                    VecDocSet::from(excluded_docs.clone()),
                ))
            },
            skip_targets,
        );
    }
}