blob: c080936ba35f9443f962247316affb3d19d4df27 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
use strict;
use warnings;
package LucyX::Search::Filter;
# Filter is a Lucy::Search::Query subclass; @ISA is assigned inside BEGIN so
# the inheritance is already in place while the rest of the file compiles.
BEGIN { our @ISA = qw( Lucy::Search::Query ) }
our $VERSION = '0.004001';
# Evaluate the version string so that $VERSION ends up holding a plain number.
$VERSION = eval $VERSION;
use Carp;
use Storable qw( nfreeze thaw );
use Scalar::Util qw( blessed weaken );
# Load the bytes module so that bytes::length() can be called by its fully
# qualified name (see serialize), then switch the pragma itself back off so
# the rest of the file keeps ordinary character semantics.
use bytes;
no bytes;
# Inside-out member vars.  Both hashes are keyed by $$self.
#   %query       -- the wrapped Query object supplied to new().
#   %cached_bits -- per-object hash of cache entries, keyed by a SegReader's
#                   hash_sum; each entry holds a BitVector plus a weak
#                   reference to the SegReader it was computed for.
our %query;
our %cached_bits;
# Constructor.  Takes hash-style arguments.  The "query" argument is
# mandatory and must be an object derived from Lucy::Search::Query; every
# other argument is handed to the parent constructor untouched.
#
# Dies (with a stack trace) when "query" is missing or of the wrong type.
# Returns the new Filter, whose boost has been set to 0.
sub new {
    my $either = shift;
    my %params = @_;

    # "query" is ours alone -- remove it before delegating to the parent.
    my $inner_query = delete $params{query};
    if (   !blessed($inner_query)
        || !$inner_query->isa('Lucy::Search::Query') )
    {
        confess("required parameter query is not a Lucy::Search::Query");
    }

    my $self = $either->SUPER::new(%params);

    # Start with an empty cache, remember the wrapped query in the
    # inside-out store, and zero the boost.
    $self->_init_cache;
    $query{$$self} = $inner_query;
    $self->set_boost(0);

    return $self;
}
# Destructor.  Removes this object's entries from both inside-out stores
# before handing control to the parent class's destructor.
sub DESTROY {
    my ($self) = @_;
    my $object_id = $$self;
    for my $store ( \%query, \%cached_bits ) {
        delete $store->{$object_id};
    }
    $self->SUPER::DESTROY;
}
# Build a LucyX::Search::FilterCompiler whose parent is this Filter.
#
# Takes hash-style arguments.  "subordinate" is consumed here; everything
# else is forwarded to the compiler's constructor.  The compiler is
# normalized unless "subordinate" is true.  Returns the compiler.
sub make_compiler {
    my $self   = shift;
    my %params = @_;

    my $is_subordinate = delete $params{subordinate};
    my $compiler       = LucyX::Search::FilterCompiler->new(
        %params,
        parent => $self,
    );

    if ( !$is_subordinate ) {
        $compiler->normalize;
    }
    return $compiler;
}
# Write this Filter to $outstream: first whatever the parent class
# serializes, then the wrapped query as a Storable image preceded by its
# length in bytes.
sub serialize {
    my $self      = shift;
    my $outstream = shift;

    $self->SUPER::serialize($outstream);

    # The length prefix must count bytes, not characters.
    my $payload      = nfreeze( $query{$$self} );
    my $payload_size = bytes::length($payload);
    $outstream->write_c32($payload_size);
    $outstream->print($payload);
}
# Rebuild this Filter from $instream, reading the data in the same order
# that serialize() wrote it: the parent class's portion, then a length
# prefix, then the Storable image of the wrapped query.
#
# Dies (with a stack trace) if the thawed payload is not a
# Lucy::Search::Query, mirroring the check performed by new().
# Returns $self.
sub deserialize {
    my ( $self, $instream ) = @_;
    $self->SUPER::deserialize($instream);

    my $len = $instream->read_c32;
    my $frozen;
    $instream->read( $frozen, $len );

    # NOTE(review): Storable images can instantiate arbitrary classes while
    # being thawed.  Only feed this method streams from trusted peers.
    my $thawed = thaw($frozen);

    # Fail here rather than later with an unrelated "can't call method"
    # error when the payload is unusable.
    confess("deserialized query is not a Lucy::Search::Query")
        unless ( blessed($thawed)
        && $thawed->isa('Lucy::Search::Query') );

    # Any bits cached for a previous query would be stale now.
    $self->_init_cache;
    $query{$$self} = $thawed;
    return $self;
}
# Equality test.  Returns 1 when $other is a Filter (or subclass) whose
# wrapped query equals ours and whose boost matches ours; returns 0
# otherwise, including when $other is undef or not an object at all.
sub equals {
    my ( $self, $other ) = @_;

    # Check blessed() first: calling ->isa on undef or on an unblessed
    # reference would throw instead of answering "not equal".
    return 0 unless blessed($other) && $other->isa(__PACKAGE__);
    return 0 unless $query{$$self}->equals( $query{$$other} );
    return 0 unless $self->get_boost == $other->get_boost;
    return 1;
}
# Return a printable form of this Filter: the wrapped query's own string
# form enclosed in "Filter(...)".
sub to_string {
    my ($self) = @_;
    my $inner_text = $query{$$self}->to_string;
    return "Filter($inner_text)";
}
# Return the BitVector recording which documents in $seg_reader's segment
# match the wrapped query.
#
# A previously computed vector is reused when one is cached for this
# SegReader.  Otherwise the wrapped query is run against a searcher that
# sees only this one segment, and the collected vector is cached before
# being returned.
sub _bits {
    my ( $self, $seg_reader ) = @_;

    my $cached_bits = $self->_fetch_cached_bits($seg_reader);
    return $cached_bits if defined $cached_bits;

    # Cache miss: collect matching doc ids into a fresh vector.
    my $bits = Lucy::Object::BitVector->new(
        capacity => $seg_reader->doc_max + 1 );
    my $collector = Lucy::Search::Collector::BitCollector->new(
        bit_vector => $bits );

    # Wrap the lone SegReader in a PolyReader so an IndexSearcher can be
    # opened on just this segment.
    my $polyreader = Lucy::Index::PolyReader->new(
        schema      => $seg_reader->get_schema,
        folder      => $seg_reader->get_folder,
        snapshot    => $seg_reader->get_snapshot,
        sub_readers => [$seg_reader],
    );
    my $searcher
        = Lucy::Search::IndexSearcher->new( index => $polyreader );

    # Perform the search.
    $searcher->collect(
        query     => $query{$$self},
        collector => $collector,
    );

    # Fill the cache only now that the search has completed.  Caching the
    # vector up front would leave an empty, wrong result behind for this
    # SegReader if collect() threw part-way through.
    $self->_store_cached_bits( $seg_reader, $bits );

    return $bits;
}
# Remember $bits as the cached result for $seg_reader.  The cache entry is
# filed under the SegReader's hash_sum and keeps only a weak reference to
# the SegReader itself; once that reference has gone undef the entry is
# known to be stale.
sub _store_cached_bits {
    my ( $self, $seg_reader, $bits ) = @_;

    my %entry = (
        bits       => $bits,
        seg_reader => $seg_reader,
    );

    # Don't let the cache keep the SegReader alive.
    weaken( $entry{seg_reader} );

    $cached_bits{$$self}{ $seg_reader->hash_sum } = \%entry;
}
# Look up the BitVector cached for $seg_reader.  Returns it when present,
# otherwise returns nothing.  Before the lookup, every cache entry whose
# SegReader has been destroyed is thrown away.
sub _fetch_cached_bits {
    my ( $self, $seg_reader ) = @_;
    my $cache = $cached_bits{$$self};

    # Sweep: an entry whose weak SegReader reference has turned into undef
    # belongs to a SegReader that no longer exists.
    my @stale = grep { !defined $cache->{$_}{seg_reader} } keys %$cache;
    delete @{$cache}{@stale};

    # Fetch.
    my $entry = $cache->{ $seg_reader->hash_sum };
    return unless defined $entry;
    return $entry->{bits};
}
# Reset this object's cache to an empty hash, discarding anything that was
# cached before.
sub _init_cache {
    my ($self) = @_;
    return $cached_bits{$$self} = {};
}
# Testing only.  Count the cache entries whose SegReader is still alive.
sub _cached_count {
    my ($self) = @_;
    my $live_entries = 0;
    for my $entry ( values %{ $cached_bits{$$self} } ) {
        $live_entries++ if defined $entry->{seg_reader};
    }
    return $live_entries;
}
# Compiler companion for LucyX::Search::Filter; instances are created by
# Filter's make_compiler() with the Filter as their parent.
package LucyX::Search::FilterCompiler;
our $VERSION = '0.004001';
# Evaluate the version string so that $VERSION ends up holding a plain number.
$VERSION = eval $VERSION;
# Inherit from Lucy::Search::Compiler; assigned in BEGIN so it takes effect
# at compile time.
BEGIN { our @ISA = qw( Lucy::Search::Compiler ) }
# Constructor.  Takes hash-style arguments and passes them to the parent
# constructor.  When no (true) "similarity" is supplied, the one belonging
# to the searcher's schema is used, so "searcher" must be present in that
# case.
sub new {
    my $class  = shift;
    my %params = @_;

    if ( !$params{similarity} ) {
        $params{similarity}
            = $params{searcher}->get_schema->get_similarity;
    }

    return $class->SUPER::new(%params);
}
# Create the Matcher for a single segment.  The "reader" argument supplies
# the SegReader; the parent Filter provides the (possibly cached) BitVector
# for that segment.  Returns a LucyX::Search::FilterMatcher.
sub make_matcher {
    my $self   = shift;
    my %params = @_;

    my $seg_reader = $params{reader};
    my $bit_vector = $self->get_parent->_bits($seg_reader);

    my %matcher_args = (
        bits    => $bit_vector,
        doc_max => $seg_reader->doc_max,
    );
    return LucyX::Search::FilterMatcher->new(%matcher_args);
}
# Matcher companion for LucyX::Search::Filter.  No methods are defined in
# this file; FilterCompiler's make_matcher() calls new() with "bits" and
# "doc_max".
# NOTE(review): new() is presumably supplied by compiled code elsewhere in
# the distribution -- confirm.
package LucyX::Search::FilterMatcher;
our $VERSION = '0.004001';
# Evaluate the version string so that $VERSION ends up holding a plain number.
$VERSION = eval $VERSION;
# Inherit from Lucy::Search::Matcher; assigned in BEGIN so it takes effect at
# compile time.
BEGIN { our @ISA = qw( Lucy::Search::Matcher ) }
# A module file must end by returning a true value.
1;
__END__
=head1 NAME
LucyX::Search::Filter - Build a caching filter based on results of a Query.
=head1 SYNOPSIS
my %category_filters;
for my $category (qw( sweet sour salty bitter )) {
my $cat_query = Lucy::Search::TermQuery->new(
field => 'category',
term => $category,
);
$category_filters{$category} = LucyX::Search::Filter->new(
query => $cat_query,
);
}
while ( my $cgi = CGI::Fast->new ) {
my $user_query = $cgi->param('q');
my $filter = $category_filters{ $cgi->param('category') };
my $and_query = Lucy::Search::ANDQuery->new;
$and_query->add_child($user_query);
$and_query->add_child($filter);
my $hits = $searcher->hits( query => $and_query );
...
=head1 DESCRIPTION
A Filter is a L<Lucy::Search::Query> subclass that can be used to filter
the results of another Query. The effect is very similar to simply using the
wrapped inner query, but there are two important differences:
=over
=item
A Filter does not contribute to the score of the documents it matches.
=item
A Filter caches its results, so it is more efficient if you use it more than
once.
=back
To obtain logically equivalent results to the Filter but avoid the caching,
substitute the wrapped query but use set_boost() to set its C<boost> to 0.
=head1 METHODS
=head2 new
my $filter = LucyX::Search::Filter->new(
query => $query,
);
Constructor. Takes one hash-style parameter, C<query>, which must be an
object belonging to a subclass of L<Lucy::Search::Query>.
=head1 BUGS
Filters do not cache when used in a search cluster with LucyX::Remote's
SearchServer and SearchClient.
=cut