blob: 76456bf778d4c43670f4ba43206e973f42d802a1 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
use strict;
use warnings;
package PrefixQuery;
use base qw( Lucy::Search::Query );
use Carp;
use Scalar::Util qw( blessed );
our $VERSION = '0.004001';
$VERSION = eval $VERSION;
# Inside-out member vars and hand-rolled accessors.
my %query_string;
my %field;
sub get_query_string { my $self = shift; return $query_string{$$self} }
sub get_field { my $self = shift; return $field{$$self} }
sub new {
my ( $class, %args ) = @_;
my $query_string = delete $args{query_string};
my $field = delete $args{field};
my $self = $class->SUPER::new(%args);
confess("'query_string' param is required")
unless defined $query_string;
confess("Invalid query_string: '$query_string'")
unless $query_string =~ /\*\s*$/;
confess("'field' param is required")
unless defined $field;
$query_string{$$self} = $query_string;
$field{$$self} = $field;
return $self;
}
sub DESTROY {
my $self = shift;
delete $query_string{$$self};
delete $field{$$self};
$self->SUPER::DESTROY;
}
sub equals {
my ( $self, $other ) = @_;
return 0 unless blessed($other);
return 0 unless $other->isa("PrefixQuery");
return 0 unless $field{$$self} eq $field{$$other};
return 0 unless $query_string{$$self} eq $query_string{$$other};
return 1;
}
sub to_string {
my $self = shift;
return "$field{$$self}:$query_string{$$self}";
}
sub make_compiler {
my ( $self, %args ) = @_;
my $subordinate = delete $args{subordinate};
my $compiler = PrefixCompiler->new( %args, parent => $self );
$compiler->normalize unless $subordinate;
return $compiler;
}
package PrefixCompiler;
use base qw( Lucy::Search::Compiler );
sub make_matcher {
my ( $self, %args ) = @_;
my $seg_reader = $args{reader};
# Retrieve low-level components LexiconReader and PostingListReader.
my $lex_reader
= $seg_reader->obtain("Lucy::Index::LexiconReader");
my $plist_reader
= $seg_reader->obtain("Lucy::Index::PostingListReader");
# Acquire a Lexicon and seek it to our query string.
my $substring = $self->get_parent->get_query_string;
$substring =~ s/\*.\s*$//;
my $field = $self->get_parent->get_field;
my $lexicon = $lex_reader->lexicon( field => $field );
return unless $lexicon;
$lexicon->seek($substring);
# Accumulate PostingLists for each matching term.
my @posting_lists;
while ( defined( my $term = $lexicon->get_term ) ) {
last unless $term =~ /^\Q$substring/;
my $posting_list = $plist_reader->posting_list(
field => $field,
term => $term,
);
if ($posting_list) {
push @posting_lists, $posting_list;
}
last unless $lexicon->next;
}
return unless @posting_lists;
return PrefixMatcher->new( posting_lists => \@posting_lists );
}
package PrefixMatcher;
use base qw( Lucy::Search::Matcher );
# Inside-out member vars.
my %doc_ids;
my %tally;
my %tick;
sub new {
my ( $class, %args ) = @_;
my $posting_lists = delete $args{posting_lists};
my $self = $class->SUPER::new(%args);
# Cheesy but simple way of interleaving PostingList doc sets.
my %all_doc_ids;
for my $posting_list (@$posting_lists) {
while ( my $doc_id = $posting_list->next ) {
$all_doc_ids{$doc_id} = undef;
}
}
my @doc_ids = sort { $a <=> $b } keys %all_doc_ids;
$doc_ids{$$self} = \@doc_ids;
$tick{$$self} = -1;
$tally{$$self} = Lucy::Search::Tally->new;
$tally{$$self}->set_score(1.0); # fixed score of 1.0
return $self;
}
sub DESTROY {
my $self = shift;
delete $doc_ids{$$self};
delete $tick{$$self};
delete $tally{$$self};
$self->SUPER::DESTROY;
}
sub next {
my $self = shift;
my $doc_ids = $doc_ids{$$self};
my $tick = ++$tick{$$self};
return 0 if $tick >= scalar @$doc_ids;
return $doc_ids->[$tick];
}
sub get_doc_id {
my $self = shift;
my $tick = $tick{$$self};
my $doc_ids = $doc_ids{$$self};
return $tick < scalar @$doc_ids ? $doc_ids->[$tick] : 0;
}
sub tally {
my $self = shift;
return $tally{$$self};
}
1;
__END__
__POD__
=head1 SAMPLE CLASS
PrefixQuery - Sample subclass of Lucy::Query, supporting trailing
wildcards.
=head1 SYNOPSIS
my $prefix_query = PrefixQuery->new(
field => 'content',
query_string => 'foo*',
);
my $hits = $searcher->hits( query => $prefix_query );
=head1 DESCRIPTION
See L<Lucy::Docs::Cookbook::CustomQuery>.
=cut