blob: edfa28fb7846b9b6d72cf473fa67c4b3e8c7fcb3 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
use strict;
use warnings;
use lib 'buildlib';
package MatchOnlySim;
use base qw( Lucy::Index::Similarity );
sub make_posting {
my $self = shift;
return Lucy::Index::Posting::MatchPosting->new( similarity => $self );
}
package MatchSchema::MatchOnly;
use base qw( Lucy::Plan::FullTextType );
use Lucy::Index::Posting::MatchPosting;
sub make_similarity { MatchOnlySim->new }
package MatchSchema;
use base qw( Lucy::Plan::Schema );
use Lucy::Analysis::RegexTokenizer;
sub new {
my $self = shift->SUPER::new(@_);
my $type = MatchSchema::MatchOnly->new(
analyzer => Lucy::Analysis::RegexTokenizer->new );
$self->spec_field( name => 'content', type => $type );
return $self;
}
package main;
use Lucy::Test::TestUtils qw( get_uscon_docs );
use Test::More tests => 6;
my $uscon_docs = get_uscon_docs();
my $match_folder = make_index( MatchSchema->new, $uscon_docs );
my $score_folder = make_index( Lucy::Test::TestSchema->new, $uscon_docs );
my $match_searcher
= Lucy::Search::IndexSearcher->new( index => $match_folder );
my $score_searcher
= Lucy::Search::IndexSearcher->new( index => $score_folder );
for (qw( land of the free )) {
my $match_got = hit_ids_array( $match_searcher, $_ );
my $score_got = hit_ids_array( $score_searcher, $_ );
is_deeply( $match_got, $score_got, "same hits for '$_'" );
}
my $qstring = '"the legislature"';
my $should_have_hits = hit_ids_array( $score_searcher, $qstring );
my $should_be_empty = hit_ids_array( $match_searcher, $qstring );
ok( scalar @$should_have_hits, "successfully scored phrase $qstring" );
ok( !scalar @$should_be_empty, "no hits matched for phrase $qstring" );
sub make_index {
my ( $schema, $docs ) = @_;
my $folder = Lucy::Store::RAMFolder->new;
my $indexer = Lucy::Index::Indexer->new(
schema => $schema,
index => $folder,
);
$indexer->add_doc( { content => $_->{bodytext} } ) for values %$docs;
$indexer->commit;
return $folder;
}
sub hit_ids_array {
my ( $searcher, $query_string ) = @_;
my $query = $searcher->glean_query($query_string);
my $bit_vec
= Lucy::Object::BitVector->new( capacity => $searcher->doc_max + 1 );
my $bit_collector
= Lucy::Search::Collector::BitCollector->new( bit_vector => $bit_vec,
);
$searcher->collect( query => $query, collector => $bit_collector );
return $bit_vec->to_array->to_arrayref;
}