# blob: 6e29bcd64a2910fca8b5f07e77094bab3ab52039 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
use strict;
use warnings;
use lib 'buildlib';
use Lucy::Test;
package PlainSchema;
use base qw( Lucy::Plan::Schema );
use Lucy::Analysis::RegexTokenizer;

# Schema with a single full-text field, 'content', tokenized by a
# RegexTokenizer built from the pattern \S+.  No other analysis is configured.
#
# Arguments are forwarded unchanged to the parent constructor.
# Returns the new schema object.
sub new {
    my ( $class, @ctor_args ) = @_;
    my $schema = $class->SUPER::new(@ctor_args);

    $schema->spec_field(
        name => 'content',
        type => Lucy::Plan::FullTextType->new(
            analyzer =>
                Lucy::Analysis::RegexTokenizer->new( pattern => '\S+' ),
        ),
    );

    return $schema;
}
package StopSchema;
use base qw( Lucy::Plan::Schema );

# Schema with a single full-text field, 'content', analyzed by a PolyAnalyzer
# that chains a RegexTokenizer (pattern \S+) with a SnowballStopFilter whose
# stoplist contains only 'x'.
#
# Arguments are forwarded unchanged to the parent constructor.
# Returns the new schema object.
sub new {
    my $self = shift->SUPER::new(@_);

    # Use the `pattern` constructor argument, the same spelling PlainSchema
    # uses, rather than the older `token_re => qr/.../` form.
    my $whitespace_tokenizer
        = Lucy::Analysis::RegexTokenizer->new( pattern => '\S+' );
    my $stopfilter
        = Lucy::Analysis::SnowballStopFilter->new( stoplist => { x => 1 } );
    my $polyanalyzer = Lucy::Analysis::PolyAnalyzer->new(
        analyzers => [ $whitespace_tokenizer, $stopfilter, ], );

    my $type = Lucy::Plan::FullTextType->new( analyzer => $polyanalyzer, );
    $self->spec_field( name => 'content', type => $type );
    return $self;
}
# Empty subclasses of the stock Lucy query classes.  They add no behavior;
# MyQueryParser's make_* methods construct them, and the isa_ok() checks in
# the main script look for these class names in the parser's output.
package MyTermQuery;
use base qw( Lucy::Search::TermQuery );
package MyPhraseQuery;
use base qw( Lucy::Search::PhraseQuery );
package MyANDQuery;
use base qw( Lucy::Search::ANDQuery );
package MyORQuery;
use base qw( Lucy::Search::ORQuery );
package MyNOTQuery;
use base qw( Lucy::Search::NOTQuery );
package MyReqOptQuery;
use base qw( Lucy::Search::RequiredOptionalQuery );
package MyQueryParser;
use base qw( Lucy::Search::QueryParser );

# QueryParser subclass whose make_* methods build the My* query subclasses.
# Each method discards its invocant and hands the remaining arguments to the
# matching My* constructor.

# Remaining arguments are passed through to MyTermQuery->new as-is.
sub make_term_query {
    my ( undef, @ctor_args ) = @_;
    return MyTermQuery->new(@ctor_args);
}

# Remaining arguments are passed through to MyPhraseQuery->new as-is.
sub make_phrase_query {
    my ( undef, @ctor_args ) = @_;
    return MyPhraseQuery->new(@ctor_args);
}

# The first argument after the invocant becomes the `children` parameter.
sub make_and_query {
    my ( undef, $children ) = @_;
    return MyANDQuery->new( children => $children );
}

# The first argument after the invocant becomes the `children` parameter.
sub make_or_query {
    my ( undef, $children ) = @_;
    return MyORQuery->new( children => $children );
}

# The first argument after the invocant becomes the `negated_query` parameter.
sub make_not_query {
    my ( undef, $negated_query ) = @_;
    return MyNOTQuery->new( negated_query => $negated_query );
}

# Remaining arguments are passed through to MyReqOptQuery->new as-is.
sub make_req_opt_query {
    my ( undef, @ctor_args ) = @_;
    return MyReqOptQuery->new(@ctor_args);
}
package main;
use Test::More tests => 224;
use Lucy::Util::StringHelper qw( utf8_flag_on utf8ify );
use Lucy::Test::TestUtils qw( create_index );

# Build two RAM-backed indexes over the same documents: one using PlainSchema
# and one using StopSchema.
my $folder       = Lucy::Store::RAMFolder->new;
my $stop_folder  = Lucy::Store::RAMFolder->new;
my $plain_schema = PlainSchema->new;
my $stop_schema  = StopSchema->new;

my @docs = (
    'x',
    'y',
    'z',
    'x a',
    'x a b',
    'x a b c',
    'x foo a b c d',
);

my $indexer = Lucy::Index::Indexer->new(
    index  => $folder,
    schema => $plain_schema,
);
my $stop_indexer = Lucy::Index::Indexer->new(
    index  => $stop_folder,
    schema => $stop_schema,
);

# Every document goes into both indexes, plain index first.
for my $content (@docs) {
    $indexer->add_doc( { content => $content } );
    $stop_indexer->add_doc( { content => $content } );
}

$indexer->commit;
$stop_indexer->commit;
# Four parsers: one per schema with the default boolean operator, and one per
# schema with default_boolop set to 'AND'.
my $OR_parser = Lucy::Search::QueryParser->new( schema => $plain_schema, );
my $AND_parser = Lucy::Search::QueryParser->new(
    schema         => $plain_schema,
    default_boolop => 'AND',
);
my $OR_stop_parser
    = Lucy::Search::QueryParser->new( schema => $stop_schema, );
my $AND_stop_parser = Lucy::Search::QueryParser->new(
    schema         => $stop_schema,
    default_boolop => 'AND',
);

# Turn on heed_colons for every parser; the test table includes
# 'field:term' style query strings.
for my $parser ( $OR_parser, $AND_parser, $OR_stop_parser, $AND_stop_parser )
{
    $parser->set_heed_colons(1);
}

# One searcher per index.
my $searcher = Lucy::Search::IndexSearcher->new( index => $folder );
my $stop_searcher
    = Lucy::Search::IndexSearcher->new( index => $stop_folder );
# Query strings paired with their expected hit counts, stored as a flat
# list in an array (string, counts, string, counts, ...), so iteration follows
# the written order.
#
# Each counts array holds the expected total_hits for, in order:
#   [0] $OR_parser       searched against the plain index
#   [1] $AND_parser      searched against the plain index
#   [2] $OR_stop_parser  searched against the stoplist index
#   [3] $AND_stop_parser searched against the stoplist index
my @logical_tests = (
'b' => [ 3, 3, 3, 3, ],
'(a)' => [ 4, 4, 4, 4, ],
'"a"' => [ 4, 4, 4, 4, ],
'"(a)"' => [ 0, 0, 0, 0, ],
'("a")' => [ 4, 4, 4, 4, ],
'a b' => [ 4, 3, 4, 3, ],
'a (b)' => [ 4, 3, 4, 3, ],
'a "b"' => [ 4, 3, 4, 3, ],
'a ("b")' => [ 4, 3, 4, 3, ],
'a "(b)"' => [ 4, 0, 4, 0, ],
'(a b)' => [ 4, 3, 4, 3, ],
'"a b"' => [ 3, 3, 3, 3, ],
'("a b")' => [ 3, 3, 3, 3, ],
'"(a b)"' => [ 0, 0, 0, 0, ],
'a b c' => [ 4, 2, 4, 2, ],
'a (b c)' => [ 4, 2, 4, 2, ],
'a "b c"' => [ 4, 2, 4, 2, ],
'a ("b c")' => [ 4, 2, 4, 2, ],
'a "(b c)"' => [ 4, 0, 4, 0, ],
'"a b c"' => [ 2, 2, 2, 2, ],
'-x' => [ 0, 0, 0, 0, ],
'x -c' => [ 3, 3, 0, 0, ],
'x "-c"' => [ 5, 0, 0, 0, ],
'x +c' => [ 2, 2, 2, 2, ],
'x "+c"' => [ 5, 0, 0, 0, ],
'+x +c' => [ 2, 2, 2, 2, ],
'+x -c' => [ 3, 3, 0, 0, ],
'-x +c' => [ 0, 0, 2, 2, ],
'-x -c' => [ 0, 0, 0, 0, ],
'x y' => [ 6, 0, 1, 1, ],
'x a d' => [ 5, 1, 4, 1, ],
'x "a d"' => [ 5, 0, 0, 0, ],
'"x a"' => [ 3, 3, 4, 4, ],
'x AND y' => [ 0, 0, 1, 1, ],
'x OR y' => [ 6, 6, 1, 1, ],
'x AND NOT y' => [ 5, 5, 0, 0, ],
'x (b OR c)' => [ 5, 3, 3, 3, ],
'x AND (b OR c)' => [ 3, 3, 3, 3, ],
'x OR (b OR c)' => [ 5, 5, 3, 3, ],
'x (y OR c)' => [ 6, 2, 3, 3, ],
'x AND (y OR c)' => [ 2, 2, 3, 3, ],
'a AND NOT (b OR "c d")' => [ 1, 1, 1, 1, ],
'a AND NOT "a b"' => [ 1, 1, 1, 1, ],
'a AND NOT ("a b" OR "c d")' => [ 1, 1, 1, 1, ],
'+"b c" -d' => [ 1, 1, 1, 1, ],
'"a b" +d' => [ 1, 1, 1, 1, ],
'x AND NOT (b OR (c AND d))' => [ 2, 2, 0, 0, ],
'-(+notthere)' => [ 0, 0, 0, 0 ],
'content:b' => [ 3, 3, 3, 3, ],
'bogusfield:a' => [ 0, 0, 0, 0, ],
'bogusfield:a content:b' => [ 3, 0, 3, 0, ],
'content:b content:c' => [ 3, 2, 3, 2 ],
'content:(b c)' => [ 3, 2, 3, 2 ],
'bogusfield:(b c)' => [ 0, 0, 0, 0 ],
);
# One entry per parser configuration: [ test-name prefix, parser, searcher ].
# An entry's position is also the index of its expected count within each
# test's counts array.
my @parser_setups = (
    [ 'OR',           $OR_parser,       $searcher ],
    [ 'AND',          $AND_parser,      $searcher ],
    [ 'stoplist-OR',  $OR_stop_parser,  $stop_searcher ],
    [ 'stoplist-AND', $AND_stop_parser, $stop_searcher ],
);

# @logical_tests is a flat list: a query string, then its expected counts.
# Consume it two slots at a time and run every configuration on each string.
my $i = 0;
while ( $i < @logical_tests ) {
    my $qstring  = $logical_tests[ $i++ ];
    my $expected = $logical_tests[ $i++ ];

    for my $slot ( 0 .. $#parser_setups ) {
        my ( $label, $parser, $index_searcher ) = @{ $parser_setups[$slot] };
        my $query = $parser->parse($qstring);
        my $hits  = $index_searcher->hits( query => $query );
        is( $hits->total_hits, $expected->[$slot], "$label: $qstring" );
    }
}
# Search an index created from a string containing a non-ASCII character,
# passing plain query strings (not Query objects) to hits().
# NOTE(review): utf8ify() and create_index() are imported helpers; presumably
# they upgrade the string to UTF-8 and build an index containing it - confirm.
my $motorhead = "Mot\xF6rhead";
utf8ify($motorhead);
my $unicode_folder = create_index($motorhead);
$searcher = Lucy::Search::IndexSearcher->new( index => $unicode_folder );

# Each entry: [ query string, expected total_hits, test name ].
my @unicode_checks = (
    [ 'Mot',      0, "Pre-test - indexing worked properly" ],
    [ $motorhead, 1, "QueryParser parses UTF-8 strings correctly" ],
);

my $hits;
for my $check (@unicode_checks) {
    my ( $query_string, $expected_total, $test_name ) = @{$check};
    $hits = $searcher->hits( query => $query_string );
    is( $hits->total_hits, $expected_total, $test_name );
}
# Check that MyQueryParser's overridden make_* methods are used: each query
# string should yield an object of the corresponding My* class.
my $custom_parser = MyQueryParser->new( schema => PlainSchema->new );

# Each entry: [ parser method, query string, expected class ].
# NOTE(review): the NOT case goes through tree() rather than parse();
# presumably parse()'s later stages would not leave a bare NOT query at the
# top level - confirm before changing it.
my @factory_checks = (
    [ parse => 'foo',         'MyTermQuery' ],
    [ parse => '"foo bar"',   'MyPhraseQuery' ],
    [ parse => 'foo AND bar', 'MyANDQuery' ],
    [ parse => 'foo OR bar',  'MyORQuery' ],
    [ tree  => 'NOT foo',     'MyNOTQuery' ],
    [ parse => '+foo bar',    'MyReqOptQuery' ],
);

for my $check (@factory_checks) {
    my ( $method, $query_string, $expected_class ) = @{$check};
    isa_ok( $custom_parser->$method($query_string), $expected_class );
}