# blob: 070e51a7bc6ffc504edc07eb366a5147a536619c [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
use strict;
use warnings;
use Test::More tests => 36;
use Lucy::Test;
use List::Util qw( shuffle );
# Default ranking: a HitQueue built without a sort_spec is expected to hand
# back MatchDocs ordered by descending score, with ties on score resolved by
# ascending doc_id.
my $hit_q = Lucy::Search::HitQueue->new( wanted => 10 );

# [ doc_id, score ] pairs; several scores repeat on purpose so that the
# doc_id tiebreak is exercised.
my @docs_and_scores = (
    [ 0,    1.0 ],
    [ 5,    0.1 ],
    [ 10,   0.1 ],
    [ 1000, 0.9 ],
    [ 2000, 1.0 ],
    [ 3000, 1.0 ],
);

my @match_docs;
for my $pair (@docs_and_scores) {
    my ( $doc_id, $score ) = @{$pair};
    push @match_docs,
        Lucy::Search::MatchDoc->new(
        doc_id => $doc_id,
        score  => $score,
        );
}

# Reference ordering computed in plain Perl.
my @ranked_docs = sort {
    $b->get_score() <=> $a->get_score()
        || $a->get_doc_id() <=> $b->get_doc_id()
} @match_docs;
my @ranked_ids    = map { $_->get_doc_id() } @ranked_docs;
my @ranked_scores = map { $_->get_score() } @ranked_docs;

for my $match_doc (@match_docs) {
    $hit_q->insert($match_doc);
}

# pop_all yields an array reference; its element order is what is under test.
my $popped_docs   = $hit_q->pop_all;
my @popped_scores = map { $_->get_score() } @{$popped_docs};
is_deeply( \@popped_scores, \@ranked_scores, "rank by scores first" );
my @doc_ids = map { $_->get_doc_id() } @{$popped_docs};
is_deeply( \@doc_ids, \@ranked_ids, "rank by doc_id after score" );
# Build a 100-document in-memory index with three sortable string fields.
# The plain-hash copies of the documents are kept in @docs (doc N lives at
# index N - 1) so later checks can compute expected orderings in pure Perl.
my $schema          = Lucy::Plan::Schema->new;
my $sortable_string = Lucy::Plan::StringType->new( sortable => 1 );
for my $field_name (qw( letter number id )) {
    $schema->spec_field( name => $field_name, type => $sortable_string );
}

my $folder  = Lucy::Store::RAMFolder->new;
my $indexer = Lucy::Index::Indexer->new(
    index  => $folder,
    schema => $schema,
);

my @alphabet = ( 'a' .. 'z' );
my @digits   = ( 1 .. 5 );
my @docs;
for my $id ( 1 .. 100 ) {

    # Letter and number are picked at random, so many documents share
    # values and the secondary sort rules actually get exercised.
    my %fields = (
        letter => $alphabet[ rand @alphabet ],
        number => $digits[ rand @digits ],
        id     => $id,
    );
    push @docs, \%fields;
    $indexer->add_doc( \%fields );
}
$indexer->commit;
# Sort by the 'letter' field, falling back to doc_id.
# NOTE(review): $seg_reader is not referenced again in this file; confirm
# whether opening the reader is still needed before removing it.
my $seg_reader = Lucy::Index::IndexReader->open( index => $folder );

my @letter_rules = (
    Lucy::Search::SortRule->new( field => 'letter' ),
    Lucy::Search::SortRule->new( type  => 'doc_id' ),
);
my $by_letter = Lucy::Search::SortSpec->new( rules => \@letter_rules );

$hit_q = Lucy::Search::HitQueue->new(
    wanted    => 100,
    schema    => $schema,
    sort_spec => $by_letter,
);

# Insert in random order so the result cannot depend on insertion order.
for my $doc_id ( shuffle( 1 .. 100 ) ) {
    my $source_doc = $docs[ $doc_id - 1 ];
    my $match_doc  = Lucy::Search::MatchDoc->new(
        doc_id => $doc_id,
        score  => 1.0,
        values => [ $source_doc->{letter} ],
    );
    $hit_q->insert($match_doc);
}

# Expected order, computed from the plain-Perl copies of the documents.
my @docs_by_letter
    = sort { $a->{letter} cmp $b->{letter} or $a->{id} <=> $b->{id} } @docs;
my @wanted = map { $_->{id} } @docs_by_letter;
my @got    = map { $_->get_doc_id() } @{ $hit_q->pop_all() };
is_deeply( \@got, \@wanted, "sort by letter then doc id" );
# Three-level sort: 'number' ascending, then 'letter' reversed, then doc_id.
my @num_letter_rules = (
    Lucy::Search::SortRule->new( field => 'number' ),
    Lucy::Search::SortRule->new( field => 'letter', reverse => 1 ),
    Lucy::Search::SortRule->new( type  => 'doc_id' ),
);
my $by_num_then_letter
    = Lucy::Search::SortSpec->new( rules => \@num_letter_rules );

$hit_q = Lucy::Search::HitQueue->new(
    wanted    => 100,
    schema    => $schema,
    sort_spec => $by_num_then_letter,
);

for my $doc_id ( shuffle( 1 .. 100 ) ) {

    # The values are listed in the same order as the field-based sort rules.
    my ( $number, $letter ) = @{ $docs[ $doc_id - 1 ] }{qw( number letter )};
    my $match_doc = Lucy::Search::MatchDoc->new(
        doc_id => $doc_id,
        score  => 1.0,
        values => [ $number, $letter ],
    );
    $hit_q->insert($match_doc);
}

# Expected order: note the swapped operands on 'letter' for the reversal.
my @docs_by_num_letter = sort {
           $a->{number} <=> $b->{number}
        or $b->{letter} cmp $a->{letter}
        or $a->{id} <=> $b->{id}
} @docs;
@wanted = map { $_->{id} } @docs_by_num_letter;
@got    = map { $_->get_doc_id() } @{ $hit_q->pop_all() };
is_deeply( \@got, \@wanted, "sort by num then letter reversed then doc id" );
# Sort by 'number', then by score, then by doc_id.  Each MatchDoc's score is
# set to its doc_id, and the expected order lists ids in descending order
# within each number.
my @num_score_rules = (
    Lucy::Search::SortRule->new( field => 'number' ),
    Lucy::Search::SortRule->new( type  => 'score' ),
    Lucy::Search::SortRule->new( type  => 'doc_id' ),
);
my $by_num_then_score
    = Lucy::Search::SortSpec->new( rules => \@num_score_rules );

$hit_q = Lucy::Search::HitQueue->new(
    wanted    => 100,
    schema    => $schema,
    sort_spec => $by_num_then_score,
);

for my $doc_id ( shuffle( 1 .. 100 ) ) {
    my $source_doc = $docs[ $doc_id - 1 ];
    my $match_doc  = Lucy::Search::MatchDoc->new(
        doc_id => $doc_id,
        score  => $doc_id,
        values => [ $source_doc->{number} ],
    );
    $hit_q->insert($match_doc);
}

my @docs_by_num_score
    = sort { $a->{number} <=> $b->{number} or $b->{id} <=> $a->{id} } @docs;
@wanted = map { $_->{id} } @docs_by_num_score;
@got    = map { $_->get_doc_id() } @{ $hit_q->pop_all() };
is_deeply( \@got, \@wanted, "sort by num then score then doc id" );
# Same sort spec and expectation as the number/score/doc_id check, but the
# sort values are supplied as a pre-built Lucy::Object::VArray holding a
# CharBuf rather than as a plain Perl array reference.
$hit_q = Lucy::Search::HitQueue->new(
    sort_spec => $by_num_then_score,
    schema    => $schema,
    wanted    => 100,
);

for my $doc_id ( shuffle( 1 .. 100 ) ) {
    my $source_doc  = $docs[ $doc_id - 1 ];
    my $sort_values = Lucy::Object::VArray->new( capacity => 1 );
    $sort_values->push(
        Lucy::Object::CharBuf->new( $source_doc->{number} ) );
    my $match_doc = Lucy::Search::MatchDoc->new(
        doc_id => $doc_id,
        score  => $doc_id,
        values => $sort_values,
    );
    $hit_q->insert($match_doc);
}

my @docs_by_num_id
    = sort { $a->{number} <=> $b->{number} or $b->{id} <=> $a->{id} } @docs;
@wanted = map { $_->{id} } @docs_by_num_id;
@got    = map { $_->get_doc_id() } @{ $hit_q->pop_all() };
is_deeply( \@got, \@wanted, "sort by value when no reader set" );
# Randomized check: generate 30 [ doc_id, score ] pairs and verify, for every
# queue size from 0 to 29, that a HitQueue limited to that size returns
# exactly the best-ranked doc_ids, in rank order.  The 30 queue sizes account
# for 30 of the tests counted in this file's plan.
@docs_and_scores = map { [ int( rand(10000) ) + 1, rand(10) ] } 1 .. 30;

# Expected ranking: descending score, with ties broken by ascending doc_id.
# This is the same ordering the first test in this file asserts for a
# HitQueue without a sort_spec.  Without the tiebreak, two equal random
# scores would leave the expected order dependent on generation order and
# could make the test fail spuriously.
@docs_and_scores
    = sort { $b->[1] <=> $a->[1] || $a->[0] <=> $b->[0] } @docs_and_scores;
@doc_ids = map { $_->[0] } @docs_and_scores;

# The MatchDocs are fed to the queue in doc_id order, not in score order.
@match_docs = map {
    Lucy::Search::MatchDoc->new(
        doc_id => $_->[0],
        score  => $_->[1],
    )
} sort { $a->[0] <=> $b->[0] } @docs_and_scores;

for my $size ( 0 .. $#match_docs ) {
    $hit_q = Lucy::Search::HitQueue->new( wanted => $size );
    $hit_q->insert($_) for @match_docs;

    # For size 0 the range 0 .. -1 is empty, so the slice is empty as well.
    @wanted = @doc_ids[ 0 .. $size - 1 ];
    @got    = map { $_->get_doc_id } @{ $hit_q->pop_all };
    is_deeply( \@got, \@wanted, "random docs and scores, size $size" );
}