| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| |
| use strict; |
| use warnings; |
| use lib 'buildlib'; |
| |
| package NonMergingIndexManager; |
| use base 'Lucy::Index::IndexManager'; |
| |
| # IndexManager subclass whose recycle() always answers with a brand new, |
| # zero-capacity VArray.  Going by the package name, the point is that an |
| # Indexer using this manager merges no existing segments (presumably |
| # recycle() is how the manager nominates segments for merging -- confirm |
| # against the IndexManager documentation). |
| sub recycle { |
|     my $nothing_to_recycle = Lucy::Object::VArray->new( capacity => 0 ); |
|     return $nothing_to_recycle; |
| } |
| |
| package SortSchema; |
| use base 'Lucy::Plan::Schema'; |
| |
| # Schema used by this test.  Fields are registered in this order: |
| #   name                                   - FullTextType, sortable => 1 |
| #   speed weight home cat wheels unused    - StringType, sortable => 1 |
| #   nope                                   - StringType with no options |
| # Returns the constructed schema object. |
| sub new { |
|     my ( $class, @args ) = @_; |
|     my $self = $class->SUPER::new(@args); |
| |
|     # Build the three field types up front. |
|     my $sortable_text = Lucy::Plan::FullTextType->new( |
|         analyzer => Lucy::Analysis::RegexTokenizer->new, |
|         sortable => 1, |
|     ); |
|     my $sortable_string = Lucy::Plan::StringType->new( sortable => 1 ); |
|     my $plain_string    = Lucy::Plan::StringType->new; |
| |
|     $self->spec_field( name => 'name', type => $sortable_text ); |
| |
|     # All of these share the single sortable StringType instance. |
|     for my $field_name (qw( speed weight home cat wheels unused )) { |
|         $self->spec_field( name => $field_name, type => $sortable_string ); |
|     } |
| |
|     # The one field whose type was created without sortable => 1. |
|     $self->spec_field( name => 'nope', type => $plain_string ); |
| |
|     return $self; |
| } |
| |
| package main; |
| use Lucy::Test; |
| use Test::More tests => 57; |
| |
| # Set a very small default memory threshold on SortWriter so that it is |
| # forced to flush frequently. |
| Lucy::Index::SortWriter::set_default_mem_thresh(100); |
| |
| # Fixture documents.  Numeric-looking speed/weight values are zero-padded |
| # strings of equal width. |
| |
| # The only document that carries a value for 'nope'. |
| my $airplane = { |
|     name   => 'airplane', |
|     speed  => '0200', |
|     weight => '8000', |
|     home   => 'air', |
|     cat    => 'vehicle', |
|     wheels => 3, |
|     nope   => 'nyet', |
| }; |
| |
| my $bike = { |
|     name   => 'bike', |
|     speed  => '0015', |
|     weight => '0025', |
|     home   => 'land', |
|     cat    => 'vehicle', |
|     wheels => 2, |
| }; |
| |
| my $car = { |
|     name   => 'car', |
|     speed  => '0070', |
|     weight => '3000', |
|     home   => 'land', |
|     cat    => 'vehicle', |
|     wheels => 4, |
| }; |
| |
| # Deliberately lacks a "wheels" field, to exercise NULL/undef handling. |
| my $dirigible = { |
|     name   => 'dirigible', |
|     speed  => '0040', |
|     weight => '0000', |
|     home   => 'air', |
|     cat    => 'vehicle', |
| }; |
| |
| # Deliberately lacks a "wheels" field, to exercise NULL/undef handling. |
| my $elephant = { |
|     name   => 'elephant', |
|     speed  => '0020', |
|     weight => '6000', |
|     home   => 'land', |
|     cat    => 'vehicle', |
| }; |
| |
| # Create an index in a RAMFolder using the SortSchema. |
| my $folder  = Lucy::Store::RAMFolder->new; |
| my $schema  = SortSchema->new; |
| my $indexer = Lucy::Index::Indexer->new( index => $folder, schema => $schema ); |
| |
| # Index the first three vehicles, then commit. |
| for my $vehicle ( $airplane, $bike, $car ) { |
|     $indexer->add_doc($vehicle); |
| } |
| $indexer->commit; |
| |
| # Open the committed index and obtain the sort/doc readers and the segment |
| # object belonging to its first SegReader. |
| my $polyreader = Lucy::Index::IndexReader->open( index => $folder ); |
| my $seg_reader = $polyreader->get_seg_readers->[0]; |
| my $sort_reader = $seg_reader->obtain("Lucy::Index::SortReader"); |
| my $doc_reader = $seg_reader->obtain("Lucy::Index::DocReader"); |
| my $segment = $seg_reader->get_segment; |
| |
| # For each sortable field that occurs in the indexed docs: an .ord file |
| # must exist under seg_1, and for every doc the value found via the sort |
| # cache must equal the value stored in the doc itself. |
| for my $field (qw( name speed weight home cat wheels )) { |
|     my $field_num = $segment->field_num($field); |
|     ok( $folder->exists("seg_1/sort-$field_num.ord"), |
|         "sort files written for $field" ); |
|     my $sort_cache = $sort_reader->fetch_sort_cache($field); |
|     for my $doc_id ( 1 .. $seg_reader->doc_max ) { |
| |
|         # The field name is part of the test name so that a failure can |
|         # be attributed to a field as well as to a doc. |
|         is( $sort_cache->value( ord => $sort_cache->ordinal($doc_id) ), |
|             $doc_reader->fetch_doc($doc_id)->{$field}, |
|             "correct cached value field $field doc $doc_id " |
|         ); |
|     } |
| } |
| |
| # 'unused' occurs in none of the docs and the type of 'nope' was created |
| # without sortable => 1, so neither may have an .ord file. |
| for my $field (qw( unused nope )) { |
|     my $field_num = $segment->field_num($field); |
|     ok( !$folder->exists("seg_1/sort-$field_num.ord"), |
|         "no sort files written for $field" ); |
| } |
| |
| # Write the dirigible through an Indexer that uses the |
| # NonMergingIndexManager, so that it ends up in a second segment. |
| $indexer = Lucy::Index::Indexer->new( |
|     index   => $folder, |
|     schema  => $schema, |
|     manager => NonMergingIndexManager->new, |
| ); |
| $indexer->add_doc($dirigible); |
| $indexer->commit; |
| |
| # Now consolidate everything with a default Indexer to test merging: |
| # delete the bike, add the elephant, optimize, commit. |
| $indexer = Lucy::Index::Indexer->new( index => $folder, schema => $schema ); |
| $indexer->delete_by_term( field => 'name', term => 'bike' ); |
| $indexer->add_doc($elephant); |
| $indexer->optimize; |
| $indexer->commit; |
| |
| # Count the entries in the recursive folder listing that still mention |
| # seg_1 or seg_2; none should remain. |
| my $num_old_seg_files = grep { $_ =~ m/seg_[12]/ } @{ $folder->list_r }; |
| is( $num_old_seg_files, 0, "all files from earlier segments zapped" ); |
| |
| # Re-open the index and refresh the readers and the segment object. |
| $polyreader  = Lucy::Index::IndexReader->open( index => $folder ); |
| $seg_reader  = $polyreader->get_seg_readers->[0]; |
| $sort_reader = $seg_reader->obtain("Lucy::Index::SortReader"); |
| $doc_reader  = $seg_reader->obtain("Lucy::Index::DocReader"); |
| $segment     = $seg_reader->get_segment; |
| |
| # Same checks as for the first segment, this time against seg_3: the .ord |
| # file exists and the cached sort value matches the stored doc value. |
| for my $field (qw( name speed weight home cat wheels )) { |
|     my $field_num = $segment->field_num($field); |
|     my $ord_path  = "seg_3/sort-$field_num.ord"; |
|     ok( $folder->exists($ord_path), "sort files written for $field" ); |
| |
|     my $sort_cache = $sort_reader->fetch_sort_cache($field); |
|     for my $doc_id ( 1 .. $seg_reader->doc_max ) { |
|         my $ord = $sort_cache->ordinal($doc_id); |
|         is( $sort_cache->value( ord => $ord ), |
|             $doc_reader->fetch_doc($doc_id)->{$field}, |
|             "correct cached value field $field doc $doc_id " |
|         ); |
|     } |
| } |