blob: f667a58d5cf69bffa1437336eebc306ac30afe8f [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
use strict;
package LucyX::Index::ZlibDocWriter;
use base qw( Lucy::Index::DataWriter );
use Carp;
use Scalar::Util qw( blessed );
use Compress::Zlib qw( compress );
use Clownfish::Util::StringHelper qw( cat_bytes );
use Clownfish qw( to_perl );
use bytes;
no bytes;
our $VERSION = '0.004001';
$VERSION = eval $VERSION;
# Inside-out member vars.
our %ix_out;
our %dat_out;
# Inherit constructor.
# Open the two outstreams for this segment on first use and remember them in
# the inside-out member hashes, keyed by $$self.
#
#   <segment name>/zdocs.ix  - one 64-bit file pointer per doc id
#   <segment name>/zdocs.dat - the record bytes those pointers refer to
#
# Dies (confess) with Clownfish->error if either stream cannot be opened.
sub _lazy_init {
    my ($self) = @_;
    my $id = $$self;

    my $folder   = $self->get_folder;
    my $seg_name = $self->get_segment->get_name;
    my $ix_path  = "$seg_name/zdocs.ix";
    my $dat_path = "$seg_name/zdocs.dat";

    $ix_out{$id} = $folder->open_out($ix_path)
        or confess Clownfish->error;
    $dat_out{$id} = $folder->open_out($dat_path)
        or confess Clownfish->error;

    # Doc ids start at 1, so slot 0 of the index gets a filler pointer.
    $ix_out{$id}->write_i64(0);
}
# Serialize the stored fields of one inverted document, compress the result
# with zlib and append it to the data stream, recording its start offset in
# the index stream.
#
# Named arguments (read from %args):
#   inverter - iterated with iterate()/next(); supplies each field's type,
#              name and value
#   doc_id   - must equal the id implied by the current index stream position
#
# Layout of the string handed to compress(); every integer is packed with
# the BER compressed-integer template "w":
#   <field count>
#   then, per stored field: <name byte length> <name> <value byte length> <value>
#
# Dies (confess) when doc_id is not the expected one.
sub add_inverted_doc {
    my ( $self, %args ) = @_;
    _lazy_init($self) unless $ix_out{$$self};
    my $inverter = $args{inverter};
    my $ix_out   = $ix_out{$$self};
    my $dat_out  = $dat_out{$$self};

    # Check doc id.  Every doc owns one 8-byte (i64) slot in the index
    # stream, so the current position divided by 8 is the next doc id.
    my $expected = $ix_out->tell / 8;
    confess("Expected doc id $expected, got '$args{doc_id}'")
        unless $args{doc_id} == $expected;

    my $to_compress = "";
    my $count       = 0;
    $inverter->iterate;
    while ( $inverter->next ) {

        # Only fields whose type is flagged as stored go into the record.
        next unless $inverter->get_type->stored;
        my $name  = $inverter->get_field_name;
        my $value = $inverter->get_value;

        # Lengths are measured in bytes (bytes::length), not characters.
        cat_bytes( $to_compress, pack( "w", bytes::length($name) ) );
        cat_bytes( $to_compress, $name );
        cat_bytes( $to_compress, pack( "w", bytes::length($value) ) );
        cat_bytes( $to_compress, $value );
        $count++;
    }

    # Prepend count of fields to store in this Doc.
    $to_compress = pack( "w", $count ) . $to_compress;

    # Write file pointer to index file.  Write compressed serialized string
    # to main file.
    $ix_out->write_i64( $dat_out->tell );
    $dat_out->print( compress($to_compress) );
}
# Copy the document records of an existing segment into the segment being
# written.
#
# Named arguments (read from %args):
#   reader  - segment reader; supplies doc_max and the doc reader component
#   doc_map - queried with get($doc_id); docs for which it returns a false
#             value are skipped
#
# Records are copied exactly as read_record() returns them; compress() is
# not called here.  Dies (confess) unless the segment's doc reader is a
# LucyX::Index::ZlibDocReader.
sub add_segment {
    my ( $self, %args ) = @_;
    my ( $seg_reader, $doc_map ) = @args{qw( reader doc_map )};
    my $doc_max = $seg_reader->doc_max;

    # Bail if the supplied segment is empty.
    return unless $doc_max;

    _lazy_init($self) unless $ix_out{$$self};
    my $ix_stream  = $ix_out{$$self};
    my $dat_stream = $dat_out{$$self};

    my $doc_reader = $seg_reader->obtain("Lucy::Index::DocReader");
    if (   !blessed($doc_reader)
        or !$doc_reader->isa("LucyX::Index::ZlibDocReader") )
    {
        confess("Not a ZlibDocReader");
    }

    for my $doc_id ( 1 .. $doc_max ) {
        next unless $doc_map->get($doc_id);

        # Fetch the stored record, then note where it starts in the data
        # stream before appending it.
        my $record;
        $doc_reader->read_record( $doc_id, \$record );
        $ix_stream->write_i64( $dat_stream->tell );
        $dat_stream->print($record);
    }
}
# Finalize the segment's doc storage.  Does nothing if the outstreams were
# never opened for this object.
sub finish {
    my ($self) = @_;
    my $id         = $$self;
    my $ix_stream  = $ix_out{$id};
    my $dat_stream = $dat_out{$id};

    if ($ix_stream) {

        # Append a final pointer to the end of the data stream so that the
        # length of the last record can be derived like any other.
        my $end_of_data = $dat_stream->tell;
        $ix_stream->write_i64($end_of_data);

        # Close both streams, then register this component's metadata with
        # the segment under the key 'zdocs'.
        $ix_stream->close;
        $dat_stream->close;
        $self->get_segment->store_metadata(
            key      => 'zdocs',
            metadata => $self->metadata,
        );
    }
}
# Always returns 1.  NOTE(review): presumably the file-format version that
# the parent DataWriter reports for this component -- confirm.
sub format { return 1; }
# Drop this object's entries from the inside-out member hashes, then let the
# parent class run its own destructor.
sub DESTROY {
    my ($self) = @_;
    my $id = $$self;
    delete $ix_out{$id};
    delete $dat_out{$id};
    $self->SUPER::DESTROY;
}
1;
__END__
__POD__
=head1 NAME
LucyX::Index::ZlibDocWriter - Compressed doc storage.
=head1 DESCRIPTION
This is a proof-of-concept class to demonstrate alternate implementations for
storing documents; it writes the zlib-compressed records that
LucyX::Index::ZlibDocReader reads back. It is unsupported.
=cut