| KinoSearch documentation | Contained in the KinoSearch distribution. |
KSx::Index::ByteBufDocWriter - Write a Doc as a fixed-width byte array.
Create an Architecture subclass which overrides register_doc_writer() and register_doc_reader():
package MyArchitecture;
use base qw( KinoSearch::Plan::Architecture );
use KSx::Index::ByteBufDocReader;
use KSx::Index::ByteBufDocWriter;

# Install a ByteBufDocWriter as the document writer for the segment being
# written.  It stores the 16-byte 'value' field of each document.
sub register_doc_writer {
    my ( $self, $seg_writer ) = @_;
    my $doc_writer = KSx::Index::ByteBufDocWriter->new(
        width      => 16,
        field      => 'value',
        snapshot   => $seg_writer->get_snapshot,
        segment    => $seg_writer->get_segment,
        polyreader => $seg_writer->get_polyreader,
    );

    # Register under the *writer* API name: the component handed to a
    # SegWriter is a DataWriter, not a reader.
    $seg_writer->register(
        api       => "KinoSearch::Index::DocWriter",
        component => $doc_writer,
    );

    # Registration alone only records the component; add_writer() is what
    # puts it in the SegWriter's list of writers.
    $seg_writer->add_writer($doc_writer);
}

# Install the matching ByteBufDocReader so that searches read back the
# fixed-width records.  'width' and 'field' must agree with the values
# given to the writer above.
sub register_doc_reader {
    my ( $self, $seg_reader ) = @_;
    my $doc_reader = KSx::Index::ByteBufDocReader->new(
        width    => 16,
        field    => 'value',
        schema   => $seg_reader->get_schema,
        folder   => $seg_reader->get_folder,
        segments => $seg_reader->get_segments,
        seg_tick => $seg_reader->get_seg_tick,
        snapshot => $seg_reader->get_snapshot,
    );
    $seg_reader->register(
        api       => 'KinoSearch::Index::DocReader',
        component => $doc_reader,
    );
}
package MySchema;
use base qw( KinoSearch::Plan::Schema );

# Supply the custom architecture so that indexes built with this schema
# use the ByteBuf doc writer and reader.
sub architecture {
    return MyArchitecture->new;
}
Proceed as normal in your indexer app, making sure that every document you add supplies a valid value for the field in question (in this example, exactly 16 bytes in the 'value' field):
# The key must match the 'field' given to ByteBufDocWriter ('value' in this
# example), and its content must be exactly 'width' bytes long.
$indexer->add_doc({
    title   => $title,
    content => $content,
    value   => $id,    # <---- Must meet spec.
});
Then, in your search app:
my $searcher = KinoSearch::Search::IndexSearcher->new(
    index => '/path/to/index',
);
my $hits = $searcher->hits( query => $query );

# Use the fixed-width 'value' stored for each hit as the key for fetching
# the complete document from an external source.
while ( my $doc = $hits->next ) {
    my $real_doc = $external_document_source->fetch( $doc->{value} );
    ...
}
This is a proof-of-concept class to demonstrate alternate implementations for fetching documents. It is unsupported.
Copyright 2009-2011 Marvin Humphrey
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| KinoSearch documentation | Contained in the KinoSearch distribution. |
use strict;
use warnings;

package KSx::Index::ByteBufDocWriter;
use base qw( KinoSearch::Index::DataWriter );
use Carp;
use Scalar::Util qw( blessed );

# Load the bytes module so that bytes::length() is callable, then turn the
# pragma straight back off so the rest of the file is not compiled with
# byte semantics.
use bytes;
no bytes;

# Inside-out member vars, keyed on $$self.
our %field;        # name of the document field whose value is stored
our %width;        # required length of that value, in bytes
our %outstream;    # output stream for the segment's data file, opened lazily

# Constructor.
#
# Params (labeled):
#   width - required; number of bytes in every record, must be at least 1.
#   field - required; name of the document field to store.
#   Any remaining params are passed through to the parent constructor.
#
# Throws (confess) if 'width' or 'field' is missing, or if 'width' < 1.
sub new {
    my ( $either, %args ) = @_;
    my $width = delete $args{width};
    my $field = delete $args{field};
    my $self  = $either->SUPER::new(%args);
    confess("Missing required param 'width'") unless defined $width;
    confess("Missing required param 'field'") unless defined $field;
    if ( $width < 1 ) { confess("'width' must be at least 1") }
    $field{$$self} = $field;
    $width{$$self} = $width;
    return $self;
}

# Open the segment's "bytebufdocs.dat" file on first use and return the
# stream.  Nothing is created on disk for a writer that never receives a
# document.
sub _lazy_init {
    my $self = shift;

    # Get outstream.  Skip past non-doc #0.
    my $folder    = $self->get_folder;
    my $filename  = $self->get_segment->get_name . "/bytebufdocs.dat";
    my $outstream = $outstream{$$self} = $folder->open_out($filename)
        or confess KinoSearch->error;

    # Write one all-NUL record as the placeholder for doc 0.
    my $nulls = "\0" x $width{$$self};
    $outstream->print($nulls);

    return $outstream;
}

# Append the configured field's value from one document to the data file.
#
# Params (labeled):
#   inverter - object whose get_doc->get_fields yields the document's
#              field hash.
#
# Throws (confess) if the field has no value or if the value is not
# exactly 'width' bytes long.
sub add_inverted_doc {
    my ( $self, %args ) = @_;
    my $outstream = $outstream{$$self} || _lazy_init($self);
    my $fields    = $args{inverter}->get_doc->get_fields;
    my $width     = $width{$$self};
    my $field     = $field{$$self};
    my $value     = $fields->{$field};

    # Report a missing value explicitly rather than letting it surface as
    # uninitialized-value warnings and a "Width of '' not N" message.
    confess("Missing value for field '$field'") unless defined $value;

    # Measure in bytes, not characters: records are fixed-width on disk.
    if ( bytes::length($value) != $width ) {
        confess("Width of '$value' not $width");
    }
    $outstream->print($value);
}

# Copy the records of an existing segment into this one.
#
# Params (labeled):
#   reader  - segment reader for the segment being absorbed.
#   doc_map - object whose get($doc_id) is false for documents that should
#             be skipped.
#
# Throws (confess) if the segment's DocReader is not a ByteBufDocReader.
sub add_segment {
    my ( $self, %args ) = @_;
    my $seg_reader = $args{reader};
    my $doc_map    = $args{doc_map};
    my $doc_max    = $seg_reader->doc_max;

    # Bail if the supplied segment is empty.
    return unless $doc_max;

    my $outstream  = $outstream{$$self} || _lazy_init($self);
    my $doc_reader = $seg_reader->obtain("KinoSearch::Index::DocReader");
    confess("Not a ByteBufDocReader")
        unless ( blessed($doc_reader)
        and $doc_reader->isa("KSx::Index::ByteBufDocReader") );

    # Doc ids start at 1.
    for my $i ( 1 .. $doc_max ) {
        next unless $doc_map->get($i);
        my $buf;
        $doc_reader->read_record( $i, \$buf );
        $outstream->print($buf);
    }
}

# Close the data file, if one was opened, and store this component's
# metadata in the segment under the key 'bytebufdocs'.
sub finish {
    my $self      = shift;
    my $outstream = $outstream{$$self};
    if ($outstream) {
        $outstream->close;
        my $segment = $self->get_segment;
        $segment->store_metadata(
            key      => 'bytebufdocs',
            metadata => $self->metadata
        );
    }
}

# Format number for this component.
# NOTE(review): presumably consumed by the inherited metadata() -- confirm.
sub format { 1 }

# Release the inside-out member vars before the parent destructor runs.
sub DESTROY {
    my $self = shift;
    delete $field{$$self};
    delete $width{$$self};
    delete $outstream{$$self};
    $self->SUPER::DESTROY;
}

1;

__END__

__POD__