| SWISH-Prog documentation | Contained in the SWISH-Prog distribution. |
SWISH::Prog::Indexer - base indexer class
use SWISH::Prog::Indexer;
my $indexer = SWISH::Prog::Indexer->new(
invindex => SWISH::Prog::InvIndex->new,
config => SWISH::Prog::Config->new,
count => 0,
clobber => 1,
flush => 10000,
started => time()
);
$indexer->start;
for my $doc (@list_of_docs) {
$indexer->process($doc);
}
$indexer->finish;
SWISH::Prog::Indexer is a base class implementing the simplest of indexing APIs. It is intended to be subclassed, along with InvIndex, for each IR backend library.
Constructor. See the SYNOPSIS for default options.
params may include the following keys, each of which is also an accessor method:
Overwrite any existing InvIndex.
A SWISH::Prog::Config object or file name.
The number of indexed docs at which in-memory changes should be written to disk.
A SWISH::Prog::InvIndex object.
Dry run mode: prints info on stderr but does not build the index.
Override base method to initialize object.
Opens the invindex() object and sets the started() time to time().
doc should be a SWISH::Prog::Doc-derived object.
process() should implement whatever the particular IR library API requires.
Closes the invindex().
Returns the number of documents processed.
The time at which the Indexer object was created. Returns a Unix epoch integer.
Peter Karman, <perl@peknet.com>
Please report any bugs or feature requests to bug-swish-prog at rt.cpan.org, or through
the web interface at http://rt.cpan.org/NoAuth/ReportBug.html?Queue=SWISH-Prog.
I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.
You can find documentation for this module with the perldoc command.
perldoc SWISH::Prog
You can also look for information at:
Copyright 2008-2009 by Peter Karman
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| SWISH-Prog documentation | Contained in the SWISH-Prog distribution. |
package SWISH::Prog::Indexer;

use strict;
use warnings;

# Base class; mk_accessors() and SUPER::init() are used from it below.
use base qw( SWISH::Prog::Class );

use Scalar::Util qw( blessed );
use Carp;
use Data::Dump qw( dump );
use SWISH::Prog::Config;

our $VERSION = '0.51';

# Each constructor param is also available as an accessor method.
__PACKAGE__->mk_accessors(
    qw(
        invindex
        config
        count
        clobber
        flush
        started
        test_mode
        )
);
# Initialize a freshly constructed Indexer (overrides the base-class init).
#
# After delegating to the parent init(), the 'config' param is normalized:
#   * a defined, unblessed value that neither contains "<swish>" nor ends
#     in ".xml" is passed to verify_isa_swish_prog_config() (defined outside
#     this file) and replaced with that method's return value;
#   * XML strings and *.xml file names are left untouched here;
#   * a missing or false config is replaced with a new SWISH::Prog::Config.
#
# Returns the object itself.
sub init {
    my $self = shift;
    $self->SUPER::init(@_);

    my $given = $self->{config};
    my $is_unblessed_value
        = exists $self->{config} && defined $given && !blessed($given);

    if ( $is_unblessed_value && $given !~ m/<swish>|\.xml$/ ) {
        $self->{config} = $self->verify_isa_swish_prog_config($given);
    }

    # Fall back to an empty default configuration.
    unless ( $self->{config} ) {
        $self->{config} = SWISH::Prog::Config->new;
    }

    return $self;
}
# Open the invindex and record the start time.
#
# Croaks when no invindex is set, or when the invindex is not a blessed
# object implementing open(). Otherwise calls open() on the invindex and
# stores the current epoch time in 'started'; that time is also the
# return value.
sub start {
    my ($self) = @_;
    my $invindex = $self->invindex;

    croak "Missing invindex object" unless defined $invindex;

    unless ( blessed($invindex) && $invindex->can('open') ) {
        croak
            "Invalid invindex: either not blessed object or does not implement 'open' method";
    }

    $invindex->open;

    return $self->{started} = time();
}
# Register one document with the indexer.
#
# $doc must be a SWISH::Prog::Doc-derived object; anything else croaks.
# If 'started' is not yet set, start() is called first. The 'count'
# attribute is then incremented and $doc is returned unchanged. This
# method does no indexing work of its own.
sub process {
    my ( $self, $doc ) = @_;

    my $is_doc = $doc && blessed($doc) && $doc->isa('SWISH::Prog::Doc');
    croak "SWISH::Prog::Doc object required" if !$is_doc;

    if ( !$self->started ) {
        $self->start;
    }

    ++$self->{count};

    return $doc;
}
# Close the invindex. Returns whatever the invindex's close() returns.
sub finish {
    my ($self) = @_;

    my $invindex = $self->invindex;

    return $invindex->close;
}
# NOTE in _verify_swish3_config() below,
# if config is already in swish3 format, must
# override param value with SWISH::Prog::Config object
# after adding to SWISH::3::Config object so that the
# aggregator using this Indexer is happy.

# Feed the 'config' param into the config object reached through
# $self->{s3}->config, and make sure $self->{config} ends up holding a
# SWISH::Prog::Config object.
#
# NOTE(review): $self->{s3} is never set in this file; presumably a
# subclass stores a SWISH::3 instance there before calling this -- confirm.
#
# Accepted forms of $self->{config}:
#   * blessed object      - run through verify_isa_swish_prog_config(),
#                           converted with ver2_to_ver3() and added;
#   * XML string          - (contains "<swish>" or a line break) added as-is,
#                           then replaced with a new SWISH::Prog::Config;
#   * readable *.xml file - treated like the XML string case;
#   * other readable file - treated as Swish-e 2 format, handled like the
#                           object case.
#
# Returns nothing when no 'config' key exists, otherwise the resulting
# $self->{config}. Croaks on any other kind of value.
sub _verify_swish3_config {
    my $self = shift;

    if ( !exists $self->{config} ) {
        return;
    }

    #carp dump $self->{config};

    # isa object
    if ( blessed( $self->{config} ) ) {
        $self->{config}
            = $self->verify_isa_swish_prog_config( $self->{config} );
        my $swish_3_config = $self->{config}->ver2_to_ver3();
        $self->{s3}->config->add($swish_3_config);
    }

    # xml string
    elsif ( $self->{config} =~ m/<swish>|[\n\r]/ ) {
        $self->{s3}->config->add( $self->{config} );
        $self->{config} = SWISH::Prog::Config->new();
    }

    # file
    elsif ( -r $self->{config} ) {

        # swish3 format
        # The extension test is anchored, matching the check in init(), so
        # that a path merely containing ".xml" (e.g. "site.xml.conf" or
        # "/etc/app.xmlrpc/swish.conf") is not mistaken for an XML file.
        if ( $self->{config} =~ m/\.xml$/ ) {
            $self->{s3}->config->add( $self->{config} );
            $self->{config} = SWISH::Prog::Config->new();
        }

        # swish2 format
        else {
            $self->{config}
                = $self->verify_isa_swish_prog_config( $self->{config} );
            my $swish_3_config = $self->{config}->ver2_to_ver3();
            $self->{s3}->config->add($swish_3_config);
        }
    }

    # no support
    else {
        croak
            "Unsupported config format (not a XML string, filename or SWISH::Prog::Config object): $self->{config}";
    }

    return $self->{config};
}

1;

__END__