| SWISH-Prog documentation | Contained in the SWISH-Prog distribution. |
SWISH::Prog - information retrieval application framework
use SWISH::Prog;
my $program = SWISH::Prog->new(
invindex => 'path/to/myindex',
aggregator => 'fs',
indexer => 'native',
config => 'some/swish/config/file',
filter => sub { print $_[0]->url . "\n" },
);
$program->run('some/dir');
print $program->count . " documents indexed\n";
NOTE: As of version 0.20 this API has been completely redesigned from previous versions.
SWISH::Prog is a full-text search framework based on Swish-e. SWISH::Prog handles document and data aggregation and indexing.
The name "SWISH::Prog" comes from the Swish-e -S prog feature. "prog" is short for "program". SWISH::Prog makes it easy to write indexing and search programs.
The API is a work in progress and subject to change.
All of the following methods may be overridden when subclassing this module.
Overrides base SWISH::Prog::Class init() method.
Get the SWISH::Prog::Aggregator object. You should set this in new().
Execute the program. This is an alias for index().
Add the items in the given collection to the invindex(). Returns the number of documents indexed.
Returns the aggregator's config() object.
Returns the indexer's invindex.
Returns the indexer.
Returns the indexer's count. NOTE: This is the number of documents actually indexed; it does not include documents that were considered and discarded by the aggregator. If you want the number of documents the aggregator looked at, regardless of whether they were indexed, use the aggregator's count() method.
Dry-run mode: prints information on stderr but does not build the index. This flag is set in new() and passed to the indexer and aggregator.
Peter Karman, <perl@peknet.com>
Please report any bugs or feature requests to bug-swish-prog at rt.cpan.org, or through
the web interface at http://rt.cpan.org/NoAuth/ReportBug.html?Queue=SWISH-Prog.
I will be notified, and then you'll
automatically be notified of progress on your bug as I make changes.
You can find documentation for this module with the perldoc command.
perldoc SWISH::Prog
You can also look for information at:
Copyright 2008-2009 by Peter Karman
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
SWISH::Prog::Doc, SWISH::Prog::Headers, SWISH::Prog::Indexer, SWISH::Prog::InvIndex, SWISH::Prog::Utils, SWISH::Prog::Aggregator, SWISH::Prog::Config
| SWISH-Prog documentation | Contained in the SWISH-Prog distribution. |
package SWISH::Prog;

use 5.008003;
use strict;
use warnings;
use base qw( SWISH::Prog::Class );

use Carp;
use Data::Dump qw( dump );
use Scalar::Util qw( blessed );
use SWISH::Prog::Config;
use SWISH::Prog::InvIndex;

our $VERSION = '0.51';

# Each SWISH::Prog object has an aggregator, which in turn has an indexer
# and an invindex. Only the aggregator and the test_mode flag are stored
# through generated accessors on this class.
__PACKAGE__->mk_accessors( 'aggregator', 'test_mode' );
# Short names accepted for the 'aggregator' argument of new(), mapped to the
# class that is loaded and instantiated.
my %ashort = (
    fs     => 'SWISH::Prog::Aggregator::FS',
    mail   => 'SWISH::Prog::Aggregator::Mail',
    mailfs => 'SWISH::Prog::Aggregator::MailFS',
    dbi    => 'SWISH::Prog::Aggregator::DBI',
    spider => 'SWISH::Prog::Aggregator::Spider',
    object => 'SWISH::Prog::Aggregator::Object',
);

# Short names accepted for the 'indexer' argument of new().
my %ishort = (
    native => 'SWISH::Prog::Native::Indexer',
    xapian => 'SWISH::Prog::Xapian::Indexer',
    ks     => 'SWISH::Prog::KSx::Indexer',
    lucy   => 'SWISH::Prog::Lucy::Indexer',
    dbi    => 'SWISH::Prog::DBI::Indexer',
);

# _load_class( $class, $role )
#
# Private helper (plain function, not a method). Loads $class with require
# and returns the class name. $role ('indexer' or 'aggregator') is only used
# to build the error message.
#
# Croaks with "invalid $role ..." when $class is not a syntactically valid
# package name or when the require fails.
sub _load_class {
    my ( $class, $role ) = @_;

    # The name is interpolated into a string eval below, so refuse anything
    # that is not a plain Package::Name before it can be executed as code.
    if ( !defined $class
        or $class !~ m/\A[A-Za-z_][A-Za-z0-9_]*(?:::[A-Za-z0-9_]+)*\z/ )
    {
        my $shown = defined $class ? $class : '(undef)';
        croak "invalid $role $shown: not a valid class name";
    }

    eval "require $class";
    if ($@) {
        croak "invalid $role $class: $@";
    }

    return $class;
}

# init( %args )
#
# Overrides the base class init(). Makes sure the object ends up with an
# aggregator and an indexer, creating either one from a class name (or one
# of the short names above) when it was not passed in as an object.
#
# Arguments handled here:
#   filter     - passed to the aggregator's set_filter(); removed from %args
#                before the base init() sees it.
#   indexer    - object, class name or short name; defaults to 'native'.
#   aggregator - object, class name or short name; defaults to 'fs'.
#   config, invindex - forwarded to a newly created indexer; may be undef.
#
# Returns $self. In search mode (query set, no indexer and no aggregator)
# it returns early with an empty return and builds nothing.
#
# Croaks when a class cannot be loaded or when a supplied object is not
# derived from SWISH::Prog::Indexer / SWISH::Prog::Aggregator.
sub init {
    my $self = shift;
    my %arg  = @_;

    # 'filter' has no accessor on this class; it is a convenience that is
    # handed to the aggregator further down.
    my $filter = delete $arg{filter};
    $self->SUPER::init(%arg);

    # search mode requires only invindex
    if ( $self->{query} && !$self->{indexer} && !$self->{aggregator} ) {
        return;
    }

    my $config  = $self->{config};                # ok if undef
    my $indexer = $self->{indexer} || 'native';

    # NOTE: when the aggregator was passed in as an object, its own indexer
    # and config replace whatever indexer/config was given to new().
    if ( $self->{aggregator} and blessed( $self->{aggregator} ) ) {
        $indexer = $self->{aggregator}->indexer;
        $config  = $self->{aggregator}->config;
    }

    if ( !blessed($indexer) ) {
        if ( exists $ishort{$indexer} ) {
            $indexer = $ishort{$indexer};
        }
        $self->debug and warn "creating indexer: $indexer";
        _load_class( $indexer, 'indexer' );
        $indexer = $indexer->new(
            debug     => $self->debug,
            invindex  => $self->{invindex},    # may be undef
            verbose   => $self->verbose,
            config    => $config,              # may be undef
            test_mode => $self->test_mode,
        );
    }
    elsif ( !$indexer->isa('SWISH::Prog::Indexer') ) {
        croak "$indexer is not a SWISH::Prog::Indexer-derived object";
    }

    my $aggregator = $self->{aggregator} || 'fs';

    if ( !blessed($aggregator) ) {
        if ( exists $ashort{$aggregator} ) {
            $aggregator = $ashort{$aggregator};
        }
        $self->debug and warn "creating aggregator: $aggregator";
        _load_class( $aggregator, 'aggregator' );
        $aggregator = $aggregator->new(
            indexer   => $indexer,
            debug     => $self->debug,
            verbose   => $self->verbose,
            test_mode => $self->test_mode,
        );
    }
    elsif ( !$aggregator->isa('SWISH::Prog::Aggregator') ) {
        croak "$aggregator is not a SWISH::Prog::Aggregator-derived object";
    }

    if ($filter) {
        $aggregator->set_filter($filter);
    }

    $self->{aggregator} = $aggregator;
    $self->{indexer}    = $indexer;

    # Objects that were passed in ready-made only inherit test_mode when
    # they do not carry the key themselves.
    $indexer->{test_mode} = $self->{test_mode}
        unless exists $indexer->{test_mode};
    $aggregator->{test_mode} = $self->{test_mode}
        unless exists $aggregator->{test_mode};

    $self->debug and carp dump $self;

    return $self;
}
# run() is installed as a direct alias for index().
*run = \&index;

# index( @collection )
#
# Starts the aggregator's indexer, has the aggregator crawl the arguments,
# finishes the indexer and returns the indexer's count.
#
# Croaks when no aggregator is set or when it is not a
# SWISH::Prog::Aggregator.
sub index {
    my $self = shift;

    my $agg = $self->aggregator;
    croak 'aggregator required' unless $agg;

    if ( !$agg->isa('SWISH::Prog::Aggregator') ) {
        croak "aggregator is not a SWISH::Prog::Aggregator";
    }

    # The indexer is fetched from the aggregator at every step rather than
    # cached in a local.
    $agg->indexer->start;
    $agg->crawl(@_);
    $agg->indexer->finish;

    return $agg->indexer->count;
}
# config()
#
# Returns the aggregator's config when an aggregator is set. Otherwise
# returns the config of the indexer object stored on this object, if there
# is one, and undef when there is neither.
sub config {
    my $self = shift;

    if ( $self->aggregator ) {
        return $self->aggregator->config;
    }

    # The indexer() method in this class goes through the aggregator, so
    # calling it here (where there is no aggregator) would die on undef.
    # Read the stored indexer slot directly instead.
    my $indexer = $self->{indexer};
    if ( blessed($indexer) ) {
        return $indexer->config;
    }

    return undef;
}
# invindex()
#
# With an aggregator set, returns the indexer's invindex. Without one,
# returns the stored invindex value if it is already an object, and
# otherwise wraps it in a new SWISH::Prog::InvIndex as the path.
sub invindex {
    my $self = shift;

    return $self->indexer->invindex if $self->aggregator;

    my $stored = $self->{invindex};
    if ( blessed($stored) ) {
        return $stored;
    }

    return SWISH::Prog::InvIndex->new( path => $stored );
}
# indexer()
#
# Returns the aggregator's indexer. When no aggregator is set (init()
# returns before creating one in search mode) this returns the stored
# indexer slot, which may be undef, instead of dying on an undefined
# aggregator.
sub indexer {
    my $self = shift;

    my $aggregator = $self->aggregator;
    if ($aggregator) {
        return $aggregator->indexer;
    }

    return $self->{indexer};
}
# count()
#
# Returns whatever the indexer's count() returns.
sub count {
    my $self = shift;
    return $self->indexer->count;
}
1; __END__