| Plagger documentation | Contained in the Plagger distribution. |
Plagger::Plugin::Search::Rast - Search Feed updates by Rast
  - module: Search::Rast
    config:
      encode: euc_jp
      replace: 1
      dir: /home/yappo/plagger-rast
This plugin indexes feed entries with Rast, an embedded search engine written in C. It requires the Rast.pm module, which can be installed from http://tech.yappo.jp/rast/
Kazuhiro Osawa
| Plagger documentation | Contained in the Plagger distribution. |
package Plagger::Plugin::Search::Rast;
use strict;
use base qw( Plagger::Plugin );

use Encode ();
use POSIX;
use Rast;

# Hook this plugin into the Plagger run: index each published feed, close
# the index when publishing is finished, and answer search requests.
sub register {
    my($self, $context) = @_;
    $context->register_hook(
        $self,
        'publish.feed'     => \&feed,
        'publish.finalize' => \&finalize,
        'searcher.search'  => \&search,
    );
}

# Read the plugin configuration, create the index directory on first use
# and open the index read/write. The handle is kept in $self->{rast}.
#
# Config keys read here:
#   dir    - path of the index directory
#   encode - 'euc_jp' selects EUC-JP; any other (or missing) value
#            selects utf8
sub init {
    my($self, @args) = @_;

    # Forward only the real arguments; passing @_ unchanged would hand the
    # invocant to the parent class a second time.
    $self->SUPER::init(@args);

    my $dir = $self->conf->{dir};

    # "|| ''" keeps the comparison well-defined when 'encode' is not set.
    $self->{encode} = ($self->conf->{encode} || '') eq 'euc_jp' ? 'euc_jp' : 'utf8';

    unless (-e $dir && -d _) {
        my $ret = Rast->create($dir, {
            encoding      => $self->{encode},
            preserve_text => 1,
            # feed() supplies the property values in exactly this order.
            properties    => [
                [ 'feedlink',  RAST_TYPE_STRING, RAST_PROPERTY_FLAG_SEARCH | RAST_PROPERTY_FLAG_TEXT_SEARCH ],
                [ 'permalink', RAST_TYPE_STRING, RAST_PROPERTY_FLAG_SEARCH | RAST_PROPERTY_FLAG_TEXT_SEARCH ],
                [ 'title',     RAST_TYPE_STRING, RAST_PROPERTY_FLAG_TEXT_SEARCH | RAST_PROPERTY_FLAG_FULL_TEXT_SEARCH ],
                [ 'author',    RAST_TYPE_STRING, RAST_PROPERTY_FLAG_SEARCH | RAST_PROPERTY_FLAG_TEXT_SEARCH ],
                [ 'date',      RAST_TYPE_DATE,   RAST_PROPERTY_FLAG_SEARCH ],
                [ 'tags',      RAST_TYPE_STRING, RAST_PROPERTY_FLAG_TEXT_SEARCH ],
            ],
        });
        unless ($ret) {
            Plagger->context->error("create index error $dir");
            return;
        }
        Plagger->context->log(info => "create index $dir");
    }

    my $rast = Rast->open($dir, RAST_DB_RDWR);

    # The hooks below cope with a missing handle; leave a trace in the log
    # so that a failed open does not go unnoticed.
    Plagger->context->log(error => "open index error $dir") unless $rast;

    $self->{rast} = $rast;
}

# 'publish.feed' hook: register every entry of $args->{feed} that has text.
# An entry already present in the index (same feed link and permalink) is
# re-indexed only when the 'replace' config option is true; otherwise it is
# left untouched.
sub feed {
    my($self, $context, $args) = @_;

    my $rast = $self->{rast};
    return unless $rast;

    my $feed = $args->{feed};
    for my $entry ($feed->entries) {
        next unless $entry->text;

        # Look the entry up first so that it is not registered twice.
        my $result = $rast->search(
            'feedlink = ' . $feed->link . ' & permalink = ' . $entry->permalink,
            { need_summary => 1, properties => ['permalink'] },
        );
        unless ($result) {
            $context->error('search error ' . $entry->permalink);
            return;
        }

        # Fall back to the current time when the entry carries no usable date.
        my $time = eval { $entry->date->epoch } || time;

        # Property values, in the order the properties are declared in init():
        # feedlink, permalink, title, author, date, tags.
        my $options = [
            $feed->link,
            $entry->permalink,
            $self->encode($entry->title) || '',
            $self->encode($entry->author) || '',
            POSIX::strftime('%Y-%m-%dT%H:%M:%S', localtime($time)),
            $self->encode(join(' ', @{ $entry->tags || [] })),
        ];
        my $text = $self->encode($entry->text);

        if (!$result->hit_count) {
            my $id = $rast->register($text, $options);
            $context->log(info => "add new docid = $id: " . $entry->permalink);
        }
        elsif ($self->conf->{replace}) {
            my $row = $result->fetch;
            my $id  = $rast->update($text, $options, $row->{doc_id});
            $context->log(info => "replace: old docid = " . $row->{doc_id} . " to new docid = $id: " . $entry->permalink);
        }
    }
}

# Convert $str to bytes in the index encoding ($self->{encode}). Input that
# is not flagged as a character string is first decoded as UTF-8 in place.
sub encode {
    my($self, $str) = @_;
    utf8::decode($str) unless utf8::is_utf8($str);
    return Encode::encode($self->{encode}, $str);
}

# Convert bytes in the index encoding back to a Perl character string.
sub decode {
    my($self, $str) = @_;
    return Encode::decode($self->{encode}, $str);
}

# 'publish.finalize' hook: close the index if it was opened.
sub finalize {
    my($self, $context) = @_;
    return unless $self->{rast};
    $self->{rast}->close;
}

# 'searcher.search' hook: run $args->{query} against the index and return a
# Plagger::Feed with one Plagger::Entry per hit.
sub search {
    my($self, $context, $args) = @_;

    # Report a missing handle explicitly instead of failing with
    # "Can't call method ... on an undefined value".
    my $rast = $self->{rast};
    unless ($rast) {
        $context->error('search error: index is not open');
        return;
    }

    my $result = $rast->search($self->encode($args->{query}), {
        need_summary => 1,
        properties   => [qw/feedlink permalink title author/],
    });
    unless ($result) {
        $context->error('search error ' . $args->{query});
        return;
    }

    my $feed = Plagger::Feed->new;
    $feed->type('search:Rast');
    $feed->title("Search: $args->{query}");

    while (my $row = $result->fetch) {
        # Properties are read by position, in the order requested above.
        my $props = $row->{properties};

        my $entry = Plagger::Entry->new;
        $entry->link($props->[0]);
        $entry->permalink($props->[1]);
        $entry->title($self->decode($props->[2]));
        $entry->author($self->decode($props->[3]));
        $entry->body($self->decode($row->{summary}));
        $feed->add_entry($entry);
    }

    return $feed;
}

1;
__END__