Plagger::Plugin::Search::Rast - Search Feed updates by Rast


Plagger documentation Contained in the Plagger distribution.

Index


Code Index:

NAME

Top

Plagger::Plugin::Search::Rast - Search Feed updates by Rast

SYNOPSIS

Top

  - module: Search::Rast
    config:
      encode: euc_jp
      replace: 1
      dir: /home/yappo/plagger-rast

DESCRIPTION

Top

This plugin indexes feed entries with Rast, the embedded search engine written in C. It requires the Rast.pm module, which can be installed from http://tech.yappo.jp/rast/

AUTHOR

Top

Kazuhiro Osawa

SEE ALSO

Top

Plagger, http://projects.netlab.jp/rast/, http://tech.yappo.jp/rast/


Plagger documentation Contained in the Plagger distribution.

package Plagger::Plugin::Search::Rast;
use strict;
use base qw( Plagger::Plugin );

use Encode ();
use POSIX;
use Rast;

# Wire this plugin into the Plagger hook chain.
#
# Three hooks are registered on $context, each bound to a sub of this
# package: entries are indexed on 'publish.feed', the index handle is closed
# on 'publish.finalize', and queries are answered on 'searcher.search'.
sub register {
    my ($self, $context) = @_;

    # Kept as an ordered list (not a hash) so the hook/callback pairs reach
    # register_hook in exactly this order.
    my @hook_table = (
        'publish.feed'     => \&feed,
        'publish.finalize' => \&finalize,
        'searcher.search'  => \&search,
    );

    $context->register_hook($self, @hook_table);
}


# Plugin initialisation: make sure the Rast index exists, then open it.
#
# Reads two settings from the plugin config:
#   dir    - path of the Rast index directory
#   encode - 'euc_jp' selects EUC-JP; any other value (or none) selects 'utf8'
#
# When `dir` is not an existing directory, a new index is created there with
# the property schema listed below. The opened handle is stored in
# $self->{rast}; the feed and finalize hooks skip their work when it is
# missing.
#
# Returns the opened index handle (false when the index could not be opened).
sub init {
    my($self, @args) = @_;

    # Forward only the remaining arguments: passing @_ unchanged would hand
    # the parent class a second copy of the invocant as its first argument.
    $self->SUPER::init(@args);

    my $dir = $self->conf->{dir};

    # An unset `encode` setting is treated like any other non-'euc_jp' value.
    $self->{encode} = ($self->conf->{encode} || '') eq 'euc_jp' ? 'euc_jp' : 'utf8';

    unless (-e $dir && -d _) {
        my $created = Rast->create($dir, {
            encoding      => $self->{encode},
            preserve_text => 1,
            # Each property is [ name, type, search flags ]. The order here is
            # the order in which feed() supplies the property values.
            properties    => [
                [
                    'feedlink',
                    RAST_TYPE_STRING,
                    RAST_PROPERTY_FLAG_SEARCH | RAST_PROPERTY_FLAG_TEXT_SEARCH
                ],
                [
                    'permalink',
                    RAST_TYPE_STRING,
                    RAST_PROPERTY_FLAG_SEARCH | RAST_PROPERTY_FLAG_TEXT_SEARCH
                ],
                [
                    'title',
                    RAST_TYPE_STRING,
                    RAST_PROPERTY_FLAG_TEXT_SEARCH | RAST_PROPERTY_FLAG_FULL_TEXT_SEARCH
                ],
                [
                    'author',
                    RAST_TYPE_STRING,
                    RAST_PROPERTY_FLAG_SEARCH | RAST_PROPERTY_FLAG_TEXT_SEARCH
                ],
                [
                    'date',
                    RAST_TYPE_DATE,
                    RAST_PROPERTY_FLAG_SEARCH
                ],
                [
                    'tags',
                    RAST_TYPE_STRING,
                    RAST_PROPERTY_FLAG_TEXT_SEARCH
                ]
            ],
        });
        unless ($created) {
            Plagger->context->error("create index error $dir");
            return;
        }
        Plagger->context->log(info => "create index $dir");
    }

    $self->{rast} = Rast->open($dir, RAST_DB_RDWR);
    unless ($self->{rast}) {
        # Report the failure instead of leaving the plugin silently disabled.
        # NOTE(review): assumes the Plagger logger accepts an 'error' level - confirm.
        Plagger->context->log(error => "open index error $dir");
    }

    return $self->{rast};
}

# 'publish.feed' hook: add the entries of $args->{feed} to the Rast index.
#
# Entries without text are skipped. For every other entry the index is
# searched for a document with the same feed link and permalink:
#   - no hit: the entry is registered as a new document;
#   - a hit and the `replace` config option is true: the existing document
#     is updated;
#   - a hit and `replace` is false: the entry is left alone.
#
# Does nothing when no index handle is available. If the lookup search
# fails, the failure is reported through $context->error and the remaining
# entries of the feed are not processed.
sub feed {
    my($self, $context, $args) = @_;

    my $rast = $self->{rast};
    return unless $rast;

    my $feed = $args->{feed};
    for my $entry ($feed->entries) {
        next unless $entry->text;

        # Look for a document already indexed for this feed/permalink pair.
        my $result = $rast->search('feedlink = ' . $feed->link . ' & permalink = ' . $entry->permalink, {
            need_summary => 1,
            properties => ['permalink']
        });
        unless ($result) {
            $context->error('search error ' . $entry->permalink);
            return;
        }

        # Fall back to the current time when the entry has no usable date.
        my $time = eval { $entry->date->epoch } || time;

        # Property values, in the order the properties are declared when the
        # index is created: feedlink, permalink, title, author, date, tags.
        my $options = [
            $feed->link,
            $entry->permalink,
            $self->encode($entry->title) || '',
            $self->encode($entry->author) || '',
            POSIX::strftime('%Y-%m-%dT%H:%M:%S', localtime($time)),
            # Tolerate an entry whose tag list is undefined.
            $self->encode(join(' ', @{ $entry->tags || [] }))
        ];

        my $text = $self->encode($entry->text);
        unless ($result->hit_count) {
            my $id = $rast->register($text, $options);
            $context->log(info => "add new docid = $id: " . $entry->permalink);
        } elsif ($self->conf->{replace}) {
            my $row = $result->fetch;
            my $id = $rast->update($text, $options, $row->{doc_id});
            $context->log(info => "replace: old docid = " . $row->{doc_id} . " to new docid = $id: " . $entry->permalink);
        }
    }
}

# Convert $str into bytes in the index encoding ($self->{encode}).
#
# A value that is not already flagged as a character string is first
# decoded in place as UTF-8, so both UTF-8 byte strings and character
# strings are accepted.
sub encode {
    my ($self, $str) = @_;

    if (!utf8::is_utf8($str)) {
        utf8::decode($str);
    }

    my $charset = $self->{encode};
    return Encode::encode($charset, $str);
}

# Turn bytes in the index encoding ($self->{encode}) back into a Perl
# character string. Counterpart of encode().
sub decode {
    my ($self, $str) = @_;

    my $charset = $self->{encode};
    return Encode::decode($charset, $str);
}

# 'publish.finalize' hook: close the index handle, if one is held.
# Returns nothing when no handle is stored in $self->{rast}.
sub finalize {
    my ($self, $context) = @_;

    my $index = $self->{rast}
        or return;

    $index->close;
}

# 'searcher.search' hook: run $args->{query} against the Rast index and
# return the hits as a Plagger::Feed.
#
# The query is converted to the index encoding before searching; title,
# author and summary of each hit are converted back before being stored on
# the resulting entries.
#
# Returns nothing when no index handle is available, and reports through
# $context->error (then returns nothing) when the search itself fails, in
# the same way the feed hook handles these two situations.
sub search {
    my($self, $context, $args) = @_;

    # Without this guard the method call below would die on an undefined
    # handle whenever the index could not be opened.
    my $rast = $self->{rast};
    return unless $rast;

    my $result = $rast->search($self->encode($args->{query}), {
        need_summary => 1,
        properties => [qw/feedlink permalink title author/],
    });
    unless ($result) {
        $context->error("search error $args->{query}");
        return;
    }

    my $feed = Plagger::Feed->new;
    $feed->type('search:Rast');
    $feed->title("Search: $args->{query}");

    while (my $row = $result->fetch) {
        my $entry = Plagger::Entry->new;

        # Property values come back in the order requested above:
        # feedlink, permalink, title, author.
        $entry->link($row->{properties}->[0]);
        $entry->permalink($row->{properties}->[1]);
        $entry->title($self->decode($row->{properties}->[2]));
        $entry->author($self->decode($row->{properties}->[3]));
        $entry->body($self->decode($row->{summary}));
        $feed->add_entry($entry);
    }

    return $feed;
}

1;

__END__