App::Hachero::Plugin::Summarize::Scraper - gets title or something via Web::Scraper


App-Hachero documentation Contained in the App-Hachero distribution.

Index


Code Index:

NAME

Top

App::Hachero::Plugin::Summarize::Scraper - gets title or something via Web::Scraper

SYNOPSIS

Top

  ---
  plugins:
    - module: Summarize::Scraper
      config:
        result_key: URI
        result:
            uri_from: path
            result_to: title
        uri:
            host: 'www.example.com'
        scraper:
            process: 
                - '//title'
                - 'title'
                - 'TEXT'
            result: 'title'

DESCRIPTION

Top

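For each entry in the configured result set, builds a URI from the field named by result.uri_from (optionally overriding URI parts such as the host), scrapes that page with Web::Scraper, and stores the scraped value (for example the page title) in the field named by result.result_to. Entries whose scrape fails or yields nothing are left unchanged.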

implemented hooks

* initialize
* summarize

AUTHOR

Top

Nobuo Danjou <nobuo.danjou@gmail.com>

SEE ALSO

Top

Web::Scraper

App::Hachero

App::Hachero::Result


App-Hachero documentation Contained in the App-Hachero distribution.

package App::Hachero::Plugin::Summarize::Scraper;
use strict;
use warnings;
use base qw(App::Hachero::Plugin::Base);
use Scalar::Util qw(blessed);
use URI;
use Web::Scraper;

# Hook: enrich every record of one result set with a value scraped from
# the web page the record points at.
#
# Configuration read from $self->config->{config}:
#   result_key        key into $context->result selecting the result set
#   result.uri_from   record field holding the URI (string or object)
#   result.result_to  record field that receives the scraped value
#   uri               optional map of URI method name => value, applied
#                     to the URI before it is fetched (e.g. host)
#   scraper.process   argument list handed to Web::Scraper's process()
#   scraper.result    argument handed to Web::Scraper's result()
#
# Scraping is best effort: a record whose scrape dies or returns a false
# value is skipped and its result_to field is left untouched.
sub summarize :Hook {
    my ($self, $context) = @_;

    my $config = $self->config->{config};
    my $scraper = scraper {
        process @{$config->{scraper}->{process}};
        result $config->{scraper}->{result};
    };

    my $uri_from  = $config->{result}->{uri_from};
    my $result_to = $config->{result}->{result_to};

    # URI setters are order-sensitive (changing the scheme can change which
    # methods the object supports), so apply them in a fixed order -- scheme
    # first, the rest alphabetically -- rather than in unspecified hash order.
    my $overrides   = $config->{uri} || {};
    my @uri_methods = (
        (grep { $_ eq 'scheme' } keys %{$overrides}),
        (sort grep { $_ ne 'scheme' } keys %{$overrides}),
    );

    for my $r ($context->result->{$config->{result_key}}->values) {
        my $uri = $r->{$uri_from};
        if (!ref $uri) {
            $uri = URI->new($uri);
        }
        elsif (@uri_methods && blessed($uri) && $uri->can('clone')) {
            # Work on a copy so the overrides below are not written back
            # into the URI object that is stored in the record itself.
            $uri = $uri->clone;
        }

        for my $meth (@uri_methods) {
            $uri->$meth($overrides->{$meth});
        }

        # Deliberately swallow scrape failures and empty results.
        my $result = eval { $scraper->scrape($uri) } or next;
        $r->{$result_to} = $result;
    }
}

1;
__END__