WWW::Scraper::ISBN::AmazonDE_Driver - Search driver for the (DE) Amazon online catalog.


WWW-Scraper-ISBN-AmazonDE_Driver documentation. Contained in the WWW-Scraper-ISBN-AmazonDE_Driver distribution.

Index


Code Index:

NAME

Top

WWW::Scraper::ISBN::AmazonDE_Driver - Search driver for the (DE) Amazon online catalog.

VERSION

Top

Version 0.21

SYNOPSIS

Top

See parent class documentation (WWW::Scraper::ISBN::Driver)

DESCRIPTION

Top

Searches for book information in the German (DE) Amazon online catalog. This module is a straightforward copy of WWW::Scraper::ISBN::AmazonFR_Driver, adapted and translated for amazon.de.

FUNCTIONS

Top

AUTHOR

Top

Renee Baecker, <module at renee-baecker.de>

BUGS

Top

Please report any bugs or feature requests to bug-www-scraper-isbn-amazonde_driver at rt.cpan.org, or through the web interface at http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WWW::Scraper::ISBN::AmazonDE_Driver. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.

SUPPORT

Top

You can find documentation for this module with the perldoc command.

    perldoc WWW::Scraper::ISBN::AmazonDE_Driver

You can also look for information at:

* AnnoCPAN: Annotated CPAN documentation

http://annocpan.org/dist/WWW::Scraper::ISBN::AmazonDE_Driver

* CPAN Ratings

http://cpanratings.perl.org/d/WWW::Scraper::ISBN::AmazonDE_Driver

* RT: CPAN's request tracker

http://rt.cpan.org/NoAuth/Bugs.html?Dist=WWW::Scraper::ISBN::AmazonDE_Driver

* Search CPAN

http://search.cpan.org/dist/WWW-Scraper-ISBN-AmazonDE_Driver

ACKNOWLEDGEMENTS

Top

COPYRIGHT & LICENSE

Top


WWW-Scraper-ISBN-AmazonDE_Driver documentation. Contained in the WWW-Scraper-ISBN-AmazonDE_Driver distribution.
package WWW::Scraper::ISBN::AmazonDE_Driver;

use warnings;
use strict;

use WWW::Scraper::ISBN::Driver;
use base qw(WWW::Scraper::ISBN::Driver);
use WWW::Mechanize;
use Web::Scraper;

# Base URLs of the German Amazon site. DIRECT is the prefix to which
# search() appends the normalised ISBN; AMAZON and SEARCH are not referenced
# by any live code in search().
use constant {
    AMAZON => 'http://www.amazon.de/',
    SEARCH => 'http://www.amazon.de/',
    DIRECT => 'http://www.amazon.de/gp/product/',
};

# Debug switch, initialised once from the ISBN_DRIVER_DEBUG environment
# variable when the module is loaded.
our $DEBUG   = $ENV{ISBN_DRIVER_DEBUG};

our $VERSION = '0.21';

# search($isbn)
#
# Looks up a book on amazon.de by requesting the product page for the given
# ISBN (DIRECT . normalised ISBN) and scraping the returned HTML. An earlier
# implementation went through the amazon.de search form; the product page is
# now fetched directly.
#
# Parameters:
#   $isbn - the ISBN to look up. Everything except digits and the 'X' check
#           digit is stripped (and 'x' is upper-cased) before the URL is
#           built; the value is stored unmodified in the result's 'isbn' key.
#
# Returns:
#   On success, the value of $self->book after it has been set to a hash
#   reference with the keys isbn, author, title, image_link, thumb_link,
#   publisher, pubdate and book_link; found() is set to 1. Individual values
#   are undef when the corresponding piece could not be scraped.
#   On failure, whatever $self->handler($message) returns; this method then
#   leaves found() at 0 and book() at undef.
sub search {
    my ($self,$isbn) = @_;

    $self->found(0);
    $self->book(undef);

    # Keep the 'X' check digit of ISBN-10 numbers: dropping it would build
    # the URL of a different product.
    my $norm_isbn = defined $isbn ? uc $isbn : '';
    $norm_isbn =~ s/[^0-9X]//g;

    return $self->handler('No usable ISBN given.')
        unless length $norm_isbn;

    # autocheck is switched off so that a failed request is reported through
    # handler() below instead of WWW::Mechanize dying inside get().
    my $mechanize = WWW::Mechanize->new( autocheck => 0 );
    $mechanize->agent_alias( 'Linux Mozilla' );

    my $url = DIRECT . $norm_isbn;
    $mechanize->get( $url );

    return $self->handler( "No success when trying to get $url" )
        unless $mechanize->success;

    my $content = $mechanize->content();

    #$DEBUG and warn $content;

    # Collect the page title, the meta description and every <script> body
    # that registers the "original_image"; scripts without that call yield
    # an empty list and are therefore left out of 'scripts'.
    my $scraper = scraper {
        process "title"                    , title       => 'TEXT';
        process "meta[name='description']" , content     => '@content';
        process 'script'                   , 'scripts[]' => sub {
                my $script = join '', @{$_->content_array_ref};
                $script =~ /registerImage\("original_image"/ ? $script : ();
            };
    };

    my $sresult = $scraper->scrape( $content );

    # Without a title and without a description there is nothing to build a
    # book record from.
    return $self->handler("Could not extract data from amazon.de result page.")
        unless $sresult
            && ( defined $sresult->{content} || defined $sresult->{title} );

    # Thumbnail and full-size image URLs live in the registerImage() script;
    # it may be absent, in which case both links stay undef.
    # NOTE(review): the pattern has no /x flag, so "<a \s href" requires a
    # literal space, one whitespace character and another literal space --
    # confirm against a current amazon.de page.
    my ($thumb,$image);
    my $image_script = ( $sresult->{scripts} || [] )->[0];
    if ( defined $image_script ) {
        ($thumb,$image) = $image_script =~ /original_image","([^"]+)"\s*,\s*"<a \s href="\+'"'\+"([^"]*)"/;
    }

    my ($pub) = $content =~ m{<li><b>Verlag:</b>\s*(.*?)</li>}msx;

    my $data = {
        content    => $sresult->{content},
        thumb_link => $thumb,
        image_link => $image,
        published  => $pub,
        title      => $sresult->{title},
    };

    # trim top and tail
    for my $field (keys %$data) {
        next unless defined $data->{$field};
        $data->{$field} =~ s/^\s+//;
        $data->{$field} =~ s/\s+$//;
    }

    # The meta description is a colon-separated list: the first field is
    # taken as the title and the second-to-last as the author (an earlier
    # version of this code used the third-to-last field instead). When no
    # description is available the <title> text is kept as the title.
    # The fields are copied before trimming so the split result is never
    # modified through an alias.
    if ( defined $data->{content} ) {
        my @parts = map { my $part = $_; $part =~ s/^\s+|\s+$//g; $part }
                    split /:/, $data->{content};
        $data->{title}  = $parts[0]  if @parts;
        $data->{author} = $parts[-2] if @parts >= 2;
    }

    # "Verlag" looks like "<publisher>[; <edition>] (<date>)".
    if ( defined $data->{published} ) {
        ($data->{publisher},$data->{pubdate}) =
            ($data->{published} =~ /\s*(.*?)(?:;.*?)?\s+\(([^)]*)/);
    }

    my $bk = {
        'isbn'        => $isbn,
        'author'      => $data->{author},
        'title'       => $data->{title},
        'image_link'  => $data->{image_link},
        'thumb_link'  => $data->{thumb_link},
        'publisher'   => $data->{publisher},
        'pubdate'     => $data->{pubdate},
        'book_link'   => $mechanize->uri()
    };

    $self->book($bk);
    $self->found(1);
    return $self->book;
}

1; # End of WWW::Scraper::ISBN::AmazonDE_Driver