| WWW-Scraper-ISBN-AmazonDE_Driver documentation | Contained in the WWW-Scraper-ISBN-AmazonDE_Driver distribution. |
WWW::Scraper::ISBN::AmazonDE_Driver - Search driver for the (DE) Amazon online catalog.
Version 0.21
See parent class documentation (WWW::Scraper::ISBN::Driver)
Searches for book information from the (DE) Amazon online catalog. This module is essentially a copy of WWW::Scraper::ISBN::AmazonFR_Driver, adapted for the German Amazon site.
Renee Baecker, <module at renee-baecker.de>
Please report any bugs or feature requests to
bug-www-scraper-isbn-amazonde_driver at rt.cpan.org, or through the web interface at
http://rt.cpan.org/NoAuth/ReportBug.html?Queue=WWW-Scraper-ISBN-AmazonDE_Driver.
I will be notified, and then you'll automatically be notified of progress on
your bug as I make changes.
You can find documentation for this module with the perldoc command.
perldoc WWW::Scraper::ISBN::AmazonDE_Driver
You can also look for information at:
AnnoCPAN (annotated CPAN documentation): http://annocpan.org/dist/WWW-Scraper-ISBN-AmazonDE_Driver
CPAN Ratings: http://cpanratings.perl.org/d/WWW-Scraper-ISBN-AmazonDE_Driver
RT (CPAN's request tracker): http://rt.cpan.org/NoAuth/Bugs.html?Dist=WWW-Scraper-ISBN-AmazonDE_Driver
Search CPAN: http://search.cpan.org/dist/WWW-Scraper-ISBN-AmazonDE_Driver
Copyright 2007 - 2010 Renee Baecker, all rights reserved.
This program is free software; you can redistribute it and/or modify it under the terms of the Artistic License 2.0.
| WWW-Scraper-ISBN-AmazonDE_Driver documentation | Contained in the WWW-Scraper-ISBN-AmazonDE_Driver distribution. |
package WWW::Scraper::ISBN::AmazonDE_Driver;

use warnings;
use strict;

# Parent driver class; loaded explicitly and also set up as the base class.
use WWW::Scraper::ISBN::Driver;
use base qw(WWW::Scraper::ISBN::Driver);

# HTTP client and HTML scraping DSL used by search().
use WWW::Mechanize;
use Web::Scraper;

# amazon.de entry points. DIRECT is the product-page prefix to which the
# ISBN is appended.
use constant {
    AMAZON => 'http://www.amazon.de/',
    SEARCH => 'http://www.amazon.de/',
    DIRECT => 'http://www.amazon.de/gp/product/',
};

# Debug switch, taken from the environment when the module is loaded.
our $DEBUG = $ENV{ISBN_DRIVER_DEBUG};

our $VERSION = '0.21';
# search($isbn)
#
# Fetches the amazon.de product page for the given ISBN (via the DIRECT
# product URL) and extracts the book data from it.
#
# Parameters:
#   $isbn - the ISBN to look up; anything other than digits and the ISBN-10
#           check character "X" is ignored when building the URL.
#
# Returns:
#   On success, the value of $self->book, after it has been set to a hash
#   reference with the keys isbn, author, title, image_link, thumb_link,
#   publisher, pubdate and book_link, and after found(1) has been set.
#   On failure, whatever $self->handler($message) returns; found stays 0 and
#   book stays undef.
sub search {
    my ($self, $isbn) = @_;

    $self->found(0);
    $self->book(undef);

    # Keep the digits and the ISBN-10 check character. Stripping the "X"
    # (as a digits-only filter would) yields a URL for a different or
    # non-existent product.
    my $norm_isbn = defined $isbn ? uc $isbn : '';
    $norm_isbn =~ s/[^0-9X]//g;
    return $self->handler('Invalid ISBN given for amazon.de lookup.')
        unless length $norm_isbn;

    # autocheck is on by default for a plain WWW::Mechanize object and makes
    # a failed get() die; switch it off so the failure can be reported
    # through handler() below.
    my $mechanize = WWW::Mechanize->new( autocheck => 0 );
    $mechanize->agent_alias('Linux Mozilla');

    my $url = DIRECT . $norm_isbn;
    $mechanize->get($url);
    return $self->handler("No success when trying to get $url")
        unless $mechanize->success;

    my $content = $mechanize->content();

    # Collect the page title, the meta description and every <script> body
    # that registers the "original_image".
    my $scraper = scraper {
        process "title", title => 'TEXT';
        process "meta[name='description']", content => '@content';
        process 'script', 'scripts[]' => sub {
            my $script = join '', @{ $_->content_array_ref };
            $script =~ /registerImage\("original_image"/ ? $script : ();
        };
    };
    my $sresult = $scraper->scrape($content);

    # First usable image script, if the page had one at all.
    my ($image_script) = grep { defined } @{ $sresult->{scripts} || [] };

    # NOTE(review): this pattern has no /x flag, so the blanks around "\s"
    # are matched literally -- confirm that this is intended.
    my ($thumb, $image);
    if ( defined $image_script ) {
        ($thumb, $image) = $image_script
            =~ /original_image","([^"]+)"\s*,\s*"<a \s href="\+'"'\+"([^"]*)"/;
    }

    # "Verlag" is the publisher entry of the product details list.
    my ($pub) = $content =~ m{<li><b>Verlag:</b>\s*(.*?)</li>}msx;

    my $data = {
        content    => $sresult->{content},
        thumb_link => $thumb,
        image_link => $image,
        published  => $pub,
        title      => $sresult->{title},
    };

    # The hash reference itself is always defined, so check whether any
    # field was actually extracted.
    return $self->handler("Could not extract data from amazon.de result page.")
        unless grep { defined } values %$data;

    # Trim leading and trailing whitespace of every extracted field.
    for my $key ( keys %$data ) {
        next unless defined $data->{$key};
        $data->{$key} =~ s/^\s+//;
        $data->{$key} =~ s/\s+$//;
    }

    # The meta description is split on colons: the first field is used as
    # the title and the second-to-last field as the author. Without a
    # description the text of the <title> element stays in place as title.
    if ( defined $data->{content} ) {
        my @parts = split /:/, $data->{content};
        s/^\s+|\s+$//g for @parts;
        $data->{title}  = $parts[0]  if @parts;
        $data->{author} = $parts[-2] if @parts >= 2;
    }

    # Split the publisher entry into the publisher name (anything after a
    # ";" is dropped) and the parenthesised publication date.
    if ( defined $data->{published} ) {
        @{$data}{qw/publisher pubdate/} =
            $data->{published} =~ /\s*(.*?)(?:;.*?)?\s+\(([^)]*)/;
    }

    my $bk = {
        'isbn'       => $isbn,
        'author'     => $data->{author},
        'title'      => $data->{title},
        'image_link' => $data->{image_link},
        'thumb_link' => $data->{thumb_link},
        'publisher'  => $data->{publisher},
        'pubdate'    => $data->{pubdate},
        # scalar(): a method call inside a hash constructor runs in list
        # context; force exactly one value so the pairs stay aligned.
        'book_link'  => scalar $mechanize->uri(),
    };

    $self->book($bk);
    $self->found(1);
    return $self->book;
}
1; # End of WWW::Scraper::ISBN::AmazonDE_Driver