| Bundle-WWW-Scraper-Auction documentation | Contained in the Bundle-WWW-Scraper-Auction distribution. |
WWW::Scraper::eBay - Scrapes www.eBay.com
require WWW::Scraper;
$search = new WWW::Scraper('eBay');
This class is an eBay extension of WWW::Scraper. It handles making and interpreting eBay searches on http://www.eBay.com.
WWW::Scraper::eBay is written and maintained
by Glenn Wood, http://search.cpan.org/search?mode=author&query=GLENNWOOD.
Copyright (c) 2001 Glenn Wood. All rights reserved.
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| Bundle-WWW-Scraper-Auction documentation | Contained in the Bundle-WWW-Scraper-Auction distribution. |
package WWW::Scraper::eBay;

# eBay back-end for WWW::Scraper: declares the search request, the frame used
# to pick result rows out of the returned page, and the field call-backs that
# the frame refers to.

use strict;
use warnings;
use vars qw($VERSION @ISA);
@ISA = qw(WWW::Scraper);
$VERSION = sprintf("%d.%02d", q$Revision: 1.01 $ =~ /(\d+)\.(\d+)/);

use WWW::Scraper(qw(2.27 generic_option addURL trimTags trimLFs));

# Description of the search request handed back by scraperRequest().
my $scraperRequest =
   {
      'type' => 'FORM'
     ,'formNameOrNumber' => 'search_form'
     ,'submitButton' => undef
     # This is the basic URL on which to build the query.
     ,'url' => 'http://pages.ebay.com/search/items/basicsearch.html'
     # This is the Scraper attributes => native input fields mapping
     ,'nativeQuery' => 'query'
     ,'nativeDefaults' => { 'query' => undef }
     ,'fieldTranslations' => { '*' => { '*' => '*' } }
     # Some more options for the Scraper operation.
     ,'cookies' => 0
   };

# Builds the 'HIT*' sub-frame for one table of auction rows.  The result frame
# needs it twice (after TABLE #0 and after TABLE #2); every call returns a
# fresh structure so the two occurrences never share array references.
my $auctionHitFrame = sub {
    return [ 'HIT*', 'Auction',
        [
            # NOTE(review): the original wrapped this TABLE in
            #     [ 'TRYUNTIL', 2, 'url', [ ... ] ]
            # on lines tagged "#try again!".  It is kept disabled here because
            # testParameters() still lists implementing 'TRYUNTIL' as a TODO;
            # confirm against the upstream module before enabling it.
            [ 'TABLE',
                [
                    [ 'TR',
                        [
                            # First cell holds the thumbnail image, e.g.
                            # <img height="15" width="64" border="0" alt="Pic" src="http://pics.ebay.com/aw/pics/lst/_p__64x15.gif">
                            # A 'REGEX' frame capturing 'thumbNailUrl' was once
                            # sketched for it but is disabled; the cell is skipped.
                            [ 'TD' ]
                           ,[ 'TD', [ [ 'A', 'url', 'title' ] ] ]
                           ,[ 'TD', 'price',   \&parsePrice ]
                           ,[ 'TD', 'bids',    \&trimLFs ]
                           ,[ 'TD', 'endsPDT', \&trimLFs ]
                            # Disabled: a never-matching regex that only declared fields.
                            #,[ 'REGEX', 'neverMatch', 'isNew', 'itemNumber' ] #, 'isBillpoint']
                        ]
                    ]
                ]
            ]
        ]
    ];
};

# Frame describing how the result page is taken apart.
my $scraperFrame =
    [ 'HTML',
        [
            [ 'COUNT', '([,0-9]+)</b>\s+items found\s+for' ]
           ,[ 'BODY', '</form>', undef,
                [
                    #[ 'NEXT', 2, \&findNextForm ] # it used to be a form . . .
                    [ 'NEXT', 1, 'Next >' ]
                   ,[ 'BODY', '<!-- eBayCacheStart -->', '<!-- eBayCacheEnd -->',
                        [
                            [ 'TABLE', '#0' ]
                           ,$auctionHitFrame->()
                           ,[ 'TABLE', '#2' ]
                           ,$auctionHitFrame->()
                            #,[ 'BOGUS', -2 ] # eBay's last 2 hits are bogus ("return to top", etc.).
                        ]
                    ]
                ]
            ]
        ]
    ];

# Returns the hash of parameters used when testing this engine.
sub testParameters {
    return {
                 'SKIP' => ''
                ,'TODO' => "Implement 'TRYUNTIL' Scraper frame option - helps for skipping 'hits' that aren't actually hits."
                ,'testNativeQuery' => 'turntable'
                ,'expectedOnePage' => 9
                ,'expectedMultiPage' => 25
                ,'expectedBogusPage' => 0
           };
}

# Access methods for the structural declarations of this Scraper engine.
sub scraperRequest { return $scraperRequest }
sub scraperFrame {
    my ($self) = @_;
    $self->SUPER::SetScraperFrame($scraperFrame);
    return $scraperFrame;
}
sub scraperDetail { return undef }

# Old-style form declaration; only its URL slot is updated by import().
my $defaultScraperForm_url = ['http://pages.ebay.com/search/items/search.html', 0, 'query', undef];

# Handles "use WWW::Scraper::eBay (...)".  Hash-ref arguments are options: a
# true 'scraperBaseURL' replaces the search URL in both the new-style request
# and the old-style form declaration.  All other arguments are passed on to
# Exporter::import as the export list.
sub import {
    my $package = shift;

    my (@exports, @options);
    for my $arg (@_) {
        if ( 'HASH' eq ref($arg) ) {
            push @options, $arg;
        }
        else {
            push @exports, $arg;
        }
    }

    for my $option (@options) {
        next unless $option->{'scraperBaseURL'};
        $scraperRequest->{'url'}      = $option->{'scraperBaseURL'}; # new form
        $defaultScraperForm_url->[0] = $option->{'scraperBaseURL'}; # old form
    }

    # Make sure the goto target exists even if nothing else loaded Exporter.
    require Exporter;
    @_ = ($package, @exports);
    goto &Exporter::import;
}

# Sometimes there's just a NEXT form, sometimes there's a PREV form and a NEXT form . . .
#
# Walks the <FORM> elements found in $dat, rebuilds each one as an HTML::Form,
# and looks for an input whose value is 'Next'.  Returns the URI of the request
# produced by clicking that input, or '' when no such form is found.
# NOTE(review): HTML::Form is not loaded in this file; presumably WWW::Scraper
# loads it - confirm before re-enabling this call-back in the frame.
sub findNextForm {
    my ($self, $hit, $dat) = @_;

    my $next_content = $dat;
    while ( my ($sub_content, $frm) = $self->getMarkedText('FORM', \$next_content) ) {
        last unless $sub_content;

        # Reconstruct the form that contains the NEXT data.
        my $attributes = defined $frm ? $frm : '';
        my ($form) = HTML::Form->parse("<form $attributes>$sub_content</form>", $self->{'_base_url'});
        next unless $form;      # nothing parseable in this chunk; try the next form

        my $submit_button;
        for my $input ( $form->inputs() ) {
            my $value = $input->value();
            if ( defined $value && $value eq 'Next' ) {
                $submit_button = $input;
                last;
            }
        }

        if ( $submit_button ) {
            my $req = $submit_button->click($form);
            return $req->uri();
        }
    }
    return '';
}

# eBay's title sometimes includes other things, such as "new" link and "billpoint" link
#<td valign=top width=52%><font size=3><a href="http://cgi.ebay.com/ws/eBayISAPI.dll?ViewItem&item=1383008995">UNITED AUDIO TURNTABLE DUAL 1209 MODEL</a></font>
#<BR><img height=1 width=200 border=0 alt="" src="http://pics.ebay.com/aw/pics/s.gif"></td>
#
# Scans the <A> elements of $dat, skipping anchors that only wrap an image,
# and plugs 'title', 'isNew' and 'itemNumber' into $hit.  Returns the href of
# the anchor the scan stopped on; if no href can be found, the anchor's raw tag
# text is returned unchanged.
sub parseItemTitle {
    my ($self, $hit, $dat) = @_;

    my $next_content = $dat;
    my ($sub_content, $frm);
    my ($isNew, $isBillpoint) = (0, 0);
    while ( ($sub_content, $frm) = $self->getMarkedText('A', \$next_content) ) {
        last unless $sub_content;
        $isNew       |= ($sub_content =~ m{alt="New!"}) ? 1 : 0;
        $isBillpoint |= ($sub_content =~ m{alt="eBay Online Payments by Billpoint"}) ? 1 : 0;
        last unless $sub_content =~ m{<img}i;   # an anchor without an image is the title link
    }
    $hit->plug_elem('title', $sub_content);
    $hit->plug_elem('isNew', $isNew);
#    $hit->plug_elem('isBillpoint', $isBillpoint); # need to match Billpoint *after* matching title.

    my $url = $frm;
    my $itemNumber;
    if ( defined $url ) {
        # \1 (not $1) is the backreference that ties the closing quote to the
        # opening one; an unquoted href is accepted as well.
        if ( $url =~ m{href\s*=\s*(["']?)([^"'\s>]+)\1}i ) {
            $url = $2;
        }
        # Only take the item number from a successful match, never a stale $1.
        ($itemNumber) = $url =~ m{\bitem=(\d+)};
    }
    $hit->plug_elem('itemNumber', $itemNumber);
    return $url;
}

# eBay's price sometimes contains multiple values ("Buy it Now")
#
# Splits the cell on <BR> (any case, optionally self-closed), trims each piece
# with trimLFs and plugs every non-empty piece into $hit as 'price'.
sub parsePrice {
    my ($self, $hit, $dat) = @_;

    return undef unless defined $dat;
    for my $price ( split m{<BR\s*/?>}i, $dat ) {
        $price = $self->trimLFs($hit, $price);
        next unless $price;
        $hit->plug_elem('price', $price);
    }
    return undef; # we already plugged the values into the $hit.
}

1;

__END__
#####################################################################