| Bundle-WWW-Scraper-Job documentation | Contained in the Bundle-WWW-Scraper-Job distribution. |
WWW::Scraper::BAJobs - Scrapes BAJobs.com
require WWW::Scraper;
$search = new WWW::Scraper('BAJobs');
This class is a BAJobs specialization of WWW::Search. It handles making and interpreting BAJobs searches at http://www.BAJobs.com.
This class exports no public interface; all interaction should be done through WWW::Search objects.
None at this time (2001.05.06)
Specifies who to query with the BAJobs protocol.
The default is at
http://www.BAJobs.com/cgi-bin/job-search.
WWW::Scraper::BAJobs is written and maintained
by Glenn Wood, http://search.cpan.org/search?mode=author&query=GLENNWOOD.
Copyright (c) 2001 Glenn Wood. All rights reserved.
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| Bundle-WWW-Scraper-Job documentation | Contained in the Bundle-WWW-Scraper-Job distribution. |
package WWW::Scraper::BAJobs;

#####################################################################
# Scraper definition for BAJobs.com: declares the request description
# ($scraperRequest) and the page-parsing frame ($scraperFrame), plus the
# init() and testParameters() hooks.
#####################################################################

use strict;
use vars qw($VERSION @ISA);
@ISA = qw(WWW::Scraper);
$VERSION = sprintf("%d.%02d", q$Revision: 1.00 $ =~ /(\d+)\.(\d+)/);

use Carp ();
use WWW::Scraper(qw(1.48 generic_option addURL trimTags));
use WWW::Scraper::FieldTranslation;

use LWP::UserAgent;
use HTML::Form;
use HTTP::Cookies;

# As of 2002.01.26, this is what BAJobs "Refine your search" <FORM> looks like.
#<form action="/jobseeker/usersearch.jsp" method=post>
#    <input type="hidden" name="searchKeywordsMethod" value=1>
#    <input type="hidden" name="wholeWord" value="true">
#    <input type="hidden" name="displayResultsPerPage" value="20">
#    <input type="hidden" name="displaySortOrder" value="1">
#    <input type="hidden" name="postingAge" value="7">
#    <input type="hidden" name="countyList" value="">
#    <input type="hidden" name="workTermTypeList" value="">
#    <input type="hidden" name="jobPostingCategoryList" value="">
#    <input type="hidden" name="industryCategoryList" value="">
#    <p><b><font color=006699 face="arial,helvetica,sans-serif">Refine Your Search</font></b>
#    <br>
#    <input type=text name="searchKeywords" value=" Perl " size=40> <input type=submit value="Search">
#</form>

# Request description handed out by scraperRequest().  The keys under
# 'nativeDefaults' mirror the hidden <input> fields of the form shown above
# (note that displayResultsPerPage is '100' here, not the form's 20).
my $scraperRequest = {
    'type' => 'POST',    # 'POST' - we used to use 'FORM', which works fine, too, but this way's a little faster.

    # This is the basic URL on which to build the query.
    'url' => 'http://www.bajobs.com/jobseeker/usersearch.jsp?',
    #,'url' => 'http://www.bajobs.com/jobseeker/search.jsp' # This one is the location of the <FORM>

    'nativeQuery'    => 'searchKeywords',
    'nativeDefaults' => {
        'searchKeywordsMethod'   => 1,
        'wholeWord'              => 'true',
        'displayResultsPerPage'  => '100',
        'displaySortOrder'       => 1,
        'postingAge'             => '7',
        'countyList'             => '',
        'workTermTypeList'       => '',
        'jobPostingCategoryList' => '',
        'industryCategoryList'   => '',
    },
    'defaultRequestClass' => 'Job',
    'fieldTranslations'   => {
        '*' => {
            'skills'    => 'searchKeywords',
            'payrate'   => undef,
            # Direct method call instead of the ambiguous indirect-object
            # form "new WWW::Scraper::FieldTranslation(...)".
            'locations' => WWW::Scraper::FieldTranslation->new('BAJobs', 'Job', 'locations'),
            '*'         => '*',
        },
    },

    # Some more options for the Scraper operation.
    'cookies' => 1,
};

# Parsing frame passed to SUPER::scraperFrame() by this module's
# scraperFrame() accessor.  It is a nested list of [ TYPE, args... ] nodes;
# the leaf strings ('postingDate', 'title', 'company', ...) are the field
# names this frame declares.
my $scraperFrame =
[ 'HTML',
    [
        [ 'COUNT', 'Job Postings.*?[- 0-9]+.*?of.*?<b>([,0-9]+)</b></font> total' ],
        [ 'BODY', '<!-- top prev/next -->', '<!-- end top prev/next -->',
            [ [ 'NEXT', 1, '<b>NEXT</b>' ] ]    #, \&fixNext ] ]
        ],
        [ 'BODY', '<!-- job list -->', '',
            [ [ 'TABLE', '#0',
                [
                    [ 'TR' ],    # There's an actual title row! Imagine that!
                    [ 'HIT*',
                        [ [ 'TR',
                            [
                                [ 'TD', [ [ 'A', 'corpURL', 'corporateBackground' ] ] ],
                                [ 'TD', 'postingDate' ],
                                [ 'A', 'url', 'title' ],
                                [ 'TD', 'company' ],
                                [ 'TD', '_clear_gif_' ],
                                [ 'TD', 'location' ],
                            ]
                        ] ]
                    ]
                ]
            ] ]
        ]
    ]
];

# init($self)
# Records the search engine's home URL and logo markup on the object via its
# searchEngineHome/searchEngineLogo methods, then returns the object.
sub init {
    my ($self) = @_;
    $self->searchEngineHome('http://www.BAJobs.com');
    $self->searchEngineLogo('<IMG SRC="http://www.bajobs.com/graphics/bajlogo118x80.gif">');
    return $self;
}

# testParameters($self)
# When invoked on an object (rather than a class name), flags it with
# {'isTesting'} = 1.  Returns a hash ref of settings for the test harness.
sub testParameters {
    my ($self) = @_;

    if ( ref $self ) {
        $self->{'isTesting'} = 1;
    }

    # 'POST' style scraperFrames can't be tested cause of a bug in WWW::Search(2.2[56]) !
    # NOTE(review): both arms of this ternary are '', so 'SKIP' is always the
    # empty string regardless of what isGlennWood() returns.
    my $isNotTestable = WWW::Scraper::isGlennWood()?'':'';
    return {
        'SKIP'                  => $isNotTestable,
        'testNativeQuery'       => 'Sales',
        'expectedOnePage'       => 9,
        'displayResultsPerPage' => 10,
        'expectedMultiPage'     => 11,
        'expectedBogusPage'     => 0,
    };
}

# Access methods for the structural declarations of this Scraper engine.
# Returns the file-scoped $scraperRequest value.
sub scraperRequest {
    return $scraperRequest;
}

# Forwards the file-scoped $scraperFrame to the parent class's
# scraperFrame method and returns whatever that call returns.
sub scraperFrame {
    my ($self) = @_;
    return $self->SUPER::scraperFrame($scraperFrame);
}

# Always returns undef (in list context: a one-element list holding undef).
sub scraperDetail {
    return undef;
}

1;