WWW::Scraper::BAJobs - Scrapes BAJobs.com


Bundle-WWW-Scraper-Job documentation Contained in the Bundle-WWW-Scraper-Job distribution.

Index


Code Index:

NAME

Top

WWW::Scraper::BAJobs - Scrapes BAJobs.com

SYNOPSIS

Top

    require WWW::Scraper;
    $search = new WWW::Scraper('BAJobs');




DESCRIPTION

Top

This class is a BAJobs specialization of WWW::Search. It handles making and interpreting BAJobs searches at http://www.BAJobs.com.

This class exports no public interface; all interaction should be done through WWW::Search objects.

OPTIONS

Top

None at this time (2001.05.06)

search_url=URL

Specifies which URL to query with the BAJobs protocol. The default is http://www.bajobs.com/jobseeker/usersearch.jsp.

search_debug, search_parse_debug, search_ref - As specified in WWW::Search.

SEARCH FIELDS

Top

displayResultsPerPage - Results per Page

"5" => 5
"10" => 10
"20" => 20
"50" => 50
"100" => 100

postingAge - Age of Posting

"0" => any time
"1" => 1 day
"3" => 3 days
"7" => 1 week
"8" => 2 weeks
"10" => 1 month

workTermTypeIDs - Work Term

"1" => Full Time
"2" => Part Time
"3" => Contract
"4" => Temporary/Seasonal
"5" => Internship

countyIDs - Job Location-County

"0" => Any
"1" => Alameda
"2" => Contra Costa
"3" => Marin
"4" => Napa
"5" => San Benito
"6" => San Francisco
"7" => San Mateo
"8" => Santa Clara
"9" => Santa Cruz
"10" => Solano
"11" => Sonoma
"12" => Other

jobPostingCategoryIDs - Job Category

"0" => Any
"1" => Accounting/Finance
"2" => Administrative/Clerical
"3" => Advertising
"4" => Aerospace/Aviation
"5" => Agricultural
"6" => Architecture
"7" => Arts/Entertainment
"8" => Assembly
"9" => Audio/Visual
"10" => Automotive
"11" => Banking/Financial Services
"12" => Biotechnology
"13" => Bookkeeping
"14" => Business Development
"15" => Child Care Services
"16" => Colleges & Universities
"17" => Communications/Media
"18" => Computer
"19" => Computer - Hardware
"20" => Computer - Software
"21" => Construction
"22" => Consulting/Professional Services
"23" => Customer Service/Support
"24" => Data Entry/Processing
"25" => Education/Training
"26" => Engineering
"27" => Engineering - Civil
"28" => Engineering - Hardware
"29" => Engineering - Software
"30" => Environmental
"31" => Executive/Management
"32" => Fund Raising/Development
"33" => Government/Civil Service
"34" => Graphic Design
"35" => Health Care/Health Services
"36" => Hospitality/Tourism
"37" => Human Resources
"38" => Information Technology
"39" => Insurance
"40" => Internet/E-Commerce
"41" => Law Enforcement/Security
"43" => Maintenance/Custodial
"44" => Manufacturing
"45" => Marketing
"46" => Miscellaneous
"47" => Non-Profit
"48" => Pharmaceutical
"49" => Printing/Publishing
"50" => Property Management/Facilities
"51" => Public Relations
"74" => Purchasing
"52" => QA/QC
"53" => Radio/Television/Film/Video
"54" => Real Estate
"57" => Receptionist
"55" => Recruiting/Staffing
"56" => Research
"58" => Restaurant/Food Service
"59" => Retail
"60" => Sales
"61" => Sales - Inside/Telemarketing
"62" => Sales - Outside
"63" => Security/Investment
"64" => Shipping/Receiving
"65" => Social Work/Services
"66" => Technical Support
"67" => Telecommunications
"68" => Training
"69" => Transportation
"70" => Travel
"71" => Warehouse
"72" => Web Design
"73" => Writer

AUTHOR

Top

WWW::Scraper::BAJobs is written and maintained by Glenn Wood, http://search.cpan.org/search?mode=author&query=GLENNWOOD.

COPYRIGHT

Top


Bundle-WWW-Scraper-Job documentation Contained in the Bundle-WWW-Scraper-Job distribution.

package WWW::Scraper::BAJobs;

#####################################################################

use strict;
use vars qw($VERSION @ISA);
@ISA = qw(WWW::Scraper);
$VERSION = sprintf("%d.%02d", q$Revision: 1.00 $ =~ /(\d+)\.(\d+)/);

use Carp ();
use WWW::Scraper(qw(1.48 generic_option addURL trimTags));
use WWW::Scraper::FieldTranslation;

use LWP::UserAgent;
use HTML::Form;
use HTTP::Cookies;

# As of 2002.01.26, this is what BAJobs "Refine your search" <FORM> looks like.
#<form action="/jobseeker/usersearch.jsp" method=post>
#  <input type="hidden" name="searchKeywordsMethod" value=1>
#  <input type="hidden" name="wholeWord" value="true">
#  <input type="hidden" name="displayResultsPerPage" value="20">
#  <input type="hidden" name="displaySortOrder" value="1">
#  <input type="hidden" name="postingAge" value="7">
#  <input type="hidden" name="countyList" value="">
#  <input type="hidden" name="workTermTypeList" value="">
#  <input type="hidden" name="jobPostingCategoryList" value="">
#  <input type="hidden" name="industryCategoryList" value="">
#  <p><b><font color=006699 face="arial,helvetica,sans-serif">Refine Your Search</font></b>
#  <br>
#  <input type=text name="searchKeywords" value=" Perl " size=40> &nbsp; &nbsp; <input type=submit value="Search">
#</form>

# Request description for the BAJobs engine; returned by scraperRequest().
# The 'nativeDefaults' keys are the hidden inputs of BAJobs' "Refine your
# search" <FORM> (see the form listing above); displayResultsPerPage is set
# to 100 here rather than the form's 20.
my $scraperRequest = {
    # 'POST' - we used to use 'FORM', which works fine, too, but this way's
    # a little faster.
    'type' => 'POST',

    # This is the basic URL on which to build the query.
    'url' => 'http://www.bajobs.com/jobseeker/usersearch.jsp?',
    # This one is the location of the <FORM>:
    #'url' => 'http://www.bajobs.com/jobseeker/search.jsp',

    # 'searchKeywords' is the name of the form's text input; presumably the
    # framework places the query string in this field - TODO confirm.
    'nativeQuery' => 'searchKeywords',

    'nativeDefaults' => {
        'searchKeywordsMethod'   => 1,
        'wholeWord'              => 'true',
        'displayResultsPerPage'  => '100',
        'displaySortOrder'       => 1,
        'postingAge'             => '7',
        'countyList'             => '',
        'workTermTypeList'       => '',
        'jobPostingCategoryList' => '',
        'industryCategoryList'   => '',
    },

    'defaultRequestClass' => 'Job',

    'fieldTranslations' => {
        '*' => {
            'skills'    => 'searchKeywords',
            'payrate'   => undef,
            # Arrow-style constructor call instead of indirect-object
            # "new Class(...)" syntax, which perlobj discourages.
            'locations' => WWW::Scraper::FieldTranslation->new('BAJobs', 'Job', 'locations'),
            '*'         => '*',
        },
    },

    # Some more options for the Scraper operation.
    'cookies' => 1,
};

# Parsing frame for BAJobs result pages; handed to SUPER::scraperFrame() by
# the scraperFrame() accessor. It is a nested list of [ TAG, args..., children ]
# entries.
my $scraperFrame = [
    'HTML',
    [
        # Total hit count, captured from the "... of <b>N</b></font> total" text.
        [ 'COUNT', 'Job Postings.*?[- 0-9]+.*?of.*?<b>([,0-9]+)</b></font> total' ],

        # Navigation strip delimited by the page's own HTML comments.
        [
            'BODY', '<!-- top prev/next -->', '<!-- end top prev/next -->',
            [
                [ 'NEXT', 1, '<b>NEXT</b>' ],
            ],    # a \&fixNext callback used to be considered here
        ],

        # Everything from the job-list marker onward (empty end marker).
        [
            'BODY', '<!-- job list -->', '',
            [
                [
                    'TABLE', '#0',
                    [
                        # There's an actual title row! Imagine that!
                        [ 'TR' ],
                        [
                            'HIT*',
                            [
                                [
                                    'TR',
                                    [
                                        [ 'TD', [ [ 'A', 'corpURL', 'corporateBackground' ] ] ],
                                        [ 'TD', 'postingDate' ],
                                        [ 'A',  'url', 'title' ],
                                        [ 'TD', 'company' ],
                                        [ 'TD', '_clear_gif_' ],
                                        [ 'TD', 'location' ],
                                    ],
                                ],
                            ],
                        ],
                    ],
                ],
            ],
        ],
    ],
];


# Passes the BAJobs home page URL and logo <IMG> markup to the object's
# searchEngineHome() and searchEngineLogo() methods, then returns the object.
sub init {
    my $scraper = shift;

    $scraper->searchEngineHome('http://www.BAJobs.com');
    $scraper->searchEngineLogo(
        '<IMG SRC="http://www.bajobs.com/graphics/bajlogo118x80.gif">'
    );

    return $scraper;
}

# Returns a hash reference of test settings for this engine. When called on
# a reference (rather than a class name) the invocant's 'isTesting' entry is
# also set to 1.
sub testParameters {
    my $invocant = shift;

    # Only a reference can carry the flag; a plain class name is left alone.
    $invocant->{'isTesting'} = 1 if ref $invocant;

    # 'POST' style scraperFrames can't be tested because of a bug in
    # WWW::Search (2.2[56]). Both arms of this conditional are empty strings,
    # so 'SKIP' is always '' whatever isGlennWood() reports.
    my $skipReason = WWW::Scraper::isGlennWood() ? '' : '';

    my %parameters = (
        'SKIP'                  => $skipReason,
        'testNativeQuery'       => 'Sales',
        'expectedOnePage'       => 9,
        'displayResultsPerPage' => 10,
        'expectedMultiPage'     => 11,
        'expectedBogusPage'     => 0,
    );

    return \%parameters;
}


# Access methods for the structural declarations of this Scraper engine.
# Accessor: returns the file-level $scraperRequest structure.
sub scraperRequest {
    return $scraperRequest;
}
# Accessor: passes the file-level $scraperFrame structure to the parent
# class's scraperFrame() and returns whatever that call returns.
sub scraperFrame {
    my ($self) = @_;
    return $self->SUPER::scraperFrame($scraperFrame);
}
# Accessor: this engine declares no detail-page frame, so the result is undef.
# The explicit undef is deliberate: in list context callers receive a
# one-element list, exactly as before.
sub scraperDetail {
    return undef;
}


1;