WWW::Scraper::Brainpower - Scrapes Brainpower.com


Bundle-WWW-Scraper-Job documentation Contained in the Bundle-WWW-Scraper-Job distribution.

Index


Code Index:

NAME

Top

WWW::Scraper::Brainpower - Scrapes Brainpower.com

SYNOPSIS

Top

    use WWW::Scraper;
    use WWW::Scraper::Response::Job;

    $search = new WWW::Scraper('Brainpower');

    $search->setup_query($query, {options});

    while ( my $response = $search->next_response() ) {
        # $response is a WWW::Scraper::Response::Job.
    }

DESCRIPTION

Top

Brainpower extends WWW::Scraper.

It handles making and interpreting Brainpower searches of http://www.Brainpower.com.

OPTIONS

Top

title

ALL => Any Designation
AP => Application Programmer
BA => Business Analyst
CS => Communications Programmer
DBA => DataBase Administrator
DSP => DataBase Programmer
GCD => Graphic Designer
HAD => Hardware/ASIC Programmer
JD => Java Developer
LAN => LAN/Network Administrator
PML => Project Manager/leader
QAT => Quality Assurance/Tester
SPS => Systems Programmer
SYA => Systems Administrator
TR => Technical Recruiter
TW => Technical Writer
WEB => Web Developer

skills

This is the query string. You do not explicitly set this; it's set by Scraper.

searchType

A RADIO button.

0 - All of the words
1 - Any of the words

rate

Hourly rate, limit 3 digits. Optional.

state (MULTIPLE - maximum 5 states)

80 => All US States
1 => Alabama
2 => Alaska
3 => Arizona
4 => Arkansas
5 => California(North)
6 => California(South)
7 => Colorado
8 => Connecticut
9 => Delaware
10 => District of Columbia
11 => Florida
12 => Georgia
13 => Hawaii
14 => Idaho
15 => Illinois
16 => Indiana
17 => Iowa
18 => Kansas
19 => Kentucky
20 => Louisiana
21 => Maine
22 => Maryland
23 => Massachusetts
24 => Michigan
25 => Minnesota
26 => Mississippi
27 => Missouri
28 => Montana
29 => Nebraska
30 => Nevada
31 => New Hampshire
32 => New Jersey
33 => New Mexico
34 => New York
35 => North Carolina
36 => North Dakota
37 => Ohio
38 => Oklahoma
39 => Oregon
40 => Pennsylvania
41 => Rhode Island
42 => South Carolina
43 => South Dakota
44 => Tennessee
45 => Texas
46 => Utah
47 => Vermont
48 => Virginia
49 => Washington
50 => West Virginia
51 => Wisconsin
52 => Wyoming

AUTHOR

Top

WWW::Scraper::Brainpower is written and maintained by Glenn Wood, http://search.cpan.org/search?mode=author&query=GLENNWOOD.

COPYRIGHT

Top


Bundle-WWW-Scraper-Job documentation Contained in the Bundle-WWW-Scraper-Job distribution.

package WWW::Scraper::Brainpower;

#####################################################################
# Scraper engine for Brainpower.com. The class inherits from WWW::Scraper
# through @ISA and supplies the request/frame/detail descriptions below.
use strict;
use vars qw(@ISA $VERSION);
@ISA = qw(WWW::Scraper);
# Build "major.minor" (minor zero-padded to two digits) from the two numbers
# captured out of the revision keyword string.
$VERSION = sprintf("%d.%02d", q$Revision: 1.04 $ =~ /(\d+)\.(\d+)/);

# trimTags, trimLFs, removeScriptsInHTML and cleanupHeadBody are referenced as
# callbacks in $scraperDetail.
# NOTE(review): the leading '2.18' in the import list is presumably a minimum
# version request handled by WWW::Scraper's import -- confirm.
use WWW::Scraper(qw(2.18 trimTags trimLFs removeScriptsInHTML cleanupHeadBody));
use WWW::Scraper::FieldTranslation(1.00);

# Request description for this engine; handed out by scraperRequest().
#   type                - query generation mode ('QUERY').
#   url                 - base URL the query string is appended to.
#   nativeQuery         - name of Brainpower's form field that carries the query.
#   nativeDefaults      - default values for Brainpower's other form fields.
#   defaultRequestClass - request class name ('Job').
#   fieldTranslations   - maps canonical request fields onto native fields;
#                         a value may be a field name, a code reference, or a
#                         WWW::Scraper::FieldTranslation object.
# NOTE(review): how WWW::Scraper interprets each key is defined outside this
# file -- confirm against the WWW::Scraper documentation.
my $scraperRequest = 
   { 
      'type' => 'QUERY'       # Type of query generation is 'QUERY'
      # This is the basic URL on which to build the query.
      ,'url' => 'http://www.brainpower.com/IndListProject.asp?'
      # This is the Scraper attributes => native input fields mapping
      ,'nativeQuery' => 'skills'
      ,'nativeDefaults' =>
                      {    'navItem' => 'searchProjects'  # This is a hidden field, presumably declares "search"
                          ,'submit1' => 1                 # This is the actual submit button.
                          #,'pageSize' => 100              # pageSize has no effect on Brainpower.com
                          ,'title'   => 'ALL'             # All job designations.
                          #,'title' => 'AP'               # Application Programmer.
                          ,'searchType' => 1              # searchType = ANY words.
                          ,'state'      => 80             # All US States
                          #,'state' => 5                  # California (North)
                          ,'rate' => ''
                      }
      ,'defaultRequestClass' => 'Job'
      ,'fieldTranslations' =>
              { '*' => 
                      {    'skills'    => 'skills'
                          ,'payrate'   => \&translatePayrate
                          # Explicit Class->new(...) call; avoids the ambiguous indirect-object form.
                          ,'locations' => WWW::Scraper::FieldTranslation->new('Brainpower', 'Job', 'locations')
                          ,'*'         => '*'
                      }
              }
      # Some more options for the Scraper operation.
     ,'cookies' => 1
   };

# Layout of the search-results page; handed out by scraperFrame().
# The nesting walks from the marker comment that opens the result area, through
# two nested tables and two field-less leading rows, to the repeated hit rows.
# Each hit row yields 'url' and 'jobID' (from the anchor), then 'skills',
# 'payrate' and 'location' from successive cells.
# NOTE(review): the exact meaning of the frame keywords (NEXT, COUNT, BODY,
# HIT*, BOGUS, ...) is defined by WWW::Scraper, not in this file -- confirm there.
my $scraperFrame =
[ 'HTML', 
    [ 
        [ 'NEXT', 'Next ' ]
       ,[ 'COUNT', 'Your search resulted in <b>([0-9,]+)</b> jobs.' ]
       ,[ 'BODY', '<!-- Begin Nested Right Table Cell -->', undef,
            [  
               [ 'TABLE', 
                 [
                   [ 'TABLE', 
                      [
                          # Two rows with no fields attached precede the hits.
                          [ 'TR' ]
                         ,[ 'TR' ]
                         ,[ 'HIT*', #'Job::Brainpower',
                             [ 
                                 [ 'TR', 
                                     [
                                         [ 'TD', [ [ 'A', 'url', 'jobID' ] ] ]
                                        ,[ 'TD' ] # There's a TD in a <!--COMMENT-->, here ! ! ! all are "Any Designation". E.G., <!--<TD><H6>&nbsp;&nbsp;&nbsp;&nbsp;TITLE</H6></TD>-->
                                        ,[ 'TD', 'skills' ]
                                        ,[ 'TD', 'payrate' ]
                                        ,[ 'TD', 'location' ]
                                     ]
                                 ]
                                # Each hit row is followed by one more row with no fields.
                                ,[ 'TR' ]
                             ]
                          ]
#                         ,[ 'BOGUS', 1 ]  #Bogus result at the beginning . . .
                         ,[ 'BOGUS', -1 ] # and at the end!
                      ]
                   ]
                 ]
               ]
            ]
        ]
    ]
];            


    # scraperDetail describes the format of the detail page; handed out by
    # scraperDetail(). The page is first passed through cleanupHeadBody,
    # removeScriptsInHTML and specialBrainpowerTreatment, then one table is
    # selected by XPath and individual rows of it are mapped to result fields.
    # Every field is post-processed with trimTags and trimLFs.
    # NOTE(review): the row positions are hard-wired to the page layout; row 14
    # is not mapped to any field.
my $scraperDetail = 
[ 'TidyXML', \&cleanupHeadBody, \&removeScriptsInHTML, \&specialBrainpowerTreatment, 
    [ 
        ['XPath', '/html/body/table[3]/tr/td[7]/table/tr/td/table', 
            [  
                 # Paths below are relative to the table selected above.
                 ['XPath', 'tr[5]/td[2]',  'title',   \&trimTags, \&trimLFs]
                ,['XPath', 'tr[6]/td[2]',  'role',    \&trimTags, \&trimLFs]
                ,['XPath', 'tr[7]/td[2]',  'skills',  \&trimTags, \&trimLFs]
                ,['XPath', 'tr[8]/td[2]',  'jobType', \&trimTags, \&trimLFs]
                ,['XPath', 'tr[9]/td[2]',  'payrate', \&trimTags, \&trimLFs]
                ,['XPath', 'tr[10]/td[2]', 'jobLength', \&trimTags, \&trimLFs]
                ,['XPath', 'tr[11]/td[2]', 'city',    \&trimTags, \&trimLFs]
                ,['XPath', 'tr[12]/td[2]', 'state',   \&trimTags, \&trimLFs]
                ,['XPath', 'tr[13]/td[2]', 'postdate', \&trimTags, \&trimLFs]
                ,['XPath', 'tr[15]/td[2]', 'description', \&trimTags, \&trimLFs]
            ]
        ]
    ]
];


# Pre-parse cleanup for the detail page: escapes the raw "&reqid" / "&resumeid"
# query parameters and the "<mailto:" sequence, editing the document in place.
#   $xml - reference to the page text; the same reference is returned.
# Matching is case-insensitive, and the replacement text is always lower case.
sub specialBrainpowerTreatment {
    my ($self, $hit, $xml) = @_;

    # Topicalize the referenced text so each substitution edits it directly.
    for ($$xml) {
        s-\&reqid-\&amp;reqid-gsi;
        s-\&resumeid-\&amp;resumeid-gsi;
        s-\<mailto:-\&lt;mailto:-gsi;
    }

    return $xml;
}

# Records the engine's home URL and logo markup on the scraper object by
# calling its searchEngineHome and searchEngineLogo methods, then returns
# the object itself.
sub init {
    my $self = shift;

    my @settings = (
        [ 'searchEngineHome', 'http://www.Brainpower.com' ],
        [ 'searchEngineLogo', '<IMG SRC="http://www.brainpower.com/images/logo_circ_01.gif">' ],
    );

    for my $setting (@settings) {
        my ($setter, $value) = @$setting;
        $self->$setter($value);
    }

    return $self;
}


# Returns a fresh hash reference of settings for the test harness.
# When called on an instance (any reference), the instance is also flagged
# with isTesting = 1; a plain class-name invocant is left untouched.
sub testParameters {
    my ($self) = @_;

    $self->{'isTesting'} = 1 if ref $self;

    my %parameters = (
        # Alternative SKIP source, kept for reference:
        #   &WWW::Scraper::TidyXML::isNotTestable('Brainpower')  # 'Man, this one takes a long time!'
        # The site EVEN CAME UP WITH "3709Operation is not allowed on an object
        # referencing a closed or invalid connection"! gdw.2003.01.16
        'SKIP'              => "Brainpower has gone the login route: Scraper's not up to that yet, but here's the framework if you want to do it yourself.",
        'TODO'              => '',
        'testNativeQuery'   => 'Perl',
        'expectedOnePage'   => 9,
        'expectedMultiPage' => 16,
        'expectedBogusPage' => 3,
        'usesPOST'          => 1,
    );

    return \%parameters;
}

# Accessors exposing this engine's file-level structural declarations:
# the request description, the results-page frame and the detail-page frame.
sub scraperRequest {
    return $scraperRequest;
}

sub scraperFrame {
    return $scraperFrame;
}

sub scraperDetail {
    return $scraperDetail;
}

##############################################################
# The text in this <TD> element is four lines representing
# postDate, location, jobCategory and jobType. Parse that here.
#
#   $self - scraper object; its trimLFLFs method is applied to the text first.
#   $hit  - result object; postDate, jobCategory and jobType are stored on it
#           through its _elem method.
#   $dat  - raw cell text.
#
# Returns the location line. If the text does not have the expected
# four-line shape, every field (and the return value) is undef.
sub parseLocation {
    my ($self, $hit, $dat) = @_;
    $dat = $self->trimLFLFs($hit, $dat);

    # Take the captures from the match's own return list rather than from
    # $1..$4: after a failed match those globals still hold whatever an
    # earlier successful match left behind, which would leak into this hit.
    my ($postDate, $location, $jobCategory, $jobType) =
        $dat =~ m/\n(.*?)\n(.*?)\n(.*?)\n(.*)/s;

    $hit->_elem('postDate', $postDate);
#    $self->_elem('location', $location);
    $hit->_elem('jobCategory', $jobCategory);
    $hit->_elem('jobType', $jobType);
    return $location;
}


###############################################
#
# getNextPage - calculate the next page's URL.
#
# Here is the JavaScript that FlipDog uses to
# create its "More Results" link. So it's
# pretty obvious what we need to do!
# NOTE(review): this description names FlipDog, not Brainpower; it looks
# carried over from another engine -- confirm it applies to this site.
#
# var jobCount = 25;
# var jobStart = 1;
# var jobTotal = 221;
# function PageResults( bNext )
# {
# var szQS = "";
# if ( bNext )
# szQS = document.location.search.replace( /&job=\d+/, "" ) + "&job=" + String(jobStart + jobCount);
# else
# szQS = document.location.search.replace( /&job=\d+/, "" ) + "&job=" + String(jobStart - jobCount);
# location.href = "/js/jobsearch-results.html" + szQS;
# }
#
#   $self - scraper object; the previous request URL is read from $self->{'_last_url'}.
#   $hit  - unused.
#   $dat  - page text containing the three "var job..." assignments.
#
# Returns the URL of the next page, or undef when the page carries no paging
# variables, no previous URL is known, the page size is zero, or the next
# start index lies beyond the total.
sub getNextPage {
    my ($self, $hit, $dat) = @_;

    my ($jobCount, $jobStart, $jobTotal) =
        $dat =~ m/var jobCount = (\d+).*?var jobStart = (\d+).*?var jobTotal = (\d+)/s
            or return undef;

    my $url = $self->{'_last_url'};
    return undef unless defined $url;

    # A page size of zero can never advance; stop instead of refetching forever.
    return undef unless $jobCount > 0;

    $jobStart += $jobCount;
    return undef if $jobStart > $jobTotal; # (not represented in the JavaScript, but necessary)

    # Update the job= parameter in place when present. Like the JavaScript,
    # append it when the URL does not carry one yet (e.g. the first page);
    # otherwise the same URL would be handed back again.
    $url =~ s/\&job=\d+/\&job=$jobStart/
        or $url .= "&job=$jobStart";
    return $url;
}

# Field translation callback: maps the canonical Request->payrate value onto
# Brainpower's native 'rate' option. The first two arguments are ignored.
# Returns the pair (native field name, value).
sub translatePayrate {
    my (undef, undef, $payrate) = @_;
    return ('rate', $payrate);
}



### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### 
# Post-selection of a candidate hit.
#   $scraper - this scraper object.
#   $rqst    - the request; its own postSelect is consulted first.
#   $rslt    - the candidate hit.
# Returns 0 to cast the hit away; otherwise returns whatever the parent
# class's postSelect decides.
sub postSelect {
    my ($scraper, $rqst, $rslt) = @_;

    # First pass: the request's own check, with 'locations' excluded.
    my $passesBaseCheck = $rqst->postSelect($scraper, $rslt, ['locations']);
    return 0 if !$passesBaseCheck;

    # Brainpower's too dumb to put the location in the results, we have to look at details!
    return $scraper->SUPER::postSelect($rqst, $rslt);
}


# Response class for Brainpower hits; a subclass of WWW::Scraper::Response::Job
# that supplies its own column titles, result hash and field clean-ups.
{ package WWW::Scraper::Response::Job::BrainpowerX;
use vars qw(@ISA);
@ISA = qw(WWW::Scraper::Response::Job);
use WWW::Scraper::Response::Job;

# Returns a hash reference mapping field names to display titles.
# Only the results-page fields are listed when the object's
# '_scraperSkipDetailPage' flag is set; otherwise detail-page fields are added.
sub resultTitles {
    my $self = shift;
    my $resultT = {}; #$self->SUPER::resultTitles();
    
    # These fields are from the results page.
    $$resultT{'url'}      = 'url';
    $$resultT{'skills'}    = 'Skills';
    $$resultT{'jobID'}   = 'Job ID';
    $$resultT{'location'} = 'Location';
    
    return $resultT if $self->{'_scraperSkipDetailPage'};
    
    # The following fields come from the detail page.
    $$resultT{'role'}     = 'Role';
    $$resultT{'skillSet'} = 'Skill Set';
    $$resultT{'type'}     = 'Type';
    $$resultT{'payrate'}  = 'Payrate';
    $$resultT{'city'}     = 'City';
    $$resultT{'state'}    = 'State';
    $$resultT{'postDate'} = 'Post Date';
    $$resultT{'description'} = 'Description';

    return $resultT;
}

# Returns a hash reference mapping field names to this hit's values, each
# obtained by calling the accessor method of the same name.
# NOTE(review): 'city' is fetched before the skip-detail check here, whereas
# resultTitles() lists it among the detail-page fields -- confirm which is intended.
sub results {
    my $self = shift;
    my $results = {}; #$self->SUPER::results();
    
    # These fields are from the results page.
    $$results{'url'} = $self->url();
    $$results{'jobID'} = $self->jobID();
    $$results{'skills'} = $self->skills();
    $$results{'location'} = $self->location();
    $$results{'city'} = $self->city();
    return $results if $self->{'_scraperSkipDetailPage'};
    
    # The following fields come from the detail page.
    for ( qw(role skillSet type payrate state postDate description) ) {
        $$results{$_} = $self->$_();
    }
    return $results;
}

# The parent's location with trailing whitespace removed.
sub location { my $x = $_[0]->SUPER::location(); $x =~ s/\s+$//g; return $x;}

# The parent's description with every run of whitespace collapsed to one space.
sub description {
    # Use the invocant from @_, not the global $_: when called from results(),
    # $_ holds the field name being looped over, not the object.
    my $self = shift;
    my $rslt = $self->SUPER::description();
# Hey, if some of those bubble-heads at the KBDs want to put in a few hundred spaces, then !%^&!* them!
    $rslt =~ s/\s+/ /g;
# The same goes for massive doses of <br>s. What is it with these people?
# NOTE(review): the substitution above has already turned every newline into a
# space, so this one never matches; kept as-is pending a decision on whether
# line breaks should survive.
    $rslt =~ s/\n+/\n/g;
    return $rslt;
 }

}


1;
__END__