| Bundle-WWW-Scraper-Job documentation | Contained in the Bundle-WWW-Scraper-Job distribution. |
WWW::Scraper::Monster - Scrapes Monster.com
use WWW::Search;

# Create a search object backed by the Monster engine.
# (Direct method-call syntax is used instead of the indirect-object form
# "new WWW::Search(...)", which Perl can mis-parse.)
my $oSearch = WWW::Search->new('Monster');
my $sQuery  = WWW::Search::escape_query("unix and (c++ or java)");

# The second argument is a hash of engine-specific search options:
# here 'st' restricts results to a state and 'tm' restricts by posting age.
$oSearch->native_query(
    $sQuery,
    {
        'st' => 'CA',
        'tm' => '14d',
    }
);

# Print one tab-separated line per result.
while (my $res = $oSearch->next_result()) {
    print $res->company . "\t" . $res->title . "\t" . $res->change_date
        . "\t" . $res->location . "\t" . $res->url . "\n";
}
This class is a Monster specialization of WWW::Search. It handles making and interpreting Monster searches at http://www.monster.com. Monster supports Boolean logic with "and"s and "or"s. See http://jobsearch.monster.com/jobsearch_tips.asp for a full description of the query language.
The returned WWW::Scraper::Response objects contain url, title, company, location and change_date fields.
The following search options can be activated by sending a hash as the second argument to native_query().
The default is to return jobs posted in the last 30 days. The following example changes the default to 14 days: {'tm' => '14d'}
No restriction by default.
Use {'st' => $state} to return only jobs in state $state. To select multiple states separate them with a "+", e.g. {'st' => 'NY+NJ+CT'}
Use {'fn' => $cat_id} to select one to five (5) job categories. For multiple selection separate selections with a space, e.g. 'fn' => '1 2'. Leave blank to select all categories.
Glenn Wood, http://search.cpan.org/search?mode=author&query=GLENNWOOD
Copyright (C) 2001 Glenn Wood. All rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| Bundle-WWW-Scraper-Job documentation | Contained in the Bundle-WWW-Scraper-Job distribution. |
package WWW::Scraper::Monster;

#####################################################################
# Monster.com engine for WWW::Scraper.
#
# This file declares three things for the WWW::Scraper base class:
#   * $scraperRequest - how to build the search URL,
#   * $scraperFrame   - how to pick results out of the returned page,
#   * testParameters  - values used by the distribution's test harness.
# It also defines WWW::Scraper::Request::Monster, whose generateQuery()
# assembles a query string from a query and a hash of options.
#####################################################################

use strict;
use vars qw(@ISA $VERSION);
@ISA = qw(WWW::Scraper);
$VERSION = sprintf("%d.%02d", q$Revision: 1.07 $ =~ /(\d+)\.(\d+)/);

use WWW::Scraper(qw(1.48 generic_option findNextForm trimLFs));
use WWW::Scraper::Response::Job;
use WWW::Scraper::FieldTranslation(1.00);

# Sample URLs observed on the site, kept for reference.
#http://jobsearch.monster.com/jobsearch.asp?cy=US&re=14&brd=1%2C1863&lid=883&lid=356&fn=6&q=Perl&sort=rv&vw=b
# detailed
#http://jobsearch.monster.com/jobsearch.asp?re=10&vw=d&pg=1&cy=US&brd=1%2C1863&lid=883&lid=356&fn=6&q=Perl&sort=rv
#http://jobsearch.monster.com/jobsearch.asp?q=Sales&re=13&sort=rv&tm=60d&brd=1%2C1863&cy=US&fn=6&lid=883&lid=356&vw=d
#http://jobsearch.monster.com/jobsearch.asp?brd=1%2C1863&cy=US&fn=6&lid=883&lid=356&q=Sales&re=10&sort=rv&tm=60&vw=d
#http://jobsearch.monster.com/jobsearch.asp?brd=1%2C1863&cy=US&fn=6&lid=883&lid=356&q=Sales&re=13&sort=rv&tm=60&vw=d

# Returns a fresh hash of the native option defaults.
#
# The same set is needed both by the request declaration and by
# testParameters(); building it in one place keeps the two in step.
# A new hash (and a new 'lid' array) is returned on every call so that
# callers never share mutable data.
#
# The option list used to contain 'brd' twice ('1' and then '1,1863');
# in a Perl hash constructor the later value wins, so only the effective
# value is kept here.
my $buildNativeDefaults = sub {
    return {
        'cy'   => 'US',
        'fn'   => '6',
        're'   => '13',
        'brd'  => '1,1863',
        'lid'  => ['883', 356],
        'sort' => 'rv',     # 'rv' - by relevance
        'vw'   => 'd',      # 'd'etailed, or 'b'rief
        'tm'   => '60d',
    };
};

# Request declaration handed to WWW::Scraper through scraperRequest().
my $scraperRequest = {
    # Type of query generation is 'QUERY'
    'type' => 'QUERY',

    # This is the basic URL on which to build the query.
    'url' => 'http://jobsearch.monster.com/jobsearch.asp?',

    # This is the Scraper attributes => native input fields mapping
    'nativeQuery'    => 'q',
    'nativeDefaults' => $buildNativeDefaults->(),

    'defaultRequestClass' => 'Job',
    'fieldTranslations'   => {
        '*' => {
            'skills' => 'q',
            # ,'payrate'   => \&translatePayrate
            # ,'locations' => new WWW::Scraper::FieldTranslation('Monster', 'Job', 'locations')
            '*' => '*',
        },
    },

    # Some more options for the Scraper operation.
    'cookies' => 0,

    # Some search engines don't connect every time - retry Monster this many times.
    'retry' => 2,
};

# Page-parsing declaration handed to WWW::Scraper through scraperFrame().
# The meaning of each tag ('COUNT', 'NEXT', 'BODY', 'HIT*', ...) is defined
# by WWW::Scraper, not by this file; only the nesting is laid out here.
my $scraperFrame = [
    'HTML',
    [
        #<B>Jobs <B>1</B> to <B>6</B> of <B>6</B></B>
        #<B>Jobs <B>1</B> to <B>6</B> of more than <B>6,000</B></B>
        [ 'COUNT', 'Jobs \d+ to \d+ of (\d+)' ],    # Jobs 1 to 50 of 241
        [ 'NEXT', 1, 'Next' ],
        [
            'BODY', '<!-- Jobs \S+ of \S+ -->', undef,
            [
                [ 'TABLE' ],
                [
                    'TABLE',
                    [
                        [
                            'TABLE',
                            [
                                [ 'TABLE' ],
                                [ 'TABLE' ],
                                [
                                    'TABLE',
                                    [
                                        [ 'TR' ],
                                        [
                                            'HIT*', 'Job',
                                            [
                                                [
                                                    'TR',
                                                    [
                                                        [ 'TD', 'postDate' ],
                                                        [ 'TD', 'location', \&trimLFs ],
                                                        [ 'TD' ],    # spacer.
                                                        [ 'TD', [ [ 'A', 'url', 'title' ] ] ],
                                                        [ 'TD', 'company' ],
                                                    ],
                                                ],
                                            ],
                                        ],
                                        # ,[ 'BOGUS', 1 ] # The first row is column titles.
                                    ],
                                ],
                            ],
                        ],
                    ],
                ],
            ],
        ],
    ],
];

# Returns the hash of values consumed by the test harness.
#
# NOTE(review): the comment that used to sit here talked about Dogpile and
# TidyXML sub-classes and looks copied from another engine; 'SKIP' is an
# empty string in this module.
sub testParameters {
    return {
        'SKIP'               => '',
        'testNativeQuery'    => 'Sales',
        'expectedOnePage'    => 25,
        'expectedMultiPage'  => 27,
        'expectedBogusPage'  => 3,
        'testNativeDefaults' => $buildNativeDefaults->(),
    };
}

# Access methods for the structural declarations of this Scraper engine.

# Returns the request declaration defined above.
sub scraperRequest { return $scraperRequest; }

# Passes the frame declaration defined above to the parent class's
# scraperFrame() and returns whatever that returns.
sub scraperFrame { return $_[0]->SUPER::scraperFrame($scraperFrame); }

# This engine declares no detail-page frame.
sub scraperDetail { return undef }

{
    package WWW::Scraper::Request::Monster;
    use WWW::Scraper::Request;
    use vars qw(@ISA);
    @ISA = qw(WWW::Scraper::Request);

    # Builds the full query URL for a search.
    #
    # Parameters:
    #   $self  - request object; reads $self->{'queryField'},
    #            $self->{'optionsRef'} and $self->{'_base_url'}.
    #   $query - the query text, passed through WWW::Search::escape_query.
    #
    # Returns: $self->{'_base_url'} followed by "name=value" pairs joined
    # with "&". The query field comes first, then the options in sorted
    # key order (for consistency regardless of hash ordering).
    #
    # Option value handling:
    #   * An array reference yields one "name=value" pair per element
    #     (previously the reference itself was stringified).
    #   * For 'st', every "+" is turned into "," before escaping.
    #   * For every other option except 'q', "+" separates multiple values:
    #     "nam=val1+val2" becomes "nam=val1&nam=val2". Each value is escaped
    #     on its own so that the "&" and "=" separators are not themselves
    #     passed through escape_query.
    sub generateQuery {
        my ($self, $query) = @_;

        my @pairs = ( $self->{'queryField'} . '=' . WWW::Search::escape_query($query) );

        my $options_ref = $self->{'optionsRef'} || {};
        foreach my $name (sort keys %$options_ref) {
            my $raw    = $options_ref->{$name};
            my @values = ref($raw) eq 'ARRAY' ? @$raw : ($raw);

            foreach my $value (@values) {
                my $val = defined $value ? $value : '';

                # Handle 'st' specially . . .
                $val =~ s/\+/,/g if $name eq 'st';

                # Split multi-valued options on "+"; 'q' is never split.
                # The -1 limit keeps trailing empty fields, so "a+" still
                # yields two pairs.
                my @parts = $name eq 'q' ? ($val) : split(/\+/, $val, -1);
                @parts = ('') unless @parts;    # split('') yields an empty list

                foreach my $part (@parts) {
                    push @pairs, "$name=" . WWW::Search::escape_query($part);
                }
            }
        }

        return $self->{'_base_url'} . join('&', @pairs);
    }
}

# Translate from the canonical Request->payrate to Monster's 'rate' option.
# Only referenced from a commented-out entry in 'fieldTranslations' above.
sub translatePayrate {
    my ($self, $rqst, $val) = @_;
    return ('rate', $val);
}

1;
__END__