WWW::Scraper::Monster - Scrapes Monster.com


Bundle-WWW-Scraper-Job documentation Contained in the Bundle-WWW-Scraper-Job distribution.

Index


Code Index:

NAME

Top

WWW::Scraper::Monster - Scrapes Monster.com

SYNOPSIS

Top

 use WWW::Search;
 my $oSearch = new WWW::Search('Monster');
 my $sQuery = WWW::Search::escape_query("unix and (c++ or java)");
 $oSearch->native_query($sQuery,
 			{'st' => 'CA',
			 'tm' => '14d'});
 while (my $res = $oSearch->next_result()) {
     print $res->company . "\t" . $res->title . "\t" . $res->change_date
	 . "\t" . $res->location . "\t" . $res->url . "\n";
 }

DESCRIPTION

Top

This class is a Monster specialization of WWW::Search. It handles making and interpreting Monster searches at http://www.monster.com. Monster supports Boolean logic with "and"s and "or"s. See http://jobsearch.monster.com/jobsearch_tips.asp for a full description of the query language.

The returned WWW::Scraper::Response objects contain url, title, company, location and change_date fields.

OPTIONS

Top

The following search options can be activated by sending a hash as the second argument to native_query().

tm - Restrict by Date

By default this module requests jobs posted in the last 60 days ('tm' => '60d'). The example below changes that to 14 days:

{'tm' => '14d'}

lid - Restrict by Location

No restriction by default.

st - State

Only jobs in state $state. To select multiple states separate them with a "+", e.g. {'st' => 'NY+NJ+CT'}

fn - Job Function

Use {'fn' => $cat_id} to select one to five (5) job categories. For multiple selection separate selections with a space, e.g. 'fn' => '1 2'. Leave blank to select all categories.

AUTHOR

Top

Glenn Wood, http://search.cpan.org/search?mode=author&query=GLENNWOOD.

COPYRIGHT

Top


Bundle-WWW-Scraper-Job documentation Contained in the Bundle-WWW-Scraper-Job distribution.

package WWW::Scraper::Monster;

#####################################################################

use strict;
# Package globals: the inheritance list and the module version.
use vars qw(@ISA $VERSION);
# This engine is a subclass of WWW::Scraper.
@ISA = qw(WWW::Scraper);
# Derive the version from the revision-control keyword string: the match in
# list context yields the major and minor numbers, which sprintf renders as
# "major.minor" with the minor part zero-padded to two digits (here "1.07").
$VERSION = sprintf("%d.%02d", q$Revision: 1.07 $ =~ /(\d+)\.(\d+)/);

use WWW::Scraper(qw(1.48 generic_option findNextForm trimLFs));
use WWW::Scraper::Response::Job;
use WWW::Scraper::FieldTranslation(1.00);

#http://jobsearch.monster.com/jobsearch.asp?cy=US&re=14&brd=1%2C1863&lid=883&lid=356&fn=6&q=Perl&sort=rv&vw=b
# detailed
#http://jobsearch.monster.com/jobsearch.asp?re=10&vw=d&pg=1&cy=US&brd=1%2C1863&lid=883&lid=356&fn=6&q=Perl&sort=rv
#http://jobsearch.monster.com/jobsearch.asp?q=Sales&re=13&sort=rv&tm=60d&brd=1%2C1863&cy=US&fn=6&lid=883&lid=356&vw=d
#http://jobsearch.monster.com/jobsearch.asp?brd=1%2C1863&cy=US&fn=6&lid=883&lid=356&q=Sales&re=10&sort=rv&tm=60&vw=d
#http://jobsearch.monster.com/jobsearch.asp?brd=1%2C1863&cy=US&fn=6&lid=883&lid=356&q=Sales&re=13&sort=rv&tm=60&vw=d
# Declarative description of how a search request is built for Monster.
# It is handed out unchanged by the scraperRequest() accessor; the
# interpretation of each key is done by the WWW::Scraper framework.
#
# Note: 'brd' used to be listed twice in 'nativeDefaults' ('1' and then
# '1,1863').  In a Perl hash constructor the later pair silently wins, so only
# the effective value '1,1863' is kept here.
my $scraperRequest = 
   { 
      'type' => 'QUERY'       # Type of query generation is 'QUERY'
      # This is the basic URL on which to build the query.
     ,'url' => 'http://jobsearch.monster.com/jobsearch.asp?'
      # This is the Scraper attributes => native input fields mapping
     ,'nativeQuery' => 'q'
      # Native option values sent with the query when the caller does not
      # override them (they mirror the sample URLs in the comments above).
     ,'nativeDefaults' =>
                      {    'cy'  => 'US'
                          ,'fn'  => '6'
                          ,'re'  => '13'
                          ,'brd' => '1,1863'
                          ,'lid'  => ['883',356]   # multi-valued: appears as lid=883&lid=356 in the sample URLs
                          ,'sort'  => 'rv'      # 'rv' - by relevance
                          ,'vw'  => 'd'         # 'd'etailed, or 'b'rief
                          ,'tm'  => '60d'
                      }
     ,'defaultRequestClass' => 'Job'
      # Maps canonical request fields to Monster's native field names;
      # '*' => '*' presumably passes every other field through unchanged
      # (TODO confirm against WWW::Scraper's fieldTranslations handling).
     ,'fieldTranslations' =>
             { '*' => 
                  {    'skills'    => 'q'
#                      ,'payrate'   => \&translatePayrate
#                      ,'locations' => new WWW::Scraper::FieldTranslation('Monster', 'Job', 'locations')
                      ,'*'         => '*'
                  }
             }
      # Some more options for the Scraper operation.
     ,'cookies' => 0
     # Some search engines don't connect every time - retry Monster this many times.
     ,'retry' => 2
   };

# Declarative description of the result page layout, handed to
# WWW::Scraper via the scraperFrame() accessor.  Each entry is
# [ OPERATION, arguments..., optional list of nested entries ]; the exact
# meaning of every operation is defined by WWW::Scraper, not by this file.
my $scraperFrame =
[ 'HTML',
  [
    # Result-count banners as they appear in the page source:
    #   <B>Jobs <B>1</B> to <B>6</B> of <B>6</B></B>
    #   <B>Jobs <B>1</B> to <B>6</B> of more than <B>6,000</B></B>
    [ 'COUNT', 'Jobs \d+ to \d+ of (\d+)' ],    # Jobs 1 to 50 of 241
    [ 'NEXT', 1, 'Next' ],
    [ 'BODY', '<!-- Jobs \S+ of \S+ -->', undef,
      [
        [ 'TABLE' ],
        [ 'TABLE',
          [
            [ 'TABLE',
              [
                [ 'TABLE' ],
                [ 'TABLE' ],
                [ 'TABLE',
                  [
                    # A lone row ahead of the hits - presumably the
                    # column-title row (TODO confirm).
                    [ 'TR' ],
                    # One 'Job' result per matching table row.
                    [ 'HIT*', 'Job',
                      [
                        [ 'TR',
                          [
                            [ 'TD', 'postDate' ],
                            [ 'TD', 'location', \&trimLFs ],
                            [ 'TD' ],    # spacer.
                            [ 'TD', [ [ 'A', 'url', 'title' ] ] ],
                            [ 'TD', 'company' ],
                          ]
                        ],
                      ]
                    ],
#                   [ 'BOGUS', 1 ], # The first row is column titles.
                  ]
                ],
              ]
            ],
          ]
        ],
      ]
    ],
  ]
];

# Returns a hash reference of parameters describing how this engine is to be
# exercised by a test harness: the query to run, the native option defaults to
# use for it, and 'expected*' counts for the one-page, multi-page and
# bogus-query cases.  (How each key is consumed is defined by the harness in
# WWW::Scraper, not in this file.)
#
# The previous header comment here referred to Dogpile and TidyXML; it was a
# leftover from another engine and did not describe this module.
sub testParameters {
    return {
                 'SKIP' => ''    # empty string - presumably "do not skip" (TODO confirm)
                ,'testNativeQuery' => 'Sales'
                ,'expectedOnePage' => 25
                ,'expectedMultiPage' => 27
                ,'expectedBogusPage' => 3
                # 'brd' used to appear twice ('1', then '1,1863'); the later
                # pair always won, so only the effective value is listed.
                ,'testNativeDefaults' =>
                                {  'cy'  => 'US'
                                  ,'fn'  => '6'
                                  ,'re'  => '13'
                                  ,'brd' => '1,1863'
                                  ,'lid'  => ['883',356]
                                  ,'sort'  => 'rv'      # 'rv' - by relevance
                                  ,'vw'  => 'd'         # 'd'etailed, or 'b'rief
                                  ,'tm'  => '60d'
                                }
           };
}


# Access methods for the structural declarations of this Scraper engine.
# Hands back this engine's request description (the file-level
# $scraperRequest structure).
sub scraperRequest {
    return $scraperRequest;
}
# Passes this engine's page-layout description (the file-level $scraperFrame
# structure) to the parent class's scraperFrame method and returns its result.
sub scraperFrame {
    my ($self) = @_;
    return $self->SUPER::scraperFrame($scraperFrame);
}
# This engine declares no detail-page frame: always yields undef.
sub scraperDetail {
    return undef;
}


{ package WWW::Scraper::Request::Monster;
use WWW::Scraper::Request;
use vars qw(@ISA);
@ISA = qw(WWW::Scraper::Request);

# Builds the complete search URL for a query.
#
# Parameters:
#   $self  - request object; reads $self->{'queryField'} (name of the query
#            parameter), $self->{'optionsRef'} (hash ref of option name =>
#            value) and $self->{'_base_url'} (URL prefix).
#   $query - the query text.
#
# Returns the base URL followed by "queryField=<escaped query>" and one or
# more "&name=<escaped value>" pairs per option, with option names in sorted
# order so the result does not depend on hash ordering.
#
# Value handling:
#   * an array-ref value (as 'lid' is declared in this module's
#     nativeDefaults) contributes one "name=value" pair per element instead
#     of being stringified as "ARRAY(0x...)";
#   * for 'st', "+" separators are turned into "," (a single comma list);
#   * for every other option except 'q', a "+"-separated value becomes
#     repeated parameters: "a+b" => "name=a&name=b".
#
# Each piece is escaped on its own and the "&name=" separators are added
# afterwards.  Previously the separator was spliced into the value before
# WWW::Search::escape_query ran; that function escapes every non-alphanumeric
# character, so the "&" and "=" were encoded and the repeated-parameter form
# was never actually produced.
sub generateQuery {
    my ($self, $query) = @_;

    # Process the inputs.
    # (Now in sorted order for consistency regardless of hash ordering.)
    my $options = $self->{'queryField'}.'='.WWW::Search::escape_query($query);
    my $options_ref = $self->{'optionsRef'} || {};
    foreach my $name (sort keys %$options_ref) {
        my $raw = $options_ref->{$name};
        # Work on a copy so the caller's option values are never modified.
        my @values = (ref($raw) eq 'ARRAY') ? @$raw : ($raw);

        foreach my $val (@values) {
            $val = '' unless defined $val;

            my @parts;
            if ($name eq 'st') {
                # Handle 'st' specially: multiple states form one comma list.
                $val =~ s/\+/,/g;
                @parts = ($val);
            }
            elsif ($name eq 'q') {
                @parts = ($val);
            }
            else {
                # Convert "nam=val1+val2" into "nam=val1&nam=val2".
                # Limit -1 keeps trailing empty fields; an empty value still
                # yields a single (empty) parameter.
                @parts = split(/\+/, $val, -1);
                @parts = ('') unless @parts;
            }

            $options .= "&$name=".WWW::Search::escape_query($_) foreach @parts;
        }
    }

    return $self->{'_base_url'}.$options;
}

}

# Field-translation callback: maps the canonical Request->payrate value onto
# Monster's native 'rate' option.
#
# Called as ($self, $rqst, $val); only the third argument is used.
# Returns the pair ('rate', $val).
sub translatePayrate {
    my (undef, undef, $payrate) = @_;
    return ('rate', $payrate);
}


1;


__END__