| WWW-Search-Jobs documentation | Contained in the WWW-Search-Jobs distribution. |
WWW::Search::Yahoo::Classifieds::Employment - class for searching employment classifieds on Yahoo!
use WWW::Search;
my $oSearch = new WWW::Search('Yahoo::Classifieds::Employment');
my $sQuery = WWW::Search::escape_query("unix c++ java");
$oSearch->native_query($sQuery,
{'g' => 14,
'cr' => 'California'});
while (my $res = $oSearch->next_result()) {
my $true_url = $oSearch->getMoreInfo($res->url);
print $res->company . "\t" . $res->title . "\t" . $res->change_date
. "\t" . $res->location . "\t" . $true_url . "\n";
}
This class is a Yahoo! Classifieds (YC) specialization of WWW::Search. It handles making and interpreting YC searches at http://careers.yahoo.com.
The returned WWW::SearchResult objects contain url, title, company, location and change_date fields.
The returned url is the one found in Yahoo!'s own database. However, the same ad quite often appears in another database from which it was originally taken. To retrieve this "true" url, use the function getMoreInfo as shown in the example above.
The following search options can be activated by sending a hash as the second argument to native_query().
The default is to match ALL keywords in your query. To match ANY keywords use
The default is to return all ads in the Yahoo! job database. To change it use
Display jobs where company name matches $pattern.
Display jobs where job title matches $pattern.
No preference by default. To select jobs from a specific job category use the following option:
Category can be one of the following:
No restriction by default. The following option is used to select jobs from a specific industry:
Possible industries include:
No preference by default. The following option restricts your search to a desired location:
Location can be one of the following:
WWW::Search::YC was originally written by Alexander Tkatchev
(Alexander.Tkatchev@cern.ch).
1.02 -- patches from Rick Myers (rik@sumthin.nu) that adapt to important changes in the Yahoo! Classifieds search engine, plus some fixes of my own.
1.01 -- original release
THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
| WWW-Search-Jobs documentation | Contained in the WWW-Search-Jobs distribution. |
#
# YC.pm
# Author: Alexander Tkatchev
# e-mail: Alexander.Tkatchev@cern.ch
#
# WWW::Search back-end for Yahoo! Classifieds
# http://classifieds.yahoo.com/employment.html
#
package WWW::Search::Yahoo::Classifieds::Employment;

use strict;
use warnings;

#####################################################################

use Carp ();
require HTML::TokeParser;
use WWW::Search qw(generic_option);
use base 'WWW::Search';
require WWW::SearchResult;

our $VERSION = do { my @r = (q$Revision: 1.112 $ =~ /\d+/g); sprintf "%d."."%03d" x $#r, @r };

# native_setup_search($native_query, \%native_options)
#
# Prepares the first request of a search.
#   $native_query       - the (already escaped) query string; it is stored
#                         as the 'ck' option the first time this is called.
#   $native_options_ref - optional hash ref whose entries are copied over
#                         the default options.
# Side effects: sets agent_e_mail, search_base_url and _options (first call
# only), _next_url and _debug on $self.
sub native_setup_search {
    my ($self, $native_query, $native_options_ref) = @_;

    $self->{agent_e_mail} = 'alexander.tkatchev@cern.ch';
    $self->user_agent('non-robot');

    if (!defined($self->{_options})) {
        $self->{'search_base_url'} = 'http://classifieds.yahoo.com';
        $self->{_options} = {
            'search_url' => $self->{'search_base_url'} . '/display' . '/employment',
            'cr'         => '',
            'ck'         => $native_query,
            'ce_f'       => '',
            'cpo'        => '',
            'cpj'        => '',
            'g'          => 30,
            'cs'         => 'time+2',
            'cc'         => 'employment',
            'cf'         => '',
            'za'         => 'and',
            'ct_hft'     => 'table',
        };
    }

    my $options_ref = $self->{_options};
    if (defined($native_options_ref)) {
        # Copy in new options.
        foreach my $key (keys %$native_options_ref) {
            $options_ref->{$key} = $native_options_ref->{$key};
        }
    }

    # Process the options. Only these keys have their value passed through
    # escape_query(); every other value goes into the URL untouched.
    my %needs_escaping = map { $_ => 1 } qw(cr ce_f cpj cpo ce_i);
    my $options = '';
    foreach my $key (sort keys %$options_ref) {
        # Options recognised by generic_option() are not sent to the server.
        next if generic_option($key);
        my $value = $options_ref->{$key};
        $value = WWW::Search::escape_query($value) if $needs_escaping{$key};
        $options .= $key . '=' . $value . '&';
    }

    # Finally figure out the url.
    $self->{_next_url} = $self->{_options}{'search_url'} . '?' . $options;

    $self->{_debug} = $options_ref->{'search_debug'};
} # native_setup_search

# private
#
# native_retrieve_some()
#
# Fetches the page at $self->{_next_url}, pushes one WWW::SearchResult per
# listing row onto @{ $self->{cache} } and stores the URL of the following
# result page (if any) in $self->{_next_url}.
#
# Returns undef when there is nothing left to fetch or the request failed,
# otherwise the number of hits found on this page (0 when the server says
# nothing matched or the page layout is not recognised).
#
# Every parsing loop stops when HTML::TokeParser::get_tag() returns undef
# (end of document), so an unexpected page layout can no longer spin forever.
sub native_retrieve_some {
    my ($self) = @_;
    my $debug = $self->{'_debug'};
    print STDERR " * YC::native_retrieve_some()\n" if ($debug);

    # fast exit if already done
    # (explicit undef kept on purpose: callers see the same value as before)
    return undef if (!defined($self->{_next_url}));

    # sleep so as to not overload the server:
    $self->user_agent_delay;

    # get some
    print STDERR " * sending request (", $self->{_next_url}, ")\n" if ($debug);
    my ($response) = $self->http_request('GET', $self->{_next_url});
    $self->{'_next_url'} = undef;
    if (!$response->is_success) {
        print STDERR $response->error_as_HTML;
        return undef;
    }
    print STDERR " * got response\n" if ($debug);

    my $content = $response->content();
    if ($content =~ m/Your search found no results|No results match your search/) {
        print STDERR "No documents matched the query\n";
        return 0;
    }

    # parse the output
    my $hits_found = 0;
    my $p = HTML::TokeParser->new(\$content);

    # The search form supplies the action and the hidden fields that every
    # per-listing URL has to repeat.
    my $tag = $p->get_tag("form");
    return $hits_found unless defined($tag);
    my $action = defined($tag->[1]{action}) ? $tag->[1]{action} : '';

    my $extra_url = '';
    while (defined($tag = $p->get_tag("input"))) {
        my $field_name  = $tag->[1]{name};
        my $field_value = $tag->[1]{value};
        next unless defined($field_name);    # nameless inputs are never submitted
        $field_value = '' unless defined($field_value);
        $extra_url .= $field_name . '=' . WWW::Search::escape_query($field_value) . '&';
        last if $field_name eq 'search';
    }

    # Skip forward to the header row of the results table.
    while (defined($tag = $p->get_tag("td"))) {
        my $data = $p->get_trimmed_text("/td");
        last if ($data eq 'LOCATION' || $data eq 'COMPANY' || $data eq 'FULL LISTING');
    }

    # One listing per table row; the first row whose date cell does not look
    # like a date ends the listing.
  ROW:
    while (defined($p->get_tag("tr"))) {
        $p->get_tag("td");    # first column contains only spaces
        $p->get_tag("td");    # this one does as well
        $p->get_tag("td");
        my $date = $p->get_trimmed_text("/td");
        last ROW unless ($date =~ m|(\d+)/(\d+)/(\d+)|);
        $p->get_tag("td");
        my $company = $p->get_trimmed_text("/td");
        $p->get_tag("td");
        my $function = $p->get_trimmed_text("/td");
        $p->get_tag("td");
        my $term = $p->get_trimmed_text("/td");    # parsed but not stored in the hit
        $p->get_tag("td");
        my $title = $p->get_trimmed_text("/td");
        $p->get_tag("td");
        my $location = $p->get_trimmed_text("/td");

        $tag = $p->get_tag("input");
        last ROW unless defined($tag);    # truncated row: no submit input
        my $name  = defined($tag->[1]{name})  ? $tag->[1]{name}  : '';
        my $value = defined($tag->[1]{value}) ? $tag->[1]{value} : '';

        # $extra_url already ends in '&'; the request format is left exactly
        # as it has always been sent.
        my $url = $self->{search_base_url}
                . "$action?$extra_url&$name=$value&position0.x=1&position0.y=1";

        my $hit = WWW::SearchResult->new;
        $hit->url($url);
        $hit->company($company);
        $hit->change_date($date);
        $hit->title($title);
        $hit->description($function);
        $hit->location($location);
        push(@{ $self->{cache} }, $hit);
        $hits_found++;
    }

    #
    # Find next link
    #
    $p = HTML::TokeParser->new(\$content);
    while (defined($tag = $p->get_tag("img"))) {
        last if (defined($tag->[1]{'alt'}) && $tag->[1]{'alt'} eq 'Previous');
    }

  FIND_NEXT_URL:
    while (defined($tag = $p->get_tag("a"))) {
        my $nextlink = $tag->[1]{href};
        next FIND_NEXT_URL unless defined($nextlink);

        my $linklabel = $p->get_trimmed_text("/a");
        next FIND_NEXT_URL if ($linklabel =~ m/Previous/);
        if ($linklabel !~ m/Next/) {
            print STDERR "No next link\n" if ($debug);
            last FIND_NEXT_URL;
        }
        $nextlink =~ s/[\r\n]//g;    # not sure here but $CR and $LF are undefined
        print STDERR "$linklabel: $nextlink\n" if ($debug);
        $self->{'_next_url'} = $self->{'search_base_url'} . $nextlink;
        last FIND_NEXT_URL;
    }

    return $hits_found;
} # native_retrieve_some

# getMoreInfo($url)
#
# Fetches $url and looks for a link whose text contains
# "Additional information". When one is found (and its href does not mention
# net-temps) the href, stripped of the Yahoo! frameset wrapper, is returned.
# In every other case - request failure, no such link, link without href -
# the $url that was passed in is returned unchanged.
sub getMoreInfo {
    my $self = shift;
    my $url  = shift;

    my ($response) = $self->http_request('GET', $url);
    return $url unless $response->is_success;

    my $content = $response->content();
    return $url unless ($content =~ m/Additional information/);

    my $p = HTML::TokeParser->new(\$content);
    while (defined(my $tag = $p->get_tag("a"))) {
        my $testurl   = $tag->[1]{href};
        my $linktitle = $p->get_trimmed_text("/a");
        next unless ($linktitle =~ m/Additional information/);

        # Only the first matching link is considered.
        last if (!defined($testurl) || $testurl =~ m/net-temps/);
        $url = $testurl;
        $url =~ s|yahoo/yahoo_frameset.cgi\?||;
        $url =~ s/\&html\=yahoofoundhtml//;
        last;
    }
    return $url;
}

1;