| WWW-Arbeitsagentur documentation | Contained in the WWW-Arbeitsagentur distribution. |
WWW::Arbeitsagentur::Search - Search for Jobs & Applicants via Arbeitsagentur.de
package MySearch;
use WWW::Arbeitsagentur::Search;
my $search = MySearch->new('job' => 'Perl Job',
'user' => 'username@arbeitsagentur'
'pw' => 'mypassword@arbeitsagentur',
'mech' => WWW::Mechanize->new(),
);
$search->search_my_way;
sub search_my_way{
my $self = shift;
$self->connect();
# do other stuff
$self->collect_result_pages;
return $self->count_results;
}
This module is the base class for all search classes. It inherits from WWW::Arbeitsagentur and provides methods to collect search results and save them to disk.
Saves the result pages in the directory determined by $search->path. Returns the number of errors(!).
Usage: # After successful search: $search->collect_result_pages(); $search->path( 'download/' ); # where to save the files. $search->save_results();
Save a page from the result list to $search->path.
Parameter: index number of the page to be saved from array $self->results.
Return 0 on failure, 1 on success.
Usage:
# primitive filtering:
my $page = $search->result(5);
if ($page =~ m/Perl Coder/){
$search->save_page(5);
}
If our search was successful, this method collects all jobs / applicants found on the result pages. Data is stored in the array $search->results.
Returns the number of pages found.
Selects a job description in a search form on http://www.arbeitsargentur.de.
Usage:
# After navigating the mech to the search form:
$search->beruf('Perl Coder');
$search->select_job();
Returns 1 if a matching job-description was found. Returns 0 on failure.
http://arbeitssuche.sourceforge.net
Ingo Wiarda dewarim@users.sourceforce.net
This module is based upon http://arbeitssuche.sourceforge.net, written by Ingo Wiarda and Stefan Rother.
Copyright (C) 2006 by Ingo Wiarda
This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| WWW-Arbeitsagentur documentation | Contained in the WWW-Arbeitsagentur distribution. |
package WWW::Arbeitsagentur::Search;
our $VERSION = "0.0.1"; @ISA = qw/WWW::Arbeitsagentur/; use Digest::SHA qw(sha256_hex); use WWW::Arbeitsagentur::Arbeitnehmer; use WWW::Arbeitsagentur::Applicant; use WWW::Arbeitsagentur::Search::FastSearchForWork; use WWW::Arbeitsagentur; use strict; use warnings; sub save_results{ my ($self) = @_; my $errors = 0; for ( my $x = 0; $x < $self->results_count() ;$x++){ my $result = $self->save_page( $x ); if ($result == 0){ $errors++; warn("save_page failed. $!\n") unless $result; } } return $errors; } sub collect_result_pages{ my ($self) = @_; my $mech = $self->mech(); $mech->dump_content(); my @results = $mech->find_all_links('url_regex' => qr!anzeigeDetails\?redirect_id=\d+!); my $result_url = 0; my $pages_found = 0; $result_url = $results[0]->url() if $results[0]; return 0 unless $result_url; while($result_url){ $mech->get($result_url); $mech->dump_content('job.html'); if (! $mech->success()){ warn( "Could not fetch ". $result_url.".\n" ); return $pages_found; # Todo: better error handling - fetch next, retry etc.? } $self->results_push( $mech->content ); $pages_found++; # find the URL of the next applicant / job: @results = $mech->find_all_links("text_regex" => qr/(?:chstes Stellenangebot|chster Bewerber)/); $result_url = @results ? $results[0]->url() : 0; warn "next URL: ".$result_url."\n"; $mech->dump_content(); last if $self->_module_test; } return $pages_found; } sub select_job { my ($self) = @_; my $mech = $self->mech(); my ($berufsbezeichnung) = $self->beruf(); # The method should be more talkative why it returns 0... return 0 unless $berufsbezeichnung; return 0 unless $self->job_typ(); $mech->dump_content(); warn "Sende Formular Berufswahl.\n"; $mech->submit_form('form_number' => 3, 'fields' =>{ 'art' => $self->job_typ(), 'berufsbezeichnung' => $berufsbezeichnung, }, 'button' => 'cmd#starteBerufswahl##true' ); $mech->dump_content('job_type_selected.html'); if ($mech->content() =~ m/Ergebnis der Suche nach Berufen/){ # es gibt mehrere Berufe dieses Namens -> nehmen wir den, der am ehesten passt: warn "Job-Title $berufsbezeichnung is not correct. Will try anaway.\n"; # The following should be refactored into a "Test job_list"-Type Funktion. #if ( $self->module_test == 0){ # open(JOBS, ">>oddjobs.txt"); # print JOBS $berufsbezeichnung."\n"; # close(JOBS); #} eval{ $mech->follow_link('text_regex' => qr/$berufsbezeichnung/i)}; if ( $@ ) { warn "Could find nothing that matches $berufsbezeichnung. $@\n"; $mech->dump_content('job_not_found.html'); return 0; } else{ $mech->dump_content('job_found.html'); return 1; } } # correct job was found: if ($mech->content() =~ m/name="beruf" value="[.\d]+"/){ return 1; } else{ # we found nothing: return 0; } } sub save_page{ my ($self, $page_id) = @_; my $data = $self->results_index( $page_id ); # Do not save zero-length files: if ( length($data) == 0 ){ warn("Page is 0 Bytes long - may be a download problem.\n"); return 0; } my ($ref) = WWW::Arbeitsagentur::extract_refnumber(\$data); if ($ref){ warn "save_page: found reference number: $ref.\n"; } else{ warn "Could not find a reference number. Will use hash-value as filename.\n"; $ref = sha256_hex($data)."_sha"; } my $path = $self->path(); # CSS-Links anpassen: $data =~ s!<link type="text/css" href="/vam/css/!<link type="text/css" href="!; # Seite anpassen: # Damit bei Stellenangeboten die gewünschten Fähigkeiten nicht # in einem großen Block stehen, werden Zeilenumbrüche eingefügt. $data =~ s/\);/\);<br \/>/g; $data =~ s/<body.*?Seiteninhalt<\/h2/<body><h2>Seiteninhalt<\/h2/sm; open(HTML, ">$path$ref.html") or die("Could not open file for writing!\n$!\n"); print HTML $data; close(HTML) or die("Could not close file\n$!\n"); return 1; } 1; __END__