| Scraper documentation | Contained in the Scraper distribution. |
WWW::Scraper::NorthernLight - Scrapes NorthernLight.com
require WWW::Scraper;
$search = new WWW::Scraper('NorthernLight');
This class is a NorthernLight specialization of WWW::Search. It handles making and interpreting NorthernLight searches at http://www.NorthernLight.com.
This class exports no public interface; all interaction should be done through WWW::Search objects.
None at this time (2001.05.06)
Specifies which URL to query with the NorthernLight protocol. The default is http://www.northernlight.com/power.html.
WWW::Scraper::NorthernLight is written and maintained
by Glenn Wood, http://search.cpan.org/search?mode=author&query=GLENNWOOD.
Copyright (c) 2001 Glenn Wood. All rights reserved.
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
| Scraper documentation | Contained in the Scraper distribution. |
package WWW::Scraper::NorthernLight;

#####################################################################
# Scraper engine declaration for NorthernLight.com.
#
# This package is a WWW::Scraper subclass that consists almost entirely
# of two declarative structures:
#   $scraperRequest - how the search request is built and submitted
#   $scraperFrame   - how the result page is picked apart into hits
# plus the accessor methods that hand those structures to the base class.
#####################################################################

use strict;
use warnings;

# Package globals; declared with 'our' rather than the obsolete 'use vars'.
our ($VERSION, @ISA);
@ISA     = qw(WWW::Scraper);
$VERSION = sprintf("%d.%02d", q$Revision: 1.0 $ =~ /(\d+)\.(\d+)/);

use Carp ();
use WWW::Scraper(qw(2.27 generic_option addURL trimTags));
use WWW::Scraper::FieldTranslation;

# Request declaration returned by scraperRequest().
my $scraperRequest = {
    # Type of query generation; this engine uses 'FORM'.
    'type'             => 'FORM',
    'formNameOrNumber' => 'powSearch',
    'submitButton'     => 'search',

    # This is the basic URL on which to build the query.
    'url' => 'http://www.northernlight.com/power.html',

    # This is the Scraper attributes => native input fields mapping.
    'nativeQuery'       => 'qr',
    'nativeDefaults'    => { 'qr' => undef },
    'fieldTranslations' => {
        '*' => {
            'skills' => 'qr',
            # 'payrate'   => undef,
            # 'locations' => new WWW::Scraper::FieldTranslation('NorthernLight', 'Job', 'locations'),
            '*' => '*'
        }
    },

    # Some more options for the Scraper operation.
    'cookies' => 0
};

# Result-page parsing declaration passed to SUPER::scraperFrame().
# Each element is an [ OPCODE, args... ] list; the opcodes are interpreted
# by the WWW::Scraper base class, not by this package.
my $scraperFrame = [
    'HTML',
    [
        # Sample of the text the COUNT pattern is written against:
        #   </b> found <b>10,032,977 items</b>
        [ 'COUNT', 'found\s+<b>([0-9,]+)\s+items?</b>' ],
        [ 'NEXT',  'alt="Next Page"' ],
        [
            'BODY', '<!--NLBannerStart-->', '<!--NLResultListEnd-->',
            [
                [
                    'HIT*',
                    [
                        [
                            'BODY', '<!--NLResultStart-->', '<!--NLResultEnd-->',
                            [
                                [
                                    'TR',
                                    [
                                        # Sample of the markup the SNIP pattern is written against:
                                        #   <!-- --><!-- <td> </td> -->
                                        [
                                            'SNIP', '<!--[^>]*?<td>.*?-->',
                                            [
                                                [ 'TD', [ [ 'SPAN', 'number' ] ] ],
                                                [
                                                    'TD',
                                                    [
                                                        [ 'A', 'url', 'title' ],
                                                        [ 'REGEX', '<!--NLResultRelevanceStart-->(\d+)% -', 'relevance' ],

                                                        # NOTE(review): this pattern ends in a literal space;
                                                        # confirm that is the separator the page really uses.
                                                        [ 'REGEX', '<!--NLResultRelevanceEnd-->(.*?) ', 'source' ],
                                                        [ 'REGEX', '</b>(.*?)<br>', 'description' ],
                                                        [ 'REGEX', '<!-- Misc Block --><!-- \d+ -->(.*?)<!-- Misc Block --><!-- \d+ -->', 'miscBlock' ],
                                                        [
                                                            'TABLE',
                                                            [
                                                                ['TR'],
                                                                [ 'REGEX', '(\d+)%:', 'secondRelevance' ],
                                                                [ 'A', 'secondUrl', 'secondTitle' ]
                                                            ]
                                                        ],

                                                        # Disabled: needs better treatment of the <SPAN> at the
                                                        # top of this <TD> for this to work.
                                                        #,['SPAN', 'avail']
                                                        [ 'AQ', 'more\s+results', 'moreResultsUrl', undef ]    # <!-- Inline Clustering -->
                                                    ]
                                                ]
                                            ]
                                        ]
                                    ]
                                ]
                            ]
                        ]
                    ]
                ]
            ]
        ]
    ]
];

# Access methods for the structural declarations of this Scraper engine.

# Returns the request declaration hash reference ($scraperRequest).
sub scraperRequest {
    return $scraperRequest;
}

# Passes this engine's frame declaration to the base class's scraperFrame()
# and returns whatever that method returns.
sub scraperFrame {
    my ($self) = @_;
    return $self->SUPER::scraperFrame($scraperFrame);
}

# This engine declares no detail-page frame.
# Deliberately returns an explicit undef (not a bare return) so that callers
# in list context keep receiving a single undef element.
sub scraperDetail {
    return undef;
}

# Returns a hash reference of test settings for this engine.
# When invoked on an object (rather than a class name) it also sets the
# object's 'isTesting' flag to 1.
# NOTE(review): the keys are presumably consumed by the WWW::Scraper test
# harness - confirm against the base class.
sub testParameters {
    my ($self) = @_;

    if ( ref $self ) {
        $self->{'isTesting'} = 1;
    }

    return {
        'SKIP'              => "NorthernLight's search engine seems to be down these days!?",
        'testNativeQuery'   => 'search scraper',
        'expectedOnePage'   => 9,
        'expectedMultiPage' => 12,
        'expectedBogusPage' => 0
    };
}

1;