# /usr/local/CPAN/Scraper/WWW/Scraper/Opcode/REGEX.pm
use strict;
package WWW::Scraper::Opcode::REGEX;
use base qw(WWW::Scraper::Opcode);
use vars qw($VERSION);
# new() - construct the REGEX opcode object for one scaffold entry.
#   $cls      - class to bless into; an existing object may be passed, in which
#               case its class is used. Falls back to this package if empty.
#   $scaffold - ref to the current scaffold array. Elements 0 and 1 are skipped
#               here (scrape() takes element 1 as the regex); the remaining
#               elements are the field entries.
#   $params   - ref to array of params in the 'OP()' portion of the scaffold
#               (accepted for interface compatibility; not used here).
# Returns the new object. Its 'fieldsCaptured' and 'fieldsDiscovered' entries
# both point at the SAME array of field names (sharing kept from the original
# implementation), so a change through one is visible through the other.
sub new {
    my ($cls, $scaffold, $params) = @_;

    # Two-argument bless so that subclasses inheriting this constructor get
    # objects of their own class rather than of this package.
    my $self = bless {}, ref($cls) || $cls || __PACKAGE__;

    my (undef, undef, @entries) = @$scaffold;

    # Keep only real field names: drop false entries (undef, '', 0), any
    # references (e.g. CODE callbacks) and entries starting with '#'.
    my @fields = grep { $_ && !ref($_) && !m{^#} } @entries;

    $self->{'fieldsCaptured'}   = \@fields;
    $self->{'fieldsDiscovered'} = \@fields;
    return $self;
}
# scrape() - apply the scaffold's regex to the page text and bind its captures
# to fields of the hit.
#   $self     - this opcode object (not consulted; the scaffold is re-read here)
#   $scraper  - scraper object; trimTags(), ScraperTrace() and {_base_url} are used
#   $scaffold - ref to the scaffold array: element 0 is skipped, element 1 is
#               the regex, the remaining elements are the field entries
#   $TidyXML  - object whose asString() returns a ref to the text being scraped
#   $hit      - object that receives plug_elem() / plug_url() calls
#
# The regex is applied once with /s and /i. On a match the matched text is
# REMOVED from the text behind asString(), and captures $1..$9 (only the first
# nine) are handed to the field entries from left to right:
#   undef           - discard the next capture
#   CODE ref        - called as ($scraper, $hit, $capture); its result replaces
#                     the next capture in place (the capture is not consumed)
#   'url'           - next capture is made absolute against $scraper->{_base_url}
#                     and passed to $hit->plug_url()
#   other true name - next capture goes through $scraper->trimTags() and, if the
#                     result is defined, to $hit->plug_elem(name, value, $TidyXML)
#   '' or 0         - ignored; consumes no capture
#
# Always returns undef, whether or not the regex matched (unchanged from the
# original implementation, which never reported its match status).
sub scrape {
    my ($self, $scraper, $scaffold, $TidyXML, $hit) = @_;

    my (undef, $regex, @fields) = @$scaffold;

    # An empty pattern would make s/// silently reuse the last successfully
    # matched regex, so treat a missing/empty regex as "no match".
    return undef unless defined $regex && length $regex;

    my $text_ref = $TidyXML->asString();
    return undef unless ${$text_ref} =~ s/$regex//si;

    # Snapshot the captures immediately; any later match would overwrite them.
    my @dts = ($1, $2, $3, $4, $5, $6, $7, $8, $9);

    # A named loop variable (rather than $_) keeps the field name intact even
    # if trimTags() or a callback assigns to the global $_.
    for my $field (@fields) {
        if ( !defined $field ) {
            shift @dts;
        }
        elsif ( ref($field) eq 'CODE' ) {
            $dts[0] = $field->($scraper, $hit, $dts[0]);
        }
        elsif ( $field eq 'url' ) {
            my $url = URI::URL->new(shift @dts, $scraper->{_base_url});
            $url = $url->abs();
            print "REGEX binding 'url' => $url\n" if ($scraper->ScraperTrace('d'));
            $hit->plug_url($url);
        }
        elsif ( $field ) {
            my $dt = $scraper->trimTags($hit, shift @dts);
            print "REGEX binding '$field' => $dt\n" if ($scraper->ScraperTrace('d'));
            $hit->plug_elem($field, $dt, $TidyXML) if defined $dt;
        }
    }

    return undef;
}
1;