/usr/local/CPAN/Scraper/WWW/Scraper/Opcode/BODY.pm



use strict;

package WWW::Scraper::Opcode::BODY;
use base qw(WWW::Scraper::Opcode);
use vars qw($VERSION);

# new() Scraper::Opcode
#  $cls - Op class
#  $scaffold - current scaffold
#  $params - ref to array of parames in the 'OP()' portion of the scaffold.
sub new {
    my ($cls, $scaffold, $params) = @_;
    my $self = bless {};
    $self->{'fieldsDiscovered'} = ['background','bgcolor'];
    return $self;
}

sub scrape {
    my ($self, $scraper, $scaffold, $TidyXML, $hit) = @_;
    
    # This if/else clauses handle the legacy versions of the 'BODY' opcode.
    my $sub_string = undef;
    if ( ref($$scaffold[1]) eq 'ARRAY' ) {
        # This else clause is the only real, new Opcode, version of the 'BODY' opcode.
        my ($sub_string, $attributes) = $TidyXML->getMarkedTextAndAttributes('BODY');
        return undef unless $attributes;
        
        return ($$scaffold[1], $sub_string, $attributes);
    }
    elsif ( $$scaffold[1] and $$scaffold[2] ) {
        ${$TidyXML->asString()} =~ s-$$scaffold[1](.*?)$$scaffold[2]--si; # Strip off the adminstrative clutter at the beginning and end.
        $sub_string = $1 || '';
    } elsif ( $$scaffold[1] ) {
        ${$TidyXML->asString()} =~ s-$$scaffold[1](.*)$-$1-si; # Strip off the adminstrative clutter at the beginning.
        $sub_string = $1 || '';
    } elsif ( $$scaffold[2] ) {
        ${$TidyXML->asString()} =~ s-^(.*?)$$scaffold[2]-$1-si; # Strip off the adminstrative clutter at the end.
        $sub_string = $1 || '';
    } else {
        return undef;
    }
    
    # Continuing for the legacy versions of the 'BODY' opcode.
    if ( $$scaffold[3] && ('ARRAY' ne ref $$scaffold[3]) ) # if next_scaffold is an array ref, then we'll recurse (below)
    {
        if ( $sub_string ) {
            my $binding = $$scaffold[3];
            my $datParser = $$scaffold[4];
            $datParser = \&WWW::Scraper::trimTags unless $datParser;
            if ( $binding eq 'url' )
            {
                my $url = new URI::URL(&$datParser($scraper, $hit, $sub_string), $scraper->{_base_url});
                $url = $url->abs();
                $hit->plug_url($url);
            } 
            elsif ( $binding) {
                my $dat = &$datParser($scraper, $hit, $sub_string);
                $hit->plug_elem($binding, $dat) if defined $dat;
            }
        }
    }

    return ($self->_next_scaffold($scaffold), $sub_string, undef);
}


1;