/usr/local/CPAN/Scraper/WWW/Scraper/Opcode/NEXT.pm



use strict;

package WWW::Scraper::Opcode::NEXT;
use base qw(WWW::Scraper::Opcode);
use vars qw($VERSION);

# new() Scraper::Opcode - construct a NEXT opcode object.
#  $cls - Op class (or an existing instance, whose class is then reused)
#  $scaffold - current scaffold (accepted for interface compatibility; not used here)
#  $params - ref to array of params in the 'OP()' portion of the scaffold (not used here).
# Returns the new object with:
#  fieldsCaptured   - empty array ref
#  fieldsDiscovered - array ref containing the single field name 'NEXT'
sub new {
    my ($cls, $scaffold, $params) = @_;

    # Two-argument bless, so that a subclass inheriting this constructor gets
    # objects of its own class rather than of this package.
    my $self = bless {}, ref($cls) || $cls;

    $self->{'fieldsCaptured'} = [];
    $self->{'fieldsDiscovered'} = ['NEXT'];
    return $self;
}

# scrape() - work out the URL of the "next page" and store it in
#            $scraper->{'_next_url'}.
#  $self     - this opcode object (not otherwise used here)
#  $scraper  - scraper object: '_next_url' is written; '_last_url' and
#              '_base_url' are read; ScraperTrace() gates the debug output.
#  $scaffold - ref to this opcode's scaffold array:
#                [1] either a code ref, called as ($scraper, $hit, $pageText) and
#                    expected to return the next URL, or a regex (as a string)
#                    that matches the text of the "next" link;
#                [3] optional code ref, called as ($scraper, $hit, $href) to
#                    post-process the href found by the regex form.
#  $TidyXML  - page content object (asString, m_isTidyd, getMarkedTextAndAttributes)
#  $hit      - current hit; passed through to the callbacks untouched.
# Always returns undef (explicitly, so list-context callers still receive one element).
sub scrape {
    my ($self, $scraper, $scaffold, $TidyXML, $hit) = @_;

    # Copy of the scaffold only. (A previous "my (@ary, $dat) = (...)" let @ary
    # swallow the page text as an extra trailing element, which could then be
    # mistaken for the optional $ary[3] callback.)
    my @ary = @$scaffold;

    if ( ref $ary[1] )
    {
        # Caller supplied its own "find the next URL" routine: hand it the page text.
        my $datParser = $ary[1];
        my $url = ${$TidyXML->asString()};
        $url = WWW::Scraper::unescape_query($url) if $TidyXML->m_isTidyd();
        $scraper->{'_next_url'} = $datParser->($scraper, $hit, $url);
        print STDERR  "NEXT_URL: $scraper->{'_next_url'}\n" if ($scraper->ScraperTrace('N'));
        return undef;
    }

    # A simple regex will not work here, since the "next" string may often
    # appear even when there's no <A>...</A> surrounding it. The problem occurs
    # when there is a <A>...</A> preceding it, *and* following it. Simple regexes
    # will find the first anchor, even though it's not the HREF for the "next" string.
    # So walk the anchors one at a time and test each anchor's own text.
    my $next_url_button = $ary[1];
    print STDERR  "next_url_button: $next_url_button\n" if ($scraper->ScraperTrace('N'));

    while ( 1 )
    {
        my ($sub_string, $attributes) = $TidyXML->getMarkedTextAndAttributes('A');
        last unless $sub_string;                            # no (more) anchor text: give up
        next unless $sub_string =~ m-$next_url_button-si;   # not the "next" link

        my $url = $attributes->{'href'};
        next unless $url;                                   # matching anchor without an href

        # Some sites give only "&name=value" (no '?') as the href, meaning
        # "the previous request, with this one parameter replaced".
        if ( my ($newName, $newValue) = ($url =~ m{&(.*?)=(.*)$}) and $url !~ m{\?} ) {
            $url = $scraper->{'_last_url'};
            # Remove any earlier appearance of this parameter. \Q..\E keeps
            # metacharacters in the parameter name (e.g. "p[0]") literal.
            $url =~ s{&\Q$newName\E=[^&]*}{}g;
            $url .= "&$newName=$newValue";
        }

        my $datParser = $ary[3] || \&WWW::Scraper::null;

        # Resolve against the base URL. Direct method-call syntax is used because
        # this package defines its own new(), which makes the indirect-object
        # form "new URI::URL(...)" ambiguous to the parser.
        $url = URI::URL->new($datParser->($scraper, $hit, $url), $scraper->{'_base_url'});
        $url = $url->abs();
        # Unescaped unconditionally in this branch (not only when m_isTidyd()).
        $url = WWW::Scraper::unescape_query($url);
        $scraper->{'_next_url'} = $url;
        print STDERR  "NEXT_URL: $url\n" if ($scraper->ScraperTrace('U'));
        last;
    }

    return undef;
}


1;