| webrobot documentation | Contained in the webrobot distribution. |
WWW::Webrobot::Recur::Browser - act like a browser when selecting a url
see "Request_<recurse in WWW::Webrobot::pod::Testplan">
This module allows to load an HTML page, all contained frames (recursivly) and all images.
Constructor.
See next in WWW::Webrobot::pod::Recur
See allowed in WWW::Webrobot::pod::Recur
| webrobot documentation | Contained in the webrobot distribution. |
package WWW::Webrobot::Recur::Browser; use WWW::Webrobot::HtmlAnalyzer; use strict; # Author: Stefan Trcek # Copyright(c) 2004 ABAS Software AG
sub new { my $proto = shift; my $class = ref($proto) || $proto; my %parm = (@_); my $self = { frame => [], img => [], seen => {}, visited => {}, url_rejected => sub {}, url_accepted => sub {}, }; bless ($self, $class); return $self; }
sub next { my $self = shift; my ($r) = @_; my $in = $r -> {'_content'}; my $uri = $r -> {_request} -> {_uri}; if ($self -> is_type("text/html", $r->{_headers}->{'content-type'})) { # nur in einer HTML-Seite gibt es neue Links my ($img, $frame, $a, $refresh) = WWW::Webrobot::HtmlAnalyzer -> get_links($uri, \$in); ($img, $frame) = $self -> only_allowed($img, $frame); push @{$self -> {img}}, @$img; push @{$self -> {frame}}, @$frame; } my $e = $self -> next_link($self->{img}, $self->{frame}); $self -> {visited} -> {$e} = 1 if defined $e; return $e; } sub is_type { my $self = shift; my ($match, $obj) = @_; return 0 if !defined $obj; $obj = [$obj] if !ref($obj); foreach (@$obj) { return 1 if m/$match/; } return 0; }
sub allowed { my ($self, $uri) = @_; return 1; } sub only_allowed { my $self = shift; my @ret = (); foreach my $array (@_) { # delete all links that are not allowed my @new = (); foreach (@$array) { if (!defined($self -> {seen} -> {$_})) { # link unseen yet $self -> {seen} -> {$_} = 1; if ($self -> allowed($_)) { push @new, $_; $self -> {url_accepted} -> ($_); } else { $self -> {url_rejected} -> ($_); } } } push @ret, \@new; } return @ret; } sub next_link { my $self = shift; foreach my $array (@_) { my $n = shift @$array; return $n if defined $n; } return undef; }
1;