use HTML::LinkExtor;
use LWP::Simple;
# Iterator BLOCK
#
# Syntactic sugar for building an iterator: hands back the code reference
# it was given, unchanged.  The (&) prototype is what lets callers write
# "Iterator { ... }" instead of "sub { ... }".
sub Iterator (&) {
    my ($code) = @_;
    return $code;
}
# traverse(@start_urls)
#
# Returns an iterator (a code reference) that walks the web starting from
# @start_urls.  Each call yields the next URL not seen before, with any
# "#fragment" removed; once the queue is empty it returns an empty list
# (undef in scalar context).
#
# Before a URL is yielded, a HEAD request is issued for it.  If the response
# is HTML, the page is fetched and every link found by get_links() is
# appended to the queue.
sub traverse {
    my @queue = @_;
    my %seen;
    return Iterator {
        URL:
        while (@queue) {
            my $url = shift @queue;
            $url =~ s/#.*$//;
            next URL if $seen{$url}++;

            # head() returns an empty list on failure, which leaves
            # $content_type undefined.
            my ($content_type) = head($url);

            # The Content-Type header may carry parameters such as
            # "text/html; charset=UTF-8", and media types are
            # case-insensitive, so match the leading type rather than
            # comparing the whole string with eq.
            if (defined $content_type
                && $content_type =~ m{\A \s* text/html \b}xi)
            {
                # get() returns undef when the fetch fails; there is
                # nothing to parse in that case.
                my $html = get($url);
                push @queue, get_links($url, $html) if defined $html;
            }
            return $url;
        }
        return;    # exhausted
    };
}
# get_links($base, $html)
#
# Parses $html with HTML::LinkExtor, passing $base as the extractor's base
# URL, and returns the attribute values of every link-bearing tag reported
# by the parser.  In scalar context the return value is the number of
# links collected.
sub get_links {
    my ($base, $html) = @_;
    my @found;

    # The parser invokes the callback once per tag with the tag name
    # followed by attribute => URL pairs; only the URLs are kept.
    my $extractor = HTML::LinkExtor->new(
        sub {
            my (undef, %url_for) = @_;
            push @found, values %url_for;
        },
        $base,
    );
    $extractor->parse($html);

    return @found;
}
# NEXTVAL($iterator)
#
# Invokes the iterator (a code reference) with no arguments and returns
# whatever it produces, in the caller's context.
sub NEXTVAL {
    my ($iterator) = @_;
    return $iterator->();
}
# Driver: crawl outward from the start page and report every URL whose
# HEAD request fails.
my $it = traverse('http://perl.plover.com/');

# Explicit progress counter: $. only changes when a filehandle is read,
# which never happens in this loop.
my $visited = 0;

# The iterator yields one URL per call and an empty list when exhausted,
# which ends the loop.
while (my ($url) = NEXTVAL($it)) {
    $visited++;
    print "$url $visited\n" if $visited % 100 == 0;

    # traverse() hands back only the URL, so the link is probed here;
    # head() returns a false value when the request fails.
    next if head($url);
    print "Bad link to '$url'\n";
}