use strict;
use warnings;

use HTML::LinkExtor;
use LWP::Simple;

# Iterator BLOCK
#
# Syntactic sugar: the (&) prototype lets callers write `Iterator { ... }`
# and get the block back as an anonymous code reference. An iterator is
# simply a code ref that yields its next value on each call.
sub Iterator (&) { return $_[0]; }

# NEXTVAL($iterator)
#
# Calls the iterator and returns whatever it produces. The caller's context
# (scalar or list) is propagated to the iterator itself.
sub NEXTVAL { return $_[0]->(); }

# traverse([$is_interesting,] @urls)
#
# Builds an iterator that walks the web breadth-first, starting from @urls.
#
# Parameters:
#   $is_interesting - optional code ref, called with one discovered link;
#                     the link is queued only if it returns true.
#                     Defaults to accepting every link.
#   @urls           - the starting URLs.
#
# Each call to the returned iterator yields the next URL not seen before:
#   scalar context: the URL
#   list context:   ($url, \%head, $html, $referrer)
#     \%head    - fields returned by LWP::Simple::head() under the keys
#                 TYPE, LENGTH, LAST_MODIFIED, EXPIRES and SERVER; the hash
#                 is empty when the HEAD request failed
#     $html     - page content when the URL was fetched as HTML, else undef
#     $referrer - URL of the page that linked here, or the string
#                 'supplied by user' for a starting URL
# When the queue is exhausted the iterator returns nothing (empty list /
# undef).
sub traverse {
    my $is_interesting = sub { 1 };
    $is_interesting = shift if ref $_[0] eq 'CODE';

    # Each queue entry pairs a URL with the page it was found on.
    my @queue = map { [ $_, 'supplied by user' ] } @_;
    my %seen;

    return Iterator {
        while (@queue) {
            my ($url, $referrer) = @{ shift @queue };
            next if !defined $url;

            # A fragment names a position inside a page, not another page.
            $url =~ s/#.*$//;
            next if $seen{$url}++;

            # head() returns an empty list on failure; leave %head empty in
            # that case so callers can detect a dead link with `%$head`.
            my %head;
            my @fields = head($url);
            @head{qw(TYPE LENGTH LAST_MODIFIED EXPIRES SERVER)} = @fields
                if @fields;

            # The Content-Type header may carry parameters such as
            # "; charset=utf-8", so match the media type only.
            my $html;
            if (defined $head{TYPE}
                and $head{TYPE} =~ m{\A \s* text/html \b}xi)
            {
                $html = get($url);
                push @queue,
                    map  { [ $_, $url ] }
                    grep { $is_interesting->($_) }
                    get_links($url, $html);
            }

            return wantarray ? ($url, \%head, $html, $referrer) : $url;
        }
        return;    # exhausted
    };
}

# get_links($base, $html)
#
# Returns the list of link attribute values that HTML::LinkExtor finds in
# $html. $base is handed to the extractor as the base URL for the links.
# Returns an empty list when $html is undefined (e.g. the fetch failed).
sub get_links {
    my ($base, $html) = @_;
    return if !defined $html;

    my @links;
    my $more_links = sub {
        my (undef, %attrs) = @_;    # the tag name is not needed
        push @links, values %attrs;
    };

    my $extractor = HTML::LinkExtor->new($more_links, $base);
    $extractor->parse($html);
    $extractor->eof;                # flush anything the parser has buffered
    return @links;
}

# Driver: crawl one site and report links whose HEAD request failed.
# Only URLs under $top are followed; otherwise the crawl would be unbounded.
my $top = 'http://perl.plover.com/';
my $it  = traverse(sub { $_[0] =~ m{\A\Q$top\E} }, $top);

my $checked = 0;
while (my ($url, $head, undef, $referrer) = NEXTVAL($it)) {
    $checked++;
    print "$url $referrer $checked\n" if $checked % 100 == 0;
    next if %$head;
    print "Page '$referrer' has a bad link to '$url'\n";
}