use strict;
use warnings;

use HTML::LinkExtor;
use LWP::Simple;

# Iterators in this file are plain code references.  Calling one yields the
# next value; an exhausted iterator yields undef in scalar context (or the
# empty list in list context).

# Iterator BLOCK
# Syntactic sugar: hands back the block unchanged so it can serve as an
# iterator.  The (&) prototype is what permits the bare-block call syntax.
sub Iterator (&) {
    return $_[0];
}

# imap BLOCK $iterator
# Returns a new iterator that yields BLOCK applied to each value of
# $iterator.  BLOCK sees the current value in $_ and is invoked in the
# context of whoever calls the resulting iterator, so it may consult
# wantarray.
sub imap (&$) {
    my ($transform, $it) = @_;
    return sub {
        local $_ = NEXTVAL($it);
        return unless defined $_;    # underlying iterator is exhausted
        return $transform->();
    };
}

# igrep BLOCK $iterator
# Returns a new iterator that yields only those values of $iterator for
# which BLOCK (which sees the candidate in $_) returns true.
sub igrep (&$) {
    my ($is_interesting, $it) = @_;
    return sub {
        local $_;
        while (defined($_ = NEXTVAL($it))) {
            return $_ if $is_interesting->();
        }
        return;
    };
}

# traverse([$interesting_link,] @urls)
# Builds an iterator that walks the web starting from @urls.
#
#   $interesting_link  optional code ref; called with $_ set to a
#                      [$url, $referrer] pair, and must return true for
#                      links that should be followed.
#   @urls              starting URLs; their referrer is reported as
#                      'user-supplied'.
#
# Each call to the returned iterator fetches the headers of the next unseen
# URL and, for HTML documents, fetches the body and queues every link found
# in it.  In list context it yields ($url, \%head, $html, $referrer); in
# scalar context just $url.  %head has the keys TYPE, LENGTH, LAST_MODIFIED,
# EXPIRES and SERVER, all undef when the HEAD request fails.
sub traverse {
    my $interesting_link;
    $interesting_link = shift if ref($_[0]) eq 'CODE';

    my %seen;
    my @queue = map [$_, 'user-supplied'], @_;

    # Pipeline, read bottom-up: pull from the queue, strip the #fragment,
    # then drop URLs that were already visited.
    my $q_it = igrep { !$seen{ $_->[0] }++ && print "unseen.\n" }
               imap  { $_->[0] =~ s/#.*$//; $_ }
               Iterator {
                   print "Shifting...\n";
                   # Guard the dereference: once the queue is empty,
                   # $queue[0] is undef and @{...} would die under strict.
                   my @next = @queue ? @{ $queue[0] } : ();
                   print " @next\n";
                   return shift @queue;
               };

    if ($interesting_link) {
        $q_it = igrep { $interesting_link->() } $q_it;
    }

    return imap {
        my ($url, $referrer) = @$_;
        my (%head, $html);
        @head{qw(TYPE LENGTH LAST_MODIFIED EXPIRES SERVER)} = head($url);

        # The content type may carry parameters ("text/html; charset=UTF-8")
        # and is undef when the HEAD request failed, so a plain
        # eq 'text/html' would skip most real pages and warn on bad links.
        if (defined $head{TYPE}
            && $head{TYPE} =~ m{\A \s* text/html \s* (?: ; | \z )}xi)
        {
            $html = get($url);
            # get() yields undef on failure; only parse what was fetched.
            if (defined $html) {
                push @queue, map [$_, $url], get_links($url, $html);
            }
        }
        return wantarray ? ($url, \%head, $html, $referrer) : $url;
    } $q_it;
}

# get_links($base, $html)
# Returns the values of every link attribute HTML::LinkExtor reports for
# $html.  $base is handed to HTML::LinkExtor as the base URL.  Returns an
# empty list when $html is undefined.
sub get_links {
    my ($base, $html) = @_;
    my @links;

    # Callback receives the tag name followed by attribute name/value pairs.
    my $more_links = sub {
        my ($tag, %attrs) = @_;
        push @links, values %attrs;
    };

    HTML::LinkExtor->new($more_links, $base)->parse($html)
        if defined $html;
    return @links;
}

# NEXTVAL($iterator)
# Fetches the next value from an iterator by calling it.
sub NEXTVAL {
    return $_[0]->();
}

# Driver: crawl everything under $top, reporting each link and its referrer.
my $top    = 'http://perl.plover.com/';
my $top_re = qr/\A\Q$top\E/;
my $interesting = sub { $_->[0] =~ $top_re };

my $urls = traverse($interesting, $top);
while (my ($url, $head, undef, $referrer) = NEXTVAL($urls)) {
    print "$referrer -> $url\n";
    print " (bad link)\n" unless $head->{TYPE};
    # Pause until the user presses Enter.  Read STDIN explicitly so that
    # command-line arguments are never opened as input files.
    <STDIN>;
}