use strict;
use warnings;

use HTML::LinkExtor;
use LWP::Simple;

# Iterator BLOCK
#
# Syntactic sugar for building iterators: the (&) prototype lets callers
# write `Iterator { ... }` and receive the anonymous sub unchanged.
sub Iterator (&) { return $_[0] }

# NEXTVAL($iterator)
#
# Advance an iterator and return whatever it yields.  The iterator is called
# in the same context (list or scalar) that NEXTVAL itself was called in.
sub NEXTVAL { return $_[0]->() }

# traverse([$interesting_links,] @start)
#
# Build an iterator that crawls the web starting from @start.
#
#   $interesting_links  optional CODE ref used to filter links.  It is called
#                       as $interesting_links->($page_url, @links_on_page)
#                       and returns the links that should be followed.  The
#                       default returns all of its arguments.
#   @start              URLs to begin with.  Each element is either a plain
#                       URL or an array ref [$url, $referrer].
#
# Each call to the iterator visits the next not-yet-seen URL and returns
#   ($url, \%head, $html, $referrer)   in list context
#   $url                               in scalar context
# and returns an empty list / undef once the queue is exhausted.  %head holds
# the values of LWP::Simple::head() under the keys TYPE, LENGTH,
# LAST_MODIFIED, EXPIRES and SERVER (all undef if the HEAD request failed).
# $html is the page body for HTML documents and undef otherwise.
sub traverse {
    my $interesting_links = sub { @_ };
    $interesting_links = shift if ref $_[0] eq 'CODE';
    my @queue = @_;
    my %seen;

    return Iterator {
        while (@queue) {
            my $entry = shift @queue;
            my ($url, $referrer) = ref $entry eq 'ARRAY' ? @$entry : ($entry);

            # A filter callback may hand back junk; never visit (or yield)
            # an undefined or empty URL, since an undef result in scalar
            # context would be mistaken for exhaustion.
            next if !defined $url;
            $url =~ s/#.*$//;    # fragments name the same document
            next if $url eq '';
            next if $seen{$url}++;

            my (%head, $html);
            @head{qw(TYPE LENGTH LAST_MODIFIED EXPIRES SERVER)} = head($url);

            # The Content-Type header may carry parameters, for example
            # "text/html; charset=UTF-8", so compare only the media type.
            my $is_html = defined $head{TYPE}
                && $head{TYPE} =~ m{\A\s*text/html\s*(?:;|\z)}i;

            if ($is_html) {
                $html = get($url);

                # get() returns undef on failure; there is nothing to parse.
                if (defined $html) {
                    # Links found on this page are referred to by this page,
                    # so $url (not this page's own referrer) is recorded.
                    push @queue,
                        map( [ $_, $url ],
                             $interesting_links->($url, get_links($url, $html)) );
                }
            }

            return wantarray ? ($url, \%head, $html, $referrer) : $url;
        }
        return;    # exhausted
    };
}

# get_links($base, $html)
#
# Return the list of link targets found in the HTML text $html.  $base is
# handed to HTML::LinkExtor as the base URL used to resolve the links.
# Returns an empty list when $html is undefined.
sub get_links {
    my ($base, $html) = @_;
    return if !defined $html;

    my @links;
    my $more_links = sub {
        my ($tag, %attrs) = @_;
        push @links, values %attrs;
    };

    my $parser = HTML::LinkExtor->new($more_links, $base);
    $parser->parse($html);
    $parser->eof;    # flush anything the parser is still buffering
    return @links;
}

# ---------------------------------------------------------------------------
# Driver: crawl pages under $top and print a short report for the first
# N resources visited, where N is the first command-line argument
# (default 10).
# ---------------------------------------------------------------------------
my $top    = 'http://perl.plover.com/';
my $top_re = qr/^\Q$top/;

# Follow only links that stay under $top.
my $interesting = sub {
    my ($page, @links) = @_;
    return grep { defined($_) && $_ =~ $top_re } @links;
};

my $urls  = traverse($interesting, $top);
my $limit = $ARGV[0] || 10;

for (1 .. $limit) {
    my ($url, $h, $cont) = NEXTVAL($urls);
    last if !defined $url;    # iterator exhausted before reaching the limit

    print "$url\n";
    for my $field (sort keys %$h) {
        my $value = defined $h->{$field} ? $h->{$field} : '';
        print "\t$field => $value\n";
    }

    # Non-HTML resources and failed fetches have no body.
    $cont = '' if !defined $cont;
    $cont = substr($cont, 0, 70) if length($cont) > 70;
    $cont =~ tr/\n/ /;
    print "\t$cont\n\n";
}