use HTML::LinkExtor;
use LWP::Simple;
# Iterator BLOCK
#
# Syntactic sugar for building an iterator. The (&) prototype lets callers
# write `Iterator { ... }` and have the bare block compiled as an anonymous
# subroutine. That code reference is handed back unchanged.
sub Iterator (&) {
    my ($code) = @_;
    return $code;
}
# traverse([$interesting_links,] @start_urls)
#
# Build an iterator that crawls the web starting from @start_urls.
#
# Arguments:
#   $interesting_links - optional code ref; it receives the list of links
#                        found on a page and returns the ones that should
#                        be queued. Defaults to keeping every link.
#   @start_urls        - URLs used to seed the queue.
#
# Returns a code ref (built with Iterator). Each call yields the next
# not-yet-seen URL:
#   list context   - ($url, \%head, $html), where %head has the keys
#                    TYPE LENGTH LAST_MODIFIED EXPIRES SERVER filled from
#                    head($url), and $html is the page body (undef when
#                    the resource is not HTML or could not be fetched)
#   scalar context - $url only
# When the queue is empty it returns nothing (empty list / undef).
sub traverse {
    my $interesting_links = sub { @_ };
    $interesting_links = shift if ref($_[0]) eq 'CODE';
    my @queue = @_;
    my %seen;

    return Iterator {
        while (@queue) {
            my $url = shift @queue;
            $url =~ s/#.*$//;          # fragments all name the same document
            next if $seen{$url}++;     # visit each document only once

            my (%head, $html);
            @head{qw(TYPE LENGTH LAST_MODIFIED EXPIRES SERVER)} = head($url);

            # The Content-Type value may carry parameters such as
            # "text/html; charset=UTF-8", and is undef when head() fails,
            # so match on the media type rather than the whole string.
            if (defined $head{TYPE}
                && $head{TYPE} =~ m{\A\s*text/html\s*(?:;|\z)}i) {
                $html = get($url);
                # Only extract links when the body was actually fetched.
                push @queue, $interesting_links->(get_links($url, $html))
                    if defined $html;
            }
            return wantarray ? ($url, \%head, $html) : $url;
        }
        return;    # exhausted
    };
}
# get_links($base, $html)
#
# Collect the links contained in the HTML text $html. The document is run
# through HTML::LinkExtor, constructed with $base as its base URL, and every
# attribute value the extractor reports to its callback is gathered.
#
# Returns the collected values as a list (their count in scalar context).
sub get_links {
    my ($base, $html) = @_;
    my @found;

    my $extractor = HTML::LinkExtor->new(
        sub {
            # Called once per reported tag: tag name, then attr => value pairs.
            my ($tag, %link_for) = @_;
            push @found, values %link_for;
        },
        $base,
    );
    $extractor->parse($html);

    return @found;
}
# NEXTVAL($iterator)
#
# Ask an iterator (a code ref) for its next value by calling it with no
# arguments. The iterator is invoked in the caller's context and whatever it
# returns is passed straight back.
sub NEXTVAL {
    my ($iterator) = @_;
    return $iterator->();
}
# Demo driver: crawl pages under $top and print a short report for the
# first N documents visited, where N is the first command-line argument
# (default 10).
my $top = 'http://perl.plover.com/';

# Only follow links that stay below the starting URL.
my $interesting = sub { grep /^\Q$top/o, @_ };
my $urls = traverse($interesting, $top);

for (1..($ARGV[0] || 10)) {
    my ($url, $h, $cont) = NEXTVAL($urls);

    # The iterator returns nothing once the queue is empty; stop instead
    # of printing empty records for the remaining iterations.
    last unless defined $url;

    print "$url\n";
    # Sorted keys give a stable report; header values that were not
    # supplied are shown as empty.
    print "\t",
          join("\n\t",
               map { "$_ => " . (defined $h->{$_} ? $h->{$_} : '') }
               sort keys %$h),
          "\n";

    # $cont is undef for anything that is not HTML.
    $cont = '' unless defined $cont;
    substr($cont, 70) = "" if length($cont) > 70;   # first 70 characters only
    $cont =~ tr/\n/ /;
    print "\t$cont\n\n";
}