use HTML::LinkExtor;
use LWP::Simple;

# Iterator { ... }
#
# Syntactic sugar for building an iterator.  The (&) prototype lets callers
# write a bare block, which Perl passes in as an anonymous sub; that code
# reference is handed back unchanged.
sub Iterator (&) {
  my ($code) = @_;
  return $code;
}

# traverse([$interesting_links,] @start)
#
# Returns an iterator (a code reference, see Iterator) that crawls the web
# breadth-first, starting from the entries of @start.  Each entry is either
# a URL or a [$url, $referrer] pair.
#
# $interesting_links is optional and is recognised by being a code reference
# in first position.  It is called as
#     $interesting_links->($page_url, @links_found_on_that_page)
# and must return the list of URLs that should be queued for a later visit.
# The default returns its arguments unchanged.
#
# Each call to the iterator fetches the next URL that has not been seen yet
# (fragments such as "#section" are stripped before comparing) and returns
#     ($url, \%head, $html, $referrer)   in list context
#     $url                               in scalar context
# and an empty list / undef once the queue is empty.  %head holds the values
# returned by head() under the keys TYPE, LENGTH, LAST_MODIFIED, EXPIRES and
# SERVER.  $html is the page body, or undef when the page is not HTML or
# could not be fetched.  $referrer is the page on which $url was found, or
# undef for a start URL supplied without one.
sub traverse {
  my $interesting_links = sub { @_ };
  $interesting_links = shift if ref $_[0] eq 'CODE';

  my @queue = @_;
  my %seen;

  return Iterator {
    while (@queue) {
      my $referrer;
      my $url = shift @queue;
      # Only plain array references are [$url, $referrer] pairs; anything
      # else (including URL objects) is treated as the URL itself.
      if (ref $url eq 'ARRAY') { ($url, $referrer) = @$url }
      # An undefined URL would be indistinguishable from exhaustion when the
      # iterator is called in scalar context, so never report one.
      next unless defined $url;
      $url =~ s/#.*$//;
      next if $seen{$url}++;

      my (%head, $html);
      @head{qw(TYPE LENGTH LAST_MODIFIED EXPIRES SERVER)} = head($url);
      # The Content-Type value may carry parameters ("text/html;
      # charset=UTF-8"), so match the media type rather than the whole string.
      if (defined $head{TYPE} and $head{TYPE} =~ m{\Atext/html\b}i) {
        $html = get($url);
        if (defined $html) {
          # The links were found on $url, so $url is both the context handed
          # to the filter and the referrer recorded for each queued link.
          push @queue,
            map [$_, $url],
              $interesting_links->($url, get_links($url, $html));
        }
      }
      return wantarray ? ($url, \%head, $html, $referrer) : $url;
    }
    return;                     # exhausted
  }
}


# get_links($base, $html)
#
# Parses $html with HTML::LinkExtor and returns every link attribute value
# the extractor reports.  $base is passed to the extractor as its base URL
# argument.
sub get_links {
  my ($base, $html) = @_;
  my @found;

  # The extractor calls back with a tag name followed by attribute => value
  # pairs; only the values are of interest here.
  my $extractor = HTML::LinkExtor->new(
    sub {
      my ($tag, %value_of) = @_;
      push @found, values %value_of;
    },
    $base,
  );
  $extractor->parse($html);

  return @found;
}

# NEXTVAL($iterator)
#
# Fetches the next item from an iterator by calling it with no arguments.
# The call happens in the caller's context, so whatever the iterator returns
# for list or scalar context is passed straight through.
sub NEXTVAL {
  my ($iterator) = @_;
  return $iterator->();
}

# Demo driver: crawl the site rooted at $top and print a short report for
# each page visited.  The number of pages is taken from the first
# command-line argument and defaults to 10.
my $top = 'http://perl.plover.com/';

# Link filter for traverse(): keep only the arguments that start with $top,
# so the crawl never leaves the site.  Undefined arguments are dropped.
my $interesting = sub { grep { defined($_) && /^\Q$top/o } @_ };
my $urls = traverse($interesting, $top);

for (1..($ARGV[0] || 10)) {
  my ($url, $h, $cont) = NEXTVAL($urls);
  # The iterator returns an empty list once there is nothing left to visit;
  # stop instead of printing blank records for the remaining iterations.
  last unless defined $url;

  print "$url\n";
  print "\t", join("\n\t", map "$_ => $h->{$_}", keys %$h), "\n";

  # Show at most the first 70 characters of the body on a single line.
  # Pages with no body (e.g. non-HTML documents) print as an empty line.
  $cont = "" unless defined $cont;
  substr($cont, 70) = "" if length($cont)>70;
  $cont =~ tr/\n/ /;
  print "\t$cont\n\n";
}


