use HTML::LinkExtor;
use LWP::Simple;

# Iterator { BLOCK }
#
# Syntactic sugar for building an iterator: the (&) prototype lets the caller
# write a bare block, and the resulting code reference is handed back
# unchanged.  Each call of that code reference yields the iterator's next
# value.
sub Iterator (&) {
  my ($code) = @_;
  return $code;
}

# imap { BLOCK } $it
#
# Lazy counterpart of map: returns a new iterator that pulls one value from
# $it per call, exposes it to the block as $_, and returns whatever the block
# returns.  Once $it yields an undefined value the new iterator returns
# nothing (undef in scalar context, the empty list in list context).
sub imap (&$) {
  my ($transform, $it) = @_;
  my $mapped = sub {
    local $_ = NEXTVAL($it);
    if (defined $_) {
      # Called in return position so the caller's context (scalar or list)
      # reaches the transform.
      return $transform->();
    }
    return;
  };
  return $mapped;
}

# igrep { BLOCK } $it
#
# Lazy counterpart of grep: returns a new iterator that keeps pulling values
# from $it, exposing each one to the block as $_, and returns the first value
# for which the block is true.  When $it yields an undefined value the new
# iterator returns nothing.
sub igrep (&$) {
  my ($is_interesting, $it) = @_;
  my $filtered = sub {
    local $_;
    for (;;) {
      $_ = NEXTVAL($it);
      return unless defined $_;              # underlying iterator exhausted
      return $_ if $is_interesting->();
    }
  };
  return $filtered;
}


# traverse([$interesting_link,] @start_urls)
#
# Builds a lazy web crawler and returns it as an iterator (a code reference
# meant to be driven with NEXTVAL).
#
# Arguments:
#   $interesting_link  optional code reference; called with $_ set to a
#                      [$url, $referrer] pair, it decides whether that link
#                      is visited at all.
#   @start_urls        URLs the crawl starts from; their referrer is recorded
#                      as the string 'user-supplied'.
#
# Each call of the returned iterator visits one not-yet-seen URL and returns
#   ($url, \%head, $html, $referrer)   in list context
#   $url                               in scalar context
# where %head has the keys TYPE, LENGTH, LAST_MODIFIED, EXPIRES and SERVER
# filled from head($url) (all undefined when head() returns nothing), and
# $html is the page body for HTML pages, otherwise undef.  The iterator
# returns nothing once the queue of pending links is empty.
#
# Progress is traced on STDOUT ("Shifting...", the queue head, "unseen.").
sub traverse {
  my $interesting_link;
  $interesting_link = shift if ref($_[0]) eq 'CODE';

  my %seen;
  my @queue = map { [$_, 'user-supplied'] } @_;

  # Stage 1: hand out queued [$url, $referrer] pairs in FIFO order.
  my $next_queued = Iterator(sub {
    print "Shifting...\n";
    # "|| []" keeps the trace line working when the queue is empty instead of
    # dereferencing an undefined value.
    print "  @{ $queue[0] || [] }\n";
    return shift @queue;
  });

  # Stage 2: drop the fragment so "page#a" and "page#b" count as one URL.
  my $without_fragment = imap(sub { $_->[0] =~ s/#.*$//; $_ }, $next_queued);

  # Stage 3: let each URL through only once.  The verdict is returned
  # explicitly so it cannot be affected by whether the trace print succeeds.
  my $q_it = igrep(sub {
    return if $seen{$_->[0]}++;
    print "unseen.\n";
    return 1;
  }, $without_fragment);

  # Stage 4 (optional): caller-supplied filter.
  if ($interesting_link) {
    $q_it = igrep(sub { $interesting_link->() }, $q_it);
  }

  return imap(sub {
    my ($url, $referrer) = @$_;

    my (%head, $html);
    @head{qw(TYPE LENGTH LAST_MODIFIED EXPIRES SERVER)} = head($url);

    # The reported content type may carry parameters such as
    # "; charset=UTF-8" and may differ in case, so compare only the media
    # type itself rather than the whole string.
    if (defined $head{TYPE}
        && $head{TYPE} =~ m{\A\s*text/html\s*(?:;|\z)}i) {
      $html = get($url);
      # get() can fail even though head() succeeded; only parse a real body.
      if (defined $html) {
        push @queue, map { [$_, $url] } get_links($url, $html);
      }
    }
    return wantarray ? ($url, \%head, $html, $referrer) : $url;
  }, $q_it);
}


# get_links($base, $html)
#
# Runs $html through HTML::LinkExtor and returns every link-attribute value
# the extractor reports, in the order reported.  $base is passed to the
# extractor as its base URL.
#
# Returns the empty list (0 in scalar context) when $html is undefined, e.g.
# because the fetch that should have produced it failed.
sub get_links {
  my ($base, $html) = @_;
  my @links;
  return @links unless defined $html;

  # Callback invoked by the extractor once per link-bearing tag; only the
  # attribute values are kept, the tag and attribute names are ignored.
  my $more_links = sub {
    my ($tag, %attrs) = @_;
    push @links, values %attrs;
  };

  my $parser = HTML::LinkExtor->new($more_links, $base);
  $parser->parse($html);
  $parser->eof;    # signal end of document so buffered input is flushed
  return @links;
}

# NEXTVAL($it)
#
# Fetches the next value from an iterator by calling the code reference with
# no arguments.  The caller's context (scalar or list) is passed through to
# the iterator.
sub NEXTVAL {
  my ($it) = @_;
  return $it->();
}

# Driver: crawl every page whose URL starts with $top, printing one
# "referrer -> url" line per visited page and flagging pages for which no
# content type came back.  After each page the script waits for a line of
# input before continuing.
my $top = 'http://perl.plover.com/';
my $interesting = sub { $_->[0] =~ /^\Q$top/o };
my $urls = traverse($interesting, $top);
for (;;) {
  # List context makes the crawler return (url, head, html, referrer);
  # an empty list means the crawl is finished.
  my @page = NEXTVAL($urls);
  last unless @page;
  my ($url, $head, $referrer) = @page[0, 1, 3];

  print "$referrer -> $url\n";
  print "  (bad link)\n" unless $head->{TYPE};
  <>;
}






