#!/usr/bin/perl
######################################################################
##
##     listmake ::: Jed Reynolds ::: Nov 23, 1995
##
##     This is a silly li'l program which walks a source tree and
##     compiles a list of the HTML filenames it finds and the <title>s
##     they contain. This gives me a recursive listing, so I can tell
##     what each file is by its title and name.
##
######################################################################

# 
# 
# I'm not sure if I just want to work 
# from the text directory or not. This would 
# be the code to do just that. Hrm...
# do I really want them to have to download all the 
# gifs or not? Hey...I'll do this, I'll let them
# /choose/ whether or not they wanna download
# the pictures or not, so there!
# Even though this does make my job harder...
# 

######################################################################
##
##    sub fileadd
##
##    This subroutine extracts the title from the given HTML file
##    and writes it, along with the filename, to NN_FILE.
######################################################################
sub fileadd {
    #
    # fileadd(HTMLFILE)
    #
    # Reads HTMLFILE (a path relative to the current directory), extracts
    # the text of its first <title> element and writes one line of the form
    #
    #     TITLE:<padding>WORKINGDIR/HTMLFILE
    #
    # to the NN_FILE handle, which the caller must already have opened.
    # WORKINGDIR is taken from the package variable $workingdir.
    #
    # The title may span several lines and the tag may be in any case;
    # runs of whitespace inside it are collapsed to single spaces.
    # A file without a <title> tag produces no output.
    #
    # Returns 1 when a line was written, nothing otherwise (including
    # when HTMLFILE cannot be opened, which is reported with warn).
    #
    use strict;
    use warnings;

    our $workingdir;            # set by the main loop before each call

    my ($htmlfile) = @_;

    my $fh;
    unless (open $fh, '<', $htmlfile) {
        warn "fileadd: can't open $htmlfile: $!\n";
        return;
    }

    my $opening_line;           # the line on which <title was first seen
    my $collected;              # everything from that line up to </title>
    my $closed = 0;             # true once the closing tag has been read

    while (my $line = <$fh>) {
        $line =~ s/\r?\n\z//;   # unlike chop, leaves a newline-less last line intact

        if (defined $collected) {
            $collected .= " $line";
        }
        elsif ($line =~ /<title\b/i) {
            $opening_line = $line;
            $collected    = $line;
        }
        else {
            next;
        }

        # Only the first title matters, so stop reading once it is complete.
        if ($collected =~ m{</title\s*>}i) {
            $closed = 1;
            last;
        }
    }
    close $fh;

    return unless defined $collected;

    # If the closing tag never showed up, $collected holds the rest of the
    # file; fall back to whatever followed the tag on its own line.
    my $title = $closed ? $collected : $opening_line;

    $title =~ s{\A.*?<title\b[^>]*>}{}is;   # drop everything through the opening tag
    $title =~ s{</title\s*>.*\z}{}is;       # drop the closing tag and what follows
    $title =~ s/<[^>]*>//g;                 # drop any markup nested in the title
    $title =~ s/\s+/ /g;                    # collapse line breaks and runs of blanks
    $title =~ s/\A //;
    $title =~ s/ \z//;

    # Pad so the path starts in a fixed column; always keep at least one
    # blank so a long title does not run straight into the path.
    my $padding = 42 - length $title;
    $padding = 1 if $padding < 1;

    my $dir = defined $workingdir ? $workingdir : '';

    print NN_FILE $title, ':', ' ' x $padding, $dir, '/', $htmlfile, "\n";

    return 1;
}                               # end of fileadd

######################################################################
##
##     main script
##
######################################################################

# Open the log file and the output list, then visit every directory that
# `du` reports below the starting directory. Directories whose path
# contains "text" are entered and each *.html file in them is handed to
# fileadd(); all other directories are skipped. Progress goes to
# listmake.log, the title list to listmake.out, and a final count of the
# files processed to standard output.

use strict;
use warnings;
use Cwd ();

our $workingdir;                # read by fileadd() to build the path it prints

# NN_FILE and LOGFILE stay bareword handles because fileadd() writes to
# NN_FILE by name.
open(LOGFILE, '>', 'listmake.log') or die "Can't open logfile: $!\n";
open(NN_FILE, '>', 'listmake.out') or die "Can't open nn.file: $!\n";

my $basedirectory = Cwd::getcwd();
defined $basedirectory
    or die "Can't determine the current directory: $!\n";

print LOGFILE "STARTING OFF IN: $basedirectory\n";

my $tallyfiles = 0;

# du lists every directory in the tree, one per line, as "<size> <path>".
my @du_lines = `du`;
print LOGFILE "WARNING: du exited with status $?\n" if $? != 0;

foreach my $entry (@du_lines) {

    # The paths from du are relative to the starting directory, so go
    # back there before entering the next one.
    chdir($basedirectory)
        or die "Can't return to $basedirectory: $!\n";

    chomp $entry;

    # Strip only the leading size column, so that directory names
    # containing whitespace survive intact.
    (my $dir = $entry) =~ s/\A\S+\s+//;

    # Make the path relative if du ever reports it under the base
    # directory; \Q..\E keeps regex metacharacters in the path literal.
    $dir =~ s{\A\Q$basedirectory\E/?}{};

    #
    # skip any non-text directories
    #
    unless ($dir =~ /text/) {
        print LOGFILE "Skipping non-TEXT dir.\n";
        next;
    }

    print LOGFILE " ENTERING $dir \n";

    $workingdir = $dir;

    # Without this check a failed chdir would silently list the *.html
    # files of the base directory under the wrong directory name.
    unless (chdir $dir) {
        print LOGFILE "Can't enter $dir: $!\n";
        next;
    }

    #
    # for each *.html file
    #
    for my $htmlfile (glob '*.html') {
        $tallyfiles++;
        print LOGFILE "$htmlfile, ";
        fileadd($htmlfile);     # plain call; "do fileadd(...)" no longer parses
    }
    print LOGFILE "\n";
}                               # end foreach du line

print "Processed $tallyfiles files.\n";

# Buffered write errors only surface at close, so check them.
close(NN_FILE) or die "Can't close listmake.out: $!\n";
close(LOGFILE) or die "Can't close listmake.log: $!\n";
# eof #
