#!/usr/bin/perl
######################################################################
##
##	acceptance ::: Jed Reynolds ::: nov 23, '95
##
##	This is a hopefully more useful perl script that
##	uses the data created by denail to actually RENAME
##	and update all the urls in an html sourcetree.
##	I would use this with caution, your html sourcetree
##	might bite hard on this one!
##
######################################################################

######################################################################
# 
# sub replace_filename
# 
# this is a sub to take the string containing a url and slap
# in the fixed name
# 
######################################################################
sub replace_filename {
    #
    # Rewrite the file name inside one anchor string and emit the result.
    #
    # Argument:
    #   $_[0]  - text of an <a href="...">...</a> statement (the caller
    #            joins multi-line anchors into one string first).
    #
    # Globals used:
    #   %NAMES      - old file name => new file name mapping (read only)
    #   LOGFILE     - progress log (written)
    #   TARGETFILE  - output file; receives exactly one line per call
    #   $goodhits   - incremented when a name is rewritten
    #   $badhits    - incremented when the string is passed through as is
    #
    # The caller's $_ is left untouched.
    #
    my ($badstring) = @_;

    #
    # Locate the href value.  The greedy .* means the LAST href in the
    # string wins.  $-[1] remembers where the value starts so that the
    # replacement can be done by position instead of by pattern.
    #
    my ( $url, $url_start );
    if ( $badstring =~ /.*href="(\S*)"\>/i ) {
	$url       = $1;
	$url_start = $-[1];
    }

    #
    # The file name is whatever follows the last slash of the url
    # (the whole url when it has no directory part).
    #
    my ( $oldurlname, $name_start );
    if ( defined $url ) {
	my $slash = rindex( $url, '/' );	# -1 when there is no slash
	$oldurlname = substr( $url, $slash + 1 );
	$name_start = $url_start + $slash + 1;
    }
    my $shown = defined $oldurlname ? $oldurlname : '';
    print LOGFILE "looking for: $shown\n";

    #
    # we don't want to upset real urls: anything carrying a scheme
    # (http:, ftp:, mailto:, ...) is not a file in this source tree.
    # Only the href value is tested, so link text that merely mentions
    # "http" no longer blocks a rename.
    #
    my $newurlname;
    if ( defined $url && $url =~ /^[a-z][a-z0-9+.\-]*:/i ) {
	print LOGFILE "Skipping real url.\n";
    }
    elsif ( defined $oldurlname ) {
	# Direct lookup; fetching the value works for plain and dbm hashes.
	$newurlname = $NAMES{$oldurlname};
    }

    if ( defined $newurlname ) {
	print LOGFILE "  FOUND $oldurlname => $newurlname \n";
	$goodhits++;

	#
	# print good url to file
	#
	print LOGFILE "$oldurlname --> $newurlname\n";

	# Splice by offset: no regex is built from the file name, so
	# metacharacters such as '.' cannot match the wrong text and an
	# earlier look-alike elsewhere in the string is never touched.
	my $goodstring = $badstring;
	substr( $goodstring, $name_start, length($oldurlname) ) = $newurlname;

	print LOGFILE "updating to $goodstring\n";
	print TARGETFILE "$goodstring\n";
    }
    else
    {
	print LOGFILE "  CAN'T find $shown!\n";
	$badhits++;

	#
	# print original url to file
	#
	print LOGFILE "  Leaving as $badstring\n";
	print TARGETFILE "$badstring\n";
    }

}				# end of replace_filename


######################################################################
# 
# sub rename_urls
# 
# This subroutine renames the filenames in the urls in the html file
# 
# This is not an easy thing to do. Wish me luck.
######################################################################
sub rename_urls {
    #
    # Copy one html file to a new file, passing every <a href ...> ... </a>
    # statement through replace_filename so its file name can be updated.
    #
    # Arguments:
    #   $_[0]  - name of the html file to read
    #   $_[1]  - new name for the file; the output is written to this
    #            name with "_" appended
    #
    # Globals used:
    #   TARGETFILE - opened here and written by replace_filename, so it
    #                has to stay a package filehandle
    #   LOGFILE    - progress log (written)
    #
    # Dies when either file cannot be opened or the output cannot be
    # flushed on close.
    #
    my ( $thisfile, $thatfile ) = @_;
    my $targetname = $thatfile . "_";

    open( my $source, '<', $thisfile )
	or die "Can't open file $thisfile: $!\n";
    open( TARGETFILE, '>', $targetname )
	or die "Can't open file $targetname: $!\n";

    #
    # Per-file state.  Keeping it lexical stops a half-read anchor in
    # one file from leaking into the next one.
    #
    my $multiLine = 0;		# true while inside an unfinished anchor
    my $urlstring = '';		# text of the anchor collected so far

    #
    # for each line, find urls
    # look up urls, skip the ones not found
    # and replace the ones that were.
    #
    while ( my $text = <$source> ) {
	chomp $text;		# chop would eat a real character on a
				# final line that has no newline

	#
	# continue from last line?
	#
	if ($multiLine) {
	    $urlstring .= " " . $text;
	    if ( $text =~ /<\/a>/i ) {
		$multiLine = 0;
		replace_filename($urlstring);
	    }
	    next;
	}

	#
	# find <a href> </a> statements
	#
	if ( $text =~ /<a\s*href=/i ) {
	    # Always start from the current line; the single-line case
	    # must not reuse text left over from an earlier anchor.
	    $urlstring = $text;

	    if ( $text !~ /<\/a>/i ) {	# anchor continues on later lines
		$multiLine = 1;
	    }
	    else
	    {
		print LOGFILE "\n==>working on single-line on $urlstring.\n";
		replace_filename($urlstring);
	    }
	}
	else			# ordinary line, copy it through
	{
	    print TARGETFILE "$text\n";
	}
    }				# end of while(<>)

    #
    # An anchor that never saw its </a> would otherwise vanish from
    # the output; write the collected text back unchanged.
    #
    if ($multiLine) {
	print LOGFILE "  Unterminated anchor at end of $thisfile, leaving as $urlstring\n";
	print TARGETFILE "$urlstring\n";
    }

    #
    # clean up our mess
    #
    close($source);
    close(TARGETFILE)
	or die "Can't finish writing $targetname: $!\n";

}				# end of rename_urls


######################################################################
# 
# sub rename_html_files
# 
# This subroutine renames the html files that it finds and calls
# another subroutine (rename_urls) to update the urls in that file
# 
######################################################################
sub rename_html_files {
    #
    # Walk every directory reported by `du` below the current directory
    # and run rename_urls on each *.html file found there.
    #
    # Takes no arguments.
    #
    # Globals used:
    #   %NAMES       - old file name => new file name mapping (read only)
    #   LOGFILE      - progress log (written)
    #   $tallyfiles  - incremented once per html file seen
    #
    # Dies when the starting directory cannot be determined or
    # re-entered, and when an html file has no entry in %NAMES.
    #
    my $basedirectory = `pwd`;
    chomp $basedirectory;
    die "Can't determine the current directory\n"
	unless length $basedirectory;

    # for each directory
    foreach my $entry (`du`) {
	chomp $entry;

	# du prints "<size><whitespace><path>".  Strip only the leading
	# size column so that directory names containing blanks survive.
	# The path is used exactly as du printed it: relative paths
	# resolve against $basedirectory (re-entered just below), and no
	# part of the path is edited out.
	my $workingdir = $entry;
	$workingdir =~ s/^\S+\s+//;

	chdir($basedirectory)
	    or die "Can't return to $basedirectory: $!\n";

	print LOGFILE " ENTERING $workingdir \n";

	# Without this check a failed chdir would silently process the
	# base directory's files a second time.
	unless ( chdir($workingdir) ) {
	    print LOGFILE "  Can't enter $workingdir: $!\n";
	    next;
	}

	# for each file, look up file (html file)
	# find name of file, die on names not found,
	# then call rename_urls with the old and the new filename.

	#
	# for each *.html file
	#
	foreach my $htmlfile ( glob('*.html') ) {
	    print LOGFILE "\n Starting work on: $htmlfile\n";
	    $tallyfiles++;

	    # look up html file in NAMES
	    my $newname = $NAMES{$htmlfile};

	    # make sure this file exists in the database
	    if ( defined $newname && length($newname) > 0 ) {
		rename_urls( $htmlfile, $newname );
	    }
	    else
	    {
		print LOGFILE "File $htmlfile is not in dmb.\n";
		die "File $htmlfile is not in dmb.\n";
	    }			# end of if else
	}			# end of foreach *.html
	print LOGFILE "\n";
    }				# end foreach du

}				# end of rename_html_files

######################################################################
# 
# main script
#
# open logfile, dbm files
# 
######################################################################

#
# open logfile, filename mappings, and dbm
#

open( LOGFILE, '>', "acceptance.log" )
    or die "Can't open logfile: $!\n";

#
# test for the necessary database files
# (the .dir/.pag pair is what the dbm library is expected to have
# written for "filelist")
#

if (!( (-e "filelist.dir") && (-e "filelist.pag")) ) {
    die "Cannot file proper database files. TIME TO HACK, baby!\n";
}

dbmopen( %NAMES, "filelist", 0644 )
    or die "Can't create or open filelist: $!\n";

# Counters reported in the summary line.  $tallyfiles is set to zero
# too, so the summary still shows a number when no html file is found.
$goodhits   = 0;
$badhits    = 0;
$tallyfiles = 0;

# Plain call syntax: "do SUB(LIST)" is rejected by perl 5.20 and later.
rename_html_files();

print "Processed $tallyfiles files with $goodhits hits and $badhits misses.\n";

dbmclose(%NAMES) || warn "Error closeing filelist\n";
close (LOGFILE);


exit;
# eof
