#!/usr/bin/perl -w
# Creates a structured HTML list ('sitemap') of HTML files
# Copyright (C) 1998 Daniel Naber <dnaber@mini.gt.owl.de>
# version 1.08, 1998-08-16 (version number is independent from java version)
# See below for configuration. 
# Usage from command line: ./tree.pl [htmldir] >outputfile
#
# See http://www.ev-stift-gymn.guetersloh.de/server/tree_e.html for the 
# latest version. It would be nice to include a link to this page if you 
# use the script to generate a public page.
#
# CHANGES:
# 1997-09-07: first version
# (...)
# 1998-04-28: new option: @includefiles; @excludepatterns is now
#	called @excludefiles; one space after $pictag
# 1998-10-06: checks if $htmldir exists and if it's a directory;
#	$patternfile is now called $templatefile
# 1998-08-16: made ISO 8601 date default
#
# TODO/BUGS/PROBLEMS:
# $htmldir may not point to a link, links beneath $htmldir will be ignored
#
# COPYRIGHT:
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
# - user-configurable options ----------------------------------------------

# Directory with the HTML files; do not add a trailing slash.
# The first command line argument overrides the default, but only when the
# script is not running under a web server: CGI hands a query string without
# '=' (e.g. "tree.pl?/etc") to the script as command line arguments, so
# honouring $ARGV[0] there would let any visitor choose the directory.
# defined()/length() instead of a truth test also accepts a directory
# that is literally named "0".
if( ! $ENV{'GATEWAY_INTERFACE'} && defined($ARGV[0]) && length($ARGV[0]) ) {
	$htmldir = $ARGV[0];
} else {
	$htmldir = "/www/iris";
}

# Page template; the generated list is printed in place of its
# '<!-- $list -->' marker.
$templatefile = "tree-template.html";
#$templatefile = "";		# uncomment to output nothing but the list
$baseurl = "";			# prefix put in front of every URL in the list

$cgi = 1;			# 1 = run as a CGI script (an HTTP header is printed first)
# set both of the following options to 1 to generate a list that works offline
$offline = 0;			# 1 = prefix links with $htmldir so the page works from disk
$indexrefs = 0;			# 1 = link to 'dir/index.html' etc. instead of 'dir/'

@indexfiles = qw(index.html index.shtml);	# names of the default (index) files
@inchtml = qw(shtml html htm);			# files with these suffixes are HTML files
@incpics = ();					# picture suffixes to list (empty = no pictures)
#@incpics = qw(gif jpg jpeg);		# uncomment to list pictures with these suffixes
$listsize = 0;			# 1 = show the size in kB for every file

# The output file itself (path below $htmldir): it is listed with the text
# $selftitle but without a link.
$self = "/server/az.html";
$selftitle = "Carte du site d'IRIS";

# List only these files/directories ('*' is a wildcard). Leave the list empty
# to list every file with a matching suffix that @excludefiles does not
# exclude.
@includefiles = ();
# Never list these files/directories ('*' is a wildcard):
@excludefiles = qw(
	/*.rc
	/altern*
	/annonce*
	/banner*
	/cosgo*
	/erreur*
	/les-iris/lbi*
	/maintenance.html
	/recherche.html
	/search.html
	/assises/*
	/c-altern/*
	/cgi-bin/*
	/educ/*
	/images/*
	/iris/private/*
	/les-iris/sep*
	/les-iris/li-*
	/proc/*
	/search/*
	/sgdg/*
	/tmp/*
	/WEBASSISES/*
);

$listwithouttitle = 1;		# 1 = also list HTML files that have no <title>

# Date template; DAY, MONTH and YEAR are replaced by today's date.
$date = "DAY.MONTH.YEAR";		# German format
#$date = "MONTH/DAY/YEAR";		# American format
#$date = "YEAR-MONTH-DAY";		# ISO 8601 format

# Plain output (uncomment these and comment out the icon variant below):
#$dirtag = '<ul>';
#$dirtag_end = '</ul>';
#$foldertag = '<li>';
#$htmltag = '<li>';
#$pictag = '<li>';
#$nolinktag = '<li>';

# Output with an icon in front of every item
# (this seems to be valid HTML, but it's not good HTML):
($dirtag, $dirtag_end) = ('<dl>', '</dl>');
$foldertag = '<dt><img src="/images/tree_images/folder.open.gif" alt="*">';
$htmltag   = '<dt><img src="/images/tree_images/generic.gif" alt="*">';
$pictag    = '<dt><img src="/images/tree_images/image2.gif" alt="*">';
$nolinktag = '<dt><img src="/images/tree_images/folder.open.gif" alt="*">';

# Markers for recently changed files. A file modified less than
# $modifiedtime_* hours ago gets the matching $modifiedtag_*; the periods are
# checked in the order day, 3day, week and the first match is used.
# A period of 0 switches that marker off.
# (former single-marker settings, kept for reference:)
#$modifiedtag = '<img src="/images/tree_images/new.gif">';
#$modifiedtime = 24;
($modifiedtime_day,  $modifiedtag_day)  = (24,  '<img src="/images/tree_images/new_day.gif">');
($modifiedtime_3day, $modifiedtag_3day) = (72,  '<img src="/images/tree_images/new_3day.gif">');
($modifiedtime_week, $modifiedtag_week) = (168, '<img src="/images/tree_images/new_week.gif">');
# - nearly no configuration below ------------------------------------------

# $st = time();		# uncomment if you're interested in the runtime
use File::Find;
# Global state shared by the subroutines below.
$depth    = 0;		# number of '/' in the URL handled last by list_output
$partlist = "";		# list HTML that has been collected but not printed yet
$htmlct = $htmlsize = $picct = $picsize = 0;	# file counts and sizes (bytes)

# Main program: fill in the date, check the configuration, print the template
# part before the list, collect the files, print the list and finally the
# template part after the list.
getdate();
init();
first_part_output();
find(\&doperfile, $htmldir);
list_output();
last_part_output();
# $diff = time() - $st; print STDERR "time: $diff secs\n";	# see above
exit;

# --------------------------------------------------------------------------

sub getdate {
	# Replaces the DAY, MONTH and YEAR placeholders in the global $date
	# template with today's local date: two-digit day and month, full year.
	my @now = localtime(time());
	my $day = sprintf("%02d", $now[3]);
	my $month = sprintf("%02d", $now[4] + 1);	# localtime() counts months from 0
	my $year = $now[5] + 1900;			# localtime() counts years from 1900
	# each placeholder is matched case-insensitively and replaced once
	$date =~ s#DAY#$day#i;
	$date =~ s#MONTH#$month#i;
	$date =~ s#YEAR#$year#i;
}

sub init {
	# Prepares a run: prints the HTTP header in CGI mode, verifies that
	# $htmldir is a directory (prints an error message and exits otherwise)
	# and converts the wildcard patterns in @excludefiles and @includefiles
	# into regular expressions, in place. Call it once only: a second call
	# would convert the already converted patterns again.
	if( $cgi ) {
		select(STDOUT); $| = 1;		# unbuffered output
		# a script called nph-tree* has to send the status line itself
		$nph = 1 if( $0 =~ m#nph-tree# );
		print "HTTP/1.0 200 OK\n" if( $nph );
		print "Content-Type: text/html\n\n";
	}
	if( ! -d $htmldir ) {
		print "Error: $0: '$htmldir' doesn't exist or isn't a directory.";
		exit;
	}
	# '*' is the only wildcard. Everything between the wildcards is escaped
	# with quotemeta() so that characters such as '.' in '/*.rc' match
	# themselves only. The limit of -1 keeps a trailing empty field, so a
	# pattern that ends in '*' keeps its final wildcard.
	foreach my $patterns (\@excludefiles, \@includefiles) {
		foreach my $expat (@$patterns) {
			$expat = join('.*?', map { quotemeta($_) } split(/\*/, $expat, -1));
		}
	}
}

sub first_part_output {
	# Reads the template into the global $output, inserts the date and
	# prints everything in front of the '<!-- $list -->' marker. Nothing is
	# printed if the template contains no such marker.
	$output = load($templatefile);
	$output =~ s#<!-- \$date -->#$date#igs;
	if( $output =~ m#^(.*?)<!-- \$list -->#is ) {
		print $1;
	} else {
		print "";
	}
}

sub doperfile {
	# File::Find callback: decides whether the current entry belongs in the
	# list and, if so, appends its full path to the global @filelist.
	# Directories are stored with a trailing '/'.
	my $thisfile = $File::Find::name;
	# find() has changed into the directory being scanned, so the entry must
	# be tested through $_ (its name inside that directory). Testing
	# $File::Find::name only works when $htmldir is an absolute path.
	$thisfile .= "/" if( -d $_ );
	# part after $htmldir; \Q..\E keeps regex metacharacters in the directory
	# name from being interpreted
	my ($thisfile_rel) = ($thisfile =~ m#^\Q$htmldir\E(/.*)#);

	# if @includefiles is set, keep only entries matching one of its patterns
	if( @includefiles ) {
		my $do_use = 0;
		foreach my $inpat (@includefiles) {
			if( $thisfile_rel =~ m#^$inpat$# ) {
				$do_use = 1;
				last;
			}
		}
		return if( ! $do_use );
	}

	# drop entries matching a pattern from @excludefiles
	foreach my $expat (@excludefiles) {
		return if( $thisfile_rel =~ m#^$expat$# );
	}

	# index files are not listed on their own (getdefaultfile looks for them
	# per directory)
	return if( isfile($thisfile, @indexfiles) );

	# keep directories, HTML files and pictures
	if( $thisfile =~ m#/$#
		|| isfileclass($thisfile, @inchtml)
		|| isfileclass($thisfile, @incpics) ) {
		push(@filelist, $thisfile);
	}
}

sub list_output {
	# Prints the nested list for all entries collected in @filelist.
	# Works on the globals $url, $depth, $olddepth, $olddir and $partlist;
	# $url is also read by getinfo. $dirtag_ct counts the $dirtag elements
	# that are still open so that they can be closed at the end.
	# NOTE(review): $htmldir, $thisdir and $olddir are interpolated into
	# regular expressions unescaped, so regex metacharacters in path names
	# are interpreted as such - confirm whether that can occur on this site.
	my $thisfile;
	my $dirsdone = "";			# newline-separated directories already started
	my $thisdir = "";
	my $dirtag_ct = 0;
	@filelist = sort(@filelist);
	print "$dirtag\n";
	$dirtag_ct++;
	foreach $thisfile (@filelist) {
		# path starting at the '/' that follows $htmldir
		($url) = ($thisfile =~ m#$htmldir(/.*)#i);
		$olddepth = $depth;
		# s#/#/# changes nothing; its return value is the number of slashes
		$depth = ($url =~ s#/#/#gi);    		# 1 = html-root
		$olddir = $thisdir;
		# $url up to and including its last '/'
		($thisdir) = ($url =~ m#(.*/).*?#i);
		if( $thisdir ne $olddir && ! ($dirsdone =~ m#^$thisdir$#m) ) {	# deeper level or same level
			# a directory that has not been started yet
			$dirsdone .= "$thisdir\n";
			# close the lists that are left behind; 'x' with a count of
			# zero or less yields the empty string
			$partlist .= "$dirtag_end\n" x ($olddepth-$depth+1);
			$dirtag_ct -= &minzero($olddepth-$depth+1);
			# getdefaultfile appends the entry of the directory's index file
			# to $partlist; without an index file the URL is listed as text
			$partlist .= " $nolinktag$baseurl$url\n" if( ! &getdefaultfile("$htmldir$thisdir") );
			$partlist .= "$dirtag\n";
			$dirtag_ct++;
		} elsif( ! ($thisdir =~ m#$olddir#i) ) {	# higher level
			$partlist .= "$dirtag_end\n" x ($olddepth-$depth);
			$dirtag_ct -= &minzero($olddepth-$depth);
			$partlist .= &getinfo($thisfile, 0);
		} else {					# same level as before
			$partlist .=  &getinfo($thisfile, 0);
			# remove lists that were opened and closed again without content,
			# then print what has been collected so far
			$partlist =~ s#$dirtag\n$dirtag_end\n##ig;
			print $partlist;
			$partlist = "";
		}
	}
	print $partlist;
	# close list correctly:
	print "$dirtag_end\n" x $dirtag_ct;
}

sub last_part_output {
	# Fills the statistics placeholders in the global $output and prints the
	# part of the template that follows the '<!-- $list -->' marker.
	# $htmlsize and $picsize are converted from bytes to whole kB in place
	# (1 kB = 1000 bytes here).
	foreach my $total ($htmlsize, $picsize) {
		$total = int($total/1000);	# the loop variable aliases the global
	}
	$output =~ s#<!-- \$htmlct -->#$htmlct#igs;
	$output =~ s#<!-- \$htmlsize -->#$htmlsize#igs;
	$output =~ s#<!-- \$picsct -->#$picct#igs;
	$output =~ s#<!-- \$picsize -->#$picsize#igs;
	if( $output =~ m#<!-- \$list -->(.*)$#is ) {
		print $1;
	} else {
		print "";		# no list marker in the template
	}
}

# --------------------------------------------------------------------------

sub getdefaultfile {
	# Looks in directory $dir (with trailing '/') for the first existing file
	# from @indexfiles. If one exists, its list entry is appended to the
	# global $partlist and its name is returned; otherwise 0 is returned.
	my ($dir) = @_;
	for my $candidate (@indexfiles) {
		next if( ! -e "$dir$candidate" );
		$partlist .= getinfo("$dir$candidate", 1);
		return $candidate;
	}
	return 0;
}

sub getinfo
{
	# Builds the list entry (HTML) for one file and updates the global
	# counters $htmlct/$htmlsize or $picct/$picsize.
	#   $thisfile    - full path of the file
	#   $isindexfile - true if the file is the index file of its directory
	# The link target is taken from the global $url, which list_output sets
	# for the entry that is currently processed. Returns the entry; it is
	# empty for files that are neither HTML files nor pictures.
	my $thisfile = shift;
	my $isindexfile = shift;
	my ($size, $exactsize) = getsize($thisfile);
	my $entry = "";
	my $linkurl = $baseurl.($offline ? $htmldir.$url : $url);
	if( isfileclass($thisfile, @inchtml) ) {
		$htmlsize += $exactsize;
		$htmlct++;
		my $string = load_part($thisfile);
		if( $thisfile eq "$htmldir$self" ) {			# output file itself
			$entry .= " $nolinktag$selftitle";
		} elsif( $string =~ m#<title>(.*?)</title>#is ) {	# common case
			# copy the title now; $1 should not be relied on across the
			# subroutine calls below
			my $title = $1;
			$entry .= $isindexfile ? " $foldertag" : " $htmltag";
			$entry .= getmodifiedtag($thisfile);
			$entry .= " <a href=\"$linkurl\">$title";
			$entry .= " ($size&nbsp;kB)" if( $listsize );
			$entry .= "</a>";
		} elsif( $listwithouttitle ) {				# files with no title tag
			$entry .= " $nolinktag$baseurl$url";
			$entry .= " ($size&nbsp;kB)" if( $listsize );
		}
		$entry .= "\n";
	} elsif( isfileclass($thisfile, @incpics) ) {
		$picsize += $exactsize;
		$picct++;
		my ($filenameonly) = ($url =~ m#.*/(.*)#i);
		$entry .= " $pictag";
		# pictures get the same day/3day/week markers as HTML files; the
		# former code used the disabled $modifiedtag setting here and
		# therefore never marked a picture
		$entry .= getmodifiedtag($thisfile);
		$entry .= " <a href=\"$linkurl\">$filenameonly";
		$entry .= " ($size&nbsp;kB)" if( $listsize );
		$entry .= "</a>\n";
	}

	# links to dir/ or to dir/index.html (see configuration section)
	if( $indexrefs ) {
		my ($filepart) = ($thisfile =~ m#.*/(.*)#);
		$entry =~ s#href="(.*?/)"#href="$1$filepart"#i;
	}
	return $entry;
}

sub getmodifiedtag {
	# Returns the marker (with a leading space) for the shortest configured
	# period in which $file was modified, or "" if it is older than all of them.
	my $file = shift;
	return " $modifiedtag_day" if( is_it_modified($file, $modifiedtime_day) );
	return " $modifiedtag_3day" if( is_it_modified($file, $modifiedtime_3day) );
	return " $modifiedtag_week" if( is_it_modified($file, $modifiedtime_week) );
	return "";
}

sub is_it_modified {
	# Returns 1 if $filename was modified less than $modifiedtime hours ago,
	# else 0. A period of 0 (or none at all) switches the check off, and a
	# file that cannot be stat()ed counts as not modified.
	my $filename = shift;
	my $modifiedtime = shift;
	return 0 if( ! $modifiedtime );
	my $mtime = (stat($filename))[9];	# time of last modification (epoch seconds)
	# stat() failed: avoid calculating with an undefined value
	return 0 if( ! defined($mtime) );
	return ((time() - $mtime) < ($modifiedtime*60*60)) ? 1 : 0;
}

sub getsize {
	# Returns the size of $file as the list (kB, bytes). The kB value is
	# bytes/1000 rounded down, but never less than 1.
	my ($file) = @_;
	my $bytes = -s $file;
	my $kilobytes = int($bytes/1000) || 1;
	return ($kilobytes, $bytes);
}

sub isfileclass {
	# Returns 1 if $path ends in '.' followed by one of the given suffixes,
	# else 0. The suffixes are used as regular expressions.
	my ($path, @suffixes) = @_;
	while( @suffixes ) {
		my $suffix = shift(@suffixes);
		$path =~ m#\.$suffix$# and return 1;
	}
	return 0;
}

sub isfile {
	# Returns 1 if the last path component of $path is one of the given file
	# names, else 0. The names are used as regular expressions.
	my ($path, @names) = @_;
	for my $name (@names) {
		next unless( $path =~ m#/$name$# );
		return 1;
	}
	return 0;
}

sub load_part {
	# Reads $file line by line up to and including the first line that
	# contains '</title>' (or to the end if there is none) and returns what
	# was read. Dies if the file cannot be opened.
	my $file = shift;
	my $string = "";
	# Three-argument open() with a lexical handle (Perl 5.6 or later): the
	# file name is taken literally - the two-argument form strips trailing
	# whitespace from it - and no global INPUT handle is involved.
	open(my $input, '<', $file) || die "Cannot open '$file': $!";
	# a lexical line variable leaves the caller's $_ untouched
	while( defined(my $line = <$input>) ) {
		$string .= $line;
		last if( $line =~ m#</title>#i );
	}
	close($input);
	return $string;
}

sub load {
	# Returns the complete content of $file, or "" if it cannot be opened.
	my $file = shift;
	# Three-argument open() with a lexical handle (Perl 5.6 or later): the
	# file name is taken literally and no global INPUT handle is involved.
	open(my $input, '<', $file) || return "";
	# Slurp mode for this subroutine only: local() restores the caller's
	# input record separator on return instead of forcing it to "\n".
	local $/;
	my $string = <$input>;
	close($input);
	return defined($string) ? $string : "";
}

sub minzero {
	# Clamps a number at zero: returns the argument if it is greater than 0,
	# else 0.
	my ($number) = @_;
	return ($number > 0) ? $number : 0;
}
