#!/usr/bin/perl -w
# Creates a structured HTML list ('sitemap') of HTML files
# Copyright (C) 1998 Daniel Naber
# version 1.08, 1998-08-16 (version number is independent from java version)
# See below for configuration.
# Usage from command line: ./tree.pl [htmldir] >outputfile
#
# See http://www.ev-stift-gymn.guetersloh.de/server/tree_e.html for the
# latest version. It would be nice to include a link to this page if you
# use the script to generate a public page.
#
# CHANGES:
# 1997-09-07: first version
# (...)
# 1998-28-04: new option: @includefiles, @excludepatterns now
# called @excludefiles; one space after $pictag
# 1998-10-06: checks if $htmldir exists and if it's a directory;
# $patternfile is now called $templatefile
# 1998-08-16: made ISO 8601 date default
#
# TODO/BUGS/PROBLEMS:
# $htmldir may not point to a link, links beneath $htmldir will be ignored
#
# COPYRIGHT:
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
# - user-configurable options ----------------------------------------------
# directory with the html files (may be overridden by a command line argument),
# don't set a trailing slash
# Directory to scan: the first command line argument when one is given (and
# is true), otherwise the built-in default. No trailing slash.
$htmldir = $ARGV[0] ? $ARGV[0] : "/www/iris";

# Template used to build the output page. Activate the second line instead
# and you will just get the list.
$templatefile = "tree-template.html";
#$templatefile = "";

# Prefix put in front of every URL.
$baseurl = "";

# 1 = run as a CGI script (an HTTP header is printed first).
$cgi = 1;

# Set both of the following options to 1 to generate a list that can be
# used offline:
$offline   = 0;    # 1 = generated file can be used from disk
$indexrefs = 0;    # 1 = link to 'dir/index.html' etc. instead of 'dir/'

# Names of the default files of a directory.
@indexfiles = qw(index.html index.shtml);

# Files with one of these suffixes are treated as HTML files.
@inchtml = qw(shtml html htm);

# Suffixes of pictures to list; the empty list means no pictures.
@incpics = ();
#@incpics = ('gif', 'jpg', 'jpeg');

# 1 = show the size in kB for every file.
$listsize = 0;

# The output file itself (path relative to $htmldir); it is listed with
# $selftitle and is not linked.
$self      = "/server/az.html";
$selftitle = "Carte du site d'IRIS";

# Only include these files/directories; '*' works as a wildcard. Leave the
# list empty to include everything that matches the suffix lists above,
# except for what @excludefiles names.
@includefiles = ();

# Never include these files/directories ('*' works as a wildcard).
@excludefiles = qw(
    /*.rc
    /altern*
    /annonce*
    /banner*
    /cosgo*
    /erreur*
    /les-iris/lbi*
    /maintenance.html
    /recherche.html
    /search.html
    /assises/*
    /c-altern/*
    /cgi-bin/*
    /educ/*
    /images/*
    /iris/private/*
    /les-iris/sep*
    /les-iris/li-*
    /proc/*
    /search/*
    /sgdg/*
    /tmp/*
    /WEBASSISES/*
);

# 1 = also list HTML files that fall into the "no title" case of getinfo.
$listwithouttitle = 1;

# Date layout; getdate replaces the words DAY, MONTH and YEAR.
$date = "DAY.MONTH.YEAR";      # german format
#$date = "MONTH/DAY/YEAR";     # american format
#$date = "YEAR-MONTH-DAY";     # date according to ISO 8601
# Markup strings placed around the list and in front of its items; they are
# used by list_output, getdefaultfile and getinfo.
# NOTE(review): every value below consists of a line break only, and each
# assignment is spread over two lines, so the commented-out variant leaves
# stray quote lines behind. Presumably the HTML tags that belonged here were
# lost -- confirm against the upstream script before running this file.
# for those of you who like the plain output:
#$dirtag = '
';
#$dirtag_end = '
';
#$foldertag = '
';
#$htmltag = '
';
#$pictag = '
';
#$nolinktag = '
';
# for those who like output with an icon in front of every item:
# (this seems to be valid HTML, but it's not good HTML)
$dirtag = '
';
$dirtag_end = '
';
$foldertag = '
';
$htmltag = '
';
$pictag = '
';
$nolinktag = '
';
# Former single-threshold settings (no longer active):
#$modifiedtag = ''; # mark files that changed not long ago
#$modifiedtime = 24; # mark files that are not older than $modifiedtime hours (0 = option off)

# Markers for recently changed files. Each pair is (marker text, maximum age
# in hours); getinfo tries the day tier first, then 3 days, then the week,
# and uses the first one that applies. An age of 0 switches a tier off.
($modifiedtag_day,  $modifiedtime_day)  = ('', 24);
($modifiedtag_3day, $modifiedtime_3day) = ('', 72);
($modifiedtag_week, $modifiedtime_week) = ('', 168);
# - nearly no configuration below ------------------------------------------
# $st = time(); # comment in if you're interested in runtime
use File::Find;
# Directory depth of the entry handled last by list_output.
$depth = 0;

# Running totals: number and size of the HTML files and of the pictures.
$htmlct   = 0;
$htmlsize = 0;
$picct    = 0;
$picsize  = 0;

# Output that has been built but not printed yet.
$partlist = "";

# The steps below depend on each other through global variables, so their
# order matters.
getdate();                        # fill in $date
init();                           # CGI header, sanity check, wildcards
first_part_output();              # load the template, print its first part
find(\&doperfile, $htmldir);      # collect the entries in @filelist
list_output();                    # print the list itself
last_part_output();               # print the rest of the template
# $diff = time() - $st; print STDERR "time: $diff secs\n"; # see above
exit;
# --------------------------------------------------------------------------
# Replace the words DAY, MONTH and YEAR (in any letter case, first
# occurrence each) in the global $date by the current local date. Day and
# month are written with two digits, the year with four.
sub getdate {
    # localtime fields 3..5: day of month, month (0..11), years since 1900
    my ($day, $month, $year) = (localtime(time()))[3, 4, 5];

    $day   = sprintf("%02d", $day);
    $month = sprintf("%02d", $month + 1);
    $year  = $year + 1900;

    $date =~ s#DAY#$day#i;
    $date =~ s#MONTH#$month#i;
    $date =~ s#YEAR#$year#i;
}
# Prepare the run: print the HTTP header in CGI mode, make sure $htmldir is
# a directory and turn the '*' wildcards of @excludefiles and @includefiles
# into their regular expression form.
sub init {
    if( $cgi ) {
        select(STDOUT);
        $| = 1;                                   # unbuffered output
        # a script name containing "nph-tree" gets its own status line
        $nph = 1 if( $0 =~ m#nph-tree# );
        print "HTTP/1.0 200 OK\n" if( $nph );
        print "Content-Type: text/html\n\n";
    }

    unless( -d $htmldir ) {
        print "Error: $0: '$htmldir' doesn't exist or isn't a directory.";
        exit;
    }

    # the loop variable aliases the array elements, so both lists are
    # changed in place
    s#\*#.*?#g for @excludefiles;
    s#\*#.*?#g for @includefiles;
}
# Load the template file into the global $output, substitute the date and
# print the leading part of the template.
# Reads: $templatefile, $date. Writes: $output (used again by
# last_part_output).
sub first_part_output {
$output = &load($templatefile);
# NOTE(review): the search pattern below is empty. Perl does not treat an
# empty pattern as "match nothing" -- it reuses the last successfully
# matched pattern. Presumably a date placeholder of the template was lost
# here; confirm against the upstream script.
$output =~ s##$date#igs;
# NOTE(review): as written the non-greedy group has nothing after it and so
# captures an empty string; presumably a marker for the start of the list
# followed the group -- confirm.
my ($first_part) = ($output =~ m#^(.*?)#is);
$first_part = "" if ( ! defined($first_part) ); # avoid warning
print $first_part;
}
# Callback for File::Find::find: decide whether the current entry belongs
# in the list and, if so, append its full path to the global @filelist.
#
# Reads:  $File::Find::name, $_, $htmldir, @includefiles, @excludefiles,
#         @indexfiles, @inchtml, @incpics
# Writes: @filelist (directories are stored with a trailing "/")
#
# An entry is kept when it passes the include/exclude patterns, is not one
# of the default files (@indexfiles) and is a directory, an HTML file or a
# picture.
sub doperfile {
    my $thisfile = $File::Find::name;

    # File::Find has changed into the directory it is scanning, so a
    # relative $File::Find::name would be resolved against the wrong
    # directory. $_ names the entry relative to the current directory and
    # works for absolute and relative start directories alike.
    $thisfile .= "/" if( -d $_ );

    # part after $htmldir; \Q...\E keeps characters such as '.' or '+' in
    # the directory name from being read as regular expression syntax
    my ($thisfile_rel) = ($thisfile =~ m#^\Q$htmldir\E(/.*)#);
    return if( ! defined($thisfile_rel) );    # not below $htmldir

    # include only files from @includefiles (the entries are regular
    # expressions by now, see init):
    if( scalar(@includefiles) >= 1 ) {
        my $do_use = 0;
        foreach my $inpat (@includefiles) {
            if( $thisfile_rel =~ m#^$inpat$# ) {
                $do_use = 1;
                last;
            }
        }
        return if( ! $do_use );
    }

    # exclude files from @excludefiles:
    foreach my $expat (@excludefiles) {
        return if( $thisfile_rel =~ m#^$expat$# );
    }

    # default files are not listed on their own; getdefaultfile adds them
    # together with their directory
    if( ! &isfile($thisfile, @indexfiles)
        && ($thisfile =~ m#/$#
            || &isfileclass($thisfile, @inchtml)
            || &isfileclass($thisfile, @incpics)) ) {
        push(@filelist, $thisfile);
    }
}
# Print the nested list for all entries collected in @filelist.
#
# The entries are sorted and then handled one by one. For every entry the
# path below $htmldir ($url), its number of slashes ($depth) and its
# directory part ($thisdir) are compared with the values of the previous
# entry to decide whether a list level has to be opened or closed.
# Output is collected in the global $partlist and printed in pieces.
#
# Reads:  @filelist, $htmldir, $baseurl, $dirtag, $dirtag_end, $nolinktag
# Writes: @filelist (sorted), $url (also read by getinfo), $depth,
#         $olddepth, $olddir, $partlist
sub list_output {
my $thisfile;
my $dirsdone = ""; # have we been here already?
my $thisdir = "";
# number of lists that are open at the moment
my $dirtag_ct = 0;
@filelist = sort(@filelist);
print "$dirtag\n";
$dirtag_ct++;
foreach $thisfile (@filelist) {
# path below $htmldir, starting with "/"
($url) = ($thisfile =~ m#$htmldir(/.*)#i);
$olddepth = $depth;
# the substitution replaces every "/" by itself; only its return value,
# the number of slashes, is of interest
$depth = ($url =~ s#/#/#gi); # 1 = html-root
$olddir = $thisdir;
# everything up to and including the last "/"
($thisdir) = ($url =~ m#(.*/).*?#i);
# $dirsdone holds one directory per line, hence the /m anchors
if( $thisdir ne $olddir && ! ($dirsdone =~ m#^$thisdir$#m) ) { # deeper level or same level
$dirsdone .= "$thisdir\n";
# a zero or negative repeat count yields the empty string; minzero keeps
# the counter in step with that
$partlist .= "$dirtag_end\n" x ($olddepth-$depth+1);
$dirtag_ct -= &minzero($olddepth-$depth+1);
# getdefaultfile appends the entry of the default file to $partlist itself;
# only a directory without a default file gets the plain entry
$partlist .= " $nolinktag$baseurl$url\n" if( ! &getdefaultfile("$htmldir$thisdir") );
$partlist .= "$dirtag\n";
$dirtag_ct++;
} elsif( ! ($thisdir =~ m#$olddir#i) ) { # higher level
$partlist .= "$dirtag_end\n" x ($olddepth-$depth);
$dirtag_ct -= &minzero($olddepth-$depth);
$partlist .= &getinfo($thisfile, 0);
} else { # same level as before
$partlist .= &getinfo($thisfile, 0);
# drop lists that were opened and closed again without any item
$partlist =~ s#$dirtag\n$dirtag_end\n##ig;
print $partlist;
$partlist = "";
}
}
print $partlist;
# close list correctly:
print "$dirtag_end\n" x $dirtag_ct;
}
# Put the totals into the template held in the global $output and print
# the trailing part of the template.
# Reads: $output, $htmlct, $picct. Reads and rewrites: $htmlsize, $picsize
# (bytes are turned into units of 1000 bytes, rounded down).
sub last_part_output {
$htmlsize = int($htmlsize/1000); # size in kB
$picsize = int($picsize/1000);
# NOTE(review): the four search patterns below are empty. Perl reuses the
# last successfully matched pattern for an empty pattern, so these lines do
# not look for a placeholder as written. Presumably the placeholders of the
# template were lost here; confirm against the upstream script.
$output =~ s##$htmlct#igs;
$output =~ s##$htmlsize#igs;
$output =~ s##$picct#igs;
$output =~ s##$picsize#igs;
# NOTE(review): as written the group is greedy and unanchored at the front,
# so it captures all of $output; presumably a marker for the end of the
# list preceded the group -- confirm.
my ($last_part) = ($output =~ m#(.*)$#is);
$last_part = "" if ( ! defined($last_part) ); # avoid warning
print $last_part;
}
# --------------------------------------------------------------------------
# Look for a default file in a directory.
# Argument: directory path including the trailing "/".
# The names in @indexfiles are tried in order. For the first one that
# exists, its entry (built by getinfo as an index file) is appended to the
# global $partlist and the file name is returned. Returns 0 when the
# directory has no default file.
sub getdefaultfile {
    my ($dir) = @_;
    for my $candidate (@indexfiles) {
        next unless -e "$dir$candidate";
        $partlist .= &getinfo("$dir$candidate", 1);
        return $candidate;
    }
    return 0;
}
# Return the marker for a recently changed file, or undef when the file
# falls into none of the three age tiers. The youngest tier wins.
sub _modified_marker {
    my $file = shift;
    return $modifiedtag_day  if( &is_it_modified($file, $modifiedtime_day) );
    return $modifiedtag_3day if( &is_it_modified($file, $modifiedtime_3day) );
    return $modifiedtag_week if( &is_it_modified($file, $modifiedtime_week) );
    return;
}

# Build the list entry for one file and update the running totals.
#
# Arguments: full path of the file; flag that is true when the file is the
#            default file of its directory (it then gets $foldertag instead
#            of $htmltag).
# Returns:   the entry text; an empty string for files that are neither
#            HTML files nor pictures.
# Reads:     $url (set by list_output), $htmldir, $baseurl, $offline,
#            $self, $selftitle, $listsize, $listwithouttitle, $indexrefs
#            and the tag settings.
# Writes:    $htmlct/$htmlsize for HTML files, $picct/$picsize for pictures.
#
# NOTE(review): $suffix and $linkurl are computed but not referenced by the
# strings below, while the $indexrefs step at the end looks for an href
# attribute in the entry. Presumably the link markup of the entry strings
# was lost; both variables are kept for that reason -- confirm against the
# upstream script.
sub getinfo
{
    my $thisfile = shift;
    my $isindexfile = shift;
    my ($suffix) = ($thisfile =~ m#.*\.(.*)#);
    my ($size, $exactsize) = &getsize($thisfile);
    my $entry = "";
    my $linkurl;
    $offline ? ($linkurl = $htmldir.$url) : ($linkurl = $url);
    $linkurl = $baseurl.$linkurl;

    if( &isfileclass($thisfile, @inchtml) ) {
        $htmlsize += $exactsize;
        $htmlct++;
        my $string = &load_part($thisfile);
        if( $thisfile eq "$htmldir$self" ) { # output file itself
            $entry .= " $nolinktag$selftitle";
        # NOTE(review): as written the group captures an empty string;
        # presumably it was enclosed in title tags -- confirm.
        } elsif( $string =~ m#(.*?)#is ) { # common case
            my $title = $1;    # save the capture before anything else runs
            if( $isindexfile ) {
                $entry .= " $foldertag";
            } else {
                $entry .= " $htmltag";
            }
            my $marker = &_modified_marker($thisfile);
            $entry .= " $marker" if( defined($marker) );
            $entry .= " $title";
            $entry .= " ($size kB)" if( $listsize );
            $entry .= "";
        } else { # files with no title tag
            if( $listwithouttitle ) {
                $entry .= " $nolinktag$baseurl$url";
                $entry .= " ($size kB)" if( $listsize );
            }
        }
        $entry .= "\n";
    } elsif( &isfileclass($thisfile, @incpics) ) {
        $picsize += $exactsize;
        $picct++;
        my ($filenameonly) = ($url =~ m#.*/(.*)#i);
        $entry .= " $pictag";
        # Pictures use the same three age tiers as HTML files. The former
        # code referred to the retired $modifiedtag setting and passed no
        # threshold to is_it_modified, so a picture was never marked.
        my $marker = &_modified_marker($thisfile);
        $entry .= " $marker" if( defined($marker) );
        $entry .= " $filenameonly";
        $entry .= " ($size kB)" if( $listsize );
        $entry .= "\n";
    }

    # links to dir/ or to dir/index.html (see configuration section)
    if( $indexrefs ) {
        my ($filepart) = ($thisfile =~ m#.*/(.*)#);
        $entry =~ s#href="(.*?/)"#href="$1$filepart"#i;
    }
    return $entry;
}
# Tell whether a file was changed recently.
# Arguments: file name; maximum age in hours (0 or undef = check is off).
# Returns:   1 when the check is on and the file was modified less than
#            that many hours ago, otherwise 0. A file that cannot be
#            examined counts as not modified.
sub is_it_modified {
    my $filename = shift;
    my $modifiedtime = shift;
    return 0 if( ! $modifiedtime );

    # field 9 of stat is the modification time; kept in a lexical so that
    # no global variable is overwritten
    my $mtime = (stat($filename))[9];
    return 0 if( ! defined($mtime) );    # stat failed (e.g. file is gone)

    return ((time() - $mtime) < ($modifiedtime*60*60)) ? 1 : 0;
}
# Get the size of a file.
# Argument: file name.
# Returns:  a two element list: the size in units of 1000 bytes (rounded
#           down, but never less than 1) and the exact size in bytes.
#           An empty or missing file gives (1, 0).
sub getsize { # get filesize in (kB, bytes)
    my $file = shift;
    # -s gives an empty string for an empty file and undef for a file that
    # cannot be examined; use 0 in both cases so that callers can add it up
    my $exactsize = (-s $file) || 0;
    my $size = int($exactsize/1000);
    $size = 1 if( $size == 0 );
    return ($size, $exactsize);
}
# Check the suffix of a file name.
# Arguments: file name, followed by the list of suffixes (without the dot).
# Returns:   1 when the name ends in "." plus one of the suffixes, else 0.
sub isfileclass { # check filesuffix
    my ($file, @fileclass) = @_;
    my $matched = 0;
    for my $suffix (@fileclass) {
        next unless $file =~ m#\.$suffix$#;
        $matched = 1;
        last;
    }
    return $matched;
}
# Check the last component of a path.
# Arguments: path, followed by the list of file names to look for.
# Returns:   1 when the path ends in "/" plus one of the names, else 0.
sub isfile { # check filename
    my ($file, @files) = @_;
    while( @files ) {
        my $name = shift(@files);
        return 1 if( $file =~ m#/$name$# );
    }
    return 0;
}
# Load the beginning of a file: all lines up to and including the first
# line that contains a closing title tag (letter case is ignored). A file
# without such a line is returned completely.
# Argument: file name.
# Returns:  the text read; an empty string for an empty file.
# Dies when the file cannot be opened.
#
# NOTE(review): the read from the file handle and the stop pattern were
# missing in the previous text of this sub and the end of its header comment
# had slipped onto a line of its own. Both were restored on the assumption
# that the closing title tag is the stop marker (getinfo looks for the title
# in the returned text) -- confirm against the upstream script.
sub load_part { # only load file till the closing title tag is reached
    my $file = shift;
    my $string = "";
    # three-argument open: the file name is taken literally, whatever
    # characters it contains
    open(my $input, '<', $file) || die "Cannot open '$file': $!";
    while( defined(my $line = <$input>) ) {
        $string .= $line;
        last if( $line =~ m#</title>#i );
    }
    close($input);
    return $string;
}
# Load a complete file.
# Argument: file name.
# Returns:  the content of the file as one string; an empty string when no
#           name is given or the file cannot be opened.
#
# NOTE(review): the previous text of this sub assigned an empty list instead
# of reading from the file handle, although it switched to slurp mode right
# before; the read was restored on that basis.
sub load {
    my $file = shift;
    return "" if( ! defined($file) || $file eq "" );
    open(my $input, '<', $file) || return "";
    # slurp mode for this sub only; the former value of $/ comes back when
    # the sub is left
    local $/;
    my $string = <$input>;
    close($input);
    $string = "" if( ! defined($string) );
    return $string;
}
# Clamp a number at zero: returns the argument when it is greater than 0,
# otherwise 0.
sub minzero {
    my ($var) = @_;
    return $var > 0 ? $var : 0;
}