#!/usr/bin/perl -w
# Creates a structured HTML list ('sitemap') of HTML files
# Copyright (C) 1998 Daniel Naber
# version 1.08, 1998-08-16 (version number is independent from java version)
# See below for configuration.
# Usage from command line: ./tree.pl [htmldir] >outputfile
#
# See http://www.ev-stift-gymn.guetersloh.de/server/tree_e.html for the
# latest version. It would be nice to include a link to this page if you
# use the script to generate a public page.
#
# CHANGES:
# 1997-09-07: first version
# (...)
# 1998-28-04: new option: @includefiles, @excludepatterns now
#             called @excludefiles; one space after $pictag
# 1998-10-06: checks if $htmldir exists and if it's a directory;
#             $patternfile is now called $templatefile
# 1998-08-16: made ISO 8601 date default
#
# TODO/BUGS/PROBLEMS:
# $htmldir may not point to a link, links beneath $htmldir will be ignored
#
# COPYRIGHT:
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#

use strict;
use File::Find;

# - user-configurable options ----------------------------------------------

# directory with the html files (may be overridden by command line argument),
# don't set a trailing slash
my $htmldir = "/www/iris";
# A web server may pass a query string that contains no '=' to a CGI script
# as command line arguments, so the argument is only honoured when the
# script is not running under a web server.
if( ! $ENV{GATEWAY_INTERFACE} && defined($ARGV[0]) && $ARGV[0] ne "" ) {
    $htmldir = $ARGV[0];
}

my $templatefile = "tree-template.html";   # take this file to build the output page
#$templatefile = "";                       # comment in and you'll just get the list
my $baseurl = "";                          # this will be in front of any URL
my $cgi = 1;                               # set to 1 to use this as a cgi script

# set the following options both to 1 to generate a list you can use offline
my $offline = 0;       # enables you to use generated file offline (from disk)
my $indexrefs = 0;     # make links to 'dir/index.html' etc. (instead of 'dir/')

my @indexfiles = ('index.html', 'index.shtml');   # default-files' names
my @inchtml = ('shtml', 'html', 'htm');   # take files with these suffixes as HTML files
my @incpics = ();                         # don't include pictures
#@incpics = ('gif', 'jpg', 'jpeg');       # list pictures with these suffixes
my $listsize = 0;                         # include size in kb for every file?
my $self = "/server/az.html";             # output file (relative path; won't be linked in the list)
my $selftitle = "Carte du site d'IRIS";

# do only include these files/directories, use '*' as a wildcard,
# use '@includefiles = ();' to include all files matching the pattern
# except those in @excludefiles:
my @includefiles = ();

# do not include these files/directories:
my @excludefiles = (
    '/*.rc', '/altern*', '/annonce*', '/banner*', '/cosgo*', '/erreur*',
    '/les-iris/lbi*', '/maintenance.html', '/recherche.html', '/search.html',
    '/assises/*', '/c-altern/*', '/cgi-bin/*', '/educ/*', '/images/*',
    '/iris/private/*', '/les-iris/sep*', '/les-iris/li-*', '/proc/*',
    '/search/*', '/sgdg/*', '/tmp/*', '/WEBASSISES/*'
);

my $listwithouttitle = 1;     # include html files without <title>?

my $date = "DAY.MONTH.YEAR";      # german format
#$date = "MONTH/DAY/YEAR";        # american format
#$date = "YEAR-MONTH-DAY";        # date according to ISO 8601

# for those who like output with an icon in front of every item:
# (this seems to be valid HTML, but it's not good HTML)
my $dirtag = '<dl>';
my $dirtag_end = '</dl>';
my $foldertag = '<dt><img src="/images/tree_images/folder.open.gif" alt="*">';
my $htmltag = '<dt><img src="/images/tree_images/generic.gif" alt="*">';
my $pictag = '<dt><img src="/images/tree_images/image2.gif" alt="*">';
my $nolinktag = '<dt><img src="/images/tree_images/folder.open.gif" alt="*">';

# for those of you who like the plain output:
#$dirtag = '<ul>';
#$dirtag_end = '</ul>';
#$foldertag = '<li>';
#$htmltag = '<li>';
#$pictag = '<li>';
#$nolinktag = '<li>';

# Mark files that changed not long ago.  Each tag is used for files that are
# not older than the matching number of hours (0 = option off); the first
# threshold that applies wins.
my $modifiedtag_day = '<img src="/images/tree_images/new_day.gif">';
my $modifiedtime_day = 24;
my $modifiedtag_3day = '<img src="/images/tree_images/new_3day.gif">';
my $modifiedtime_3day = 72;
my $modifiedtag_week = '<img src="/images/tree_images/new_week.gif">';
my $modifiedtime_week = 168;

# - nearly no configuration below ------------------------------------------

# my $st = time();      # comment in if you're interested in runtime

my $depth = 0;
my ($htmlct, $htmlsize, $picct, $picsize) = (0, 0, 0, 0);   # count size and number
my $partlist = "";      # list output collected but not yet printed
my $output = "";        # contents of the template file
my @filelist = ();      # files and directories collected by doperfile()
my $url = "";           # URL path of the entry list_output() is working on

getdate();
init();
first_part_output();
find(\&doperfile, $htmldir);
list_output();
last_part_output();
# my $diff = time() - $st; print STDERR "time: $diff secs\n";   # see above
exit;

# --------------------------------------------------------------------------

# Replaces the DAY, MONTH and YEAR placeholders in $date with the current
# local date (two-digit day and month, four-digit year).
sub getdate {
    my ($mday, $mon, $year) = (localtime(time()))[3, 4, 5];
    $mon  = sprintf("%02d", $mon + 1);      # localtime() counts months from 0
    $mday = sprintf("%02d", $mday);
    $year += 1900;
    $date =~ s#DAY#$mday#i;
    $date =~ s#MONTH#$mon#i;
    $date =~ s#YEAR#$year#i;
}

# Prints the CGI header (if $cgi is set), checks $htmldir and turns the
# wildcard patterns in @excludefiles and @includefiles into regular
# expression source strings.
sub init {
    if( $cgi ) {
        select(STDOUT);
        $| = 1;
        my $nph = ( $0 =~ m#nph-tree# );
        print "HTTP/1.0 200 OK\n" if( $nph );
        print "Content-Type: text/html\n\n";
    }
    # a trailing slash would break the prefix matching in doperfile()
    $htmldir =~ s{/+\z}{} if( length($htmldir) > 1 );
    if( ! -d $htmldir ) {
        print "Error: $0: '$htmldir' doesn't exist or isn't a directory.";
        exit;
    }
    # enable '*' as wildcard; everything else is taken literally, so that
    # e.g. the '.' in '/search.html' only matches a dot
    foreach my $pattern (@excludefiles, @includefiles) {
        $pattern = join('.*?', map { quotemeta($_) } split(/\*/, $pattern, -1));
    }
}

# Loads the template, inserts the date and prints everything in front of
# the '<!-- $list -->' placeholder.
sub first_part_output {
    $output = load($templatefile);
    $output =~ s#<!-- \$date -->#$date#igs;
    my ($first_part) = ($output =~ m#^(.*?)<!-- \$list -->#is);
    print $first_part if( defined($first_part) );
}

# Callback for File::Find: appends the current file or directory to
# @filelist unless it is filtered out by @includefiles/@excludefiles, is an
# index file, or is neither a directory, an HTML file nor a picture.
sub doperfile {
    my $thisfile = $File::Find::name;
    # File::Find has changed into the directory, so test the basename in $_;
    # testing $File::Find::name would fail for a relative $htmldir
    $thisfile .= "/" if( -d $_ );
    my ($thisfile_rel) = ($thisfile =~ m#^\Q$htmldir\E(/.*)#);   # part after $htmldir
    return if( ! defined($thisfile_rel) );

    # include only files from @includefiles:
    if( @includefiles ) {
        my $do_use = 0;
        foreach my $inpat (@includefiles) {
            if( $thisfile_rel =~ m#^(?:$inpat)$# ) {
                $do_use = 1;
                last;
            }
        }
        return if( ! $do_use );
    }
    # exclude files from @excludefiles:
    foreach my $expat (@excludefiles) {
        return if( $thisfile_rel =~ m#^(?:$expat)$# );
    }

    return if( isfile($thisfile, @indexfiles) );
    if( $thisfile =~ m#/$#
        || isfileclass($thisfile, @inchtml)
        || isfileclass($thisfile, @incpics) ) {
        push(@filelist, $thisfile);
    }
}

# Prints the nested list for all entries in @filelist.  Sets the file-level
# variable $url for every entry because getinfo() reads it.
sub list_output {
    my %dirsdone;           # directories a sublist has been opened for
    my $thisdir = "";
    my $dirtag_ct = 0;      # number of lists that are currently open

    @filelist = sort(@filelist);
    print "$dirtag\n";
    $dirtag_ct++;
    foreach my $thisfile (@filelist) {
        ($url) = ($thisfile =~ m#^\Q$htmldir\E(/.*)#);
        my $olddepth = $depth;
        $depth = ($url =~ tr#/##);      # number of slashes, 1 = html-root
        my $olddir = $thisdir;
        ($thisdir) = ($url =~ m#(.*/)#);
        if( $thisdir ne $olddir && ! $dirsdone{$thisdir} ) {
            # deeper level or same level
            $dirsdone{$thisdir} = 1;
            my $closing = minzero($olddepth - $depth + 1);
            $partlist .= "$dirtag_end\n" x $closing;
            $dirtag_ct -= $closing;
            # getdefaultfile() adds the entry for the index file itself
            $partlist .= " $nolinktag$baseurl$url\n"
                if( ! getdefaultfile("$htmldir$thisdir") );
            $partlist .= "$dirtag\n";
            $dirtag_ct++;
        } elsif( $thisdir !~ m#\Q$olddir\E#i ) {
            # higher level
            my $closing = minzero($olddepth - $depth);
            $partlist .= "$dirtag_end\n" x $closing;
            $dirtag_ct -= $closing;
            $partlist .= getinfo($thisfile, 0);
        } else {
            # same level as before
            $partlist .= getinfo($thisfile, 0);
            # drop lists that were opened and closed without any entry
            $partlist =~ s#\Q$dirtag\E\n\Q$dirtag_end\E\n##ig;
            print $partlist;
            $partlist = "";
        }
    }
    print $partlist;
    # close list correctly:
    print "$dirtag_end\n" x minzero($dirtag_ct);
}

# Inserts the counters into the template and prints everything behind the
# '<!-- $list -->' placeholder.
sub last_part_output {
    $htmlsize = int($htmlsize/1000);    # size in kB
    $picsize = int($picsize/1000);
    $output =~ s#<!-- \$htmlct -->#$htmlct#igs;
    $output =~ s#<!-- \$htmlsize -->#$htmlsize#igs;
    $output =~ s#<!-- \$picsct -->#$picct#igs;
    $output =~ s#<!-- \$picsize -->#$picsize#igs;
    my ($last_part) = ($output =~ m#<!-- \$list -->(.*)$#is);
    print $last_part if( defined($last_part) );
}

# --------------------------------------------------------------------------

# Looks for one of @indexfiles in directory $dir (given with a trailing
# slash).  If one exists, its list entry is appended to $partlist and its
# name is returned; otherwise 0 is returned.
sub getdefaultfile {
    my $dir = shift;
    foreach my $item (@indexfiles) {
        if( -e "$dir$item" ) {      # there's a defaultfile
            $partlist .= getinfo("$dir$item", 1);
            return $item;
        }
    }
    return 0;
}

# Returns the list entry for $thisfile (empty string if it is neither an
# HTML file nor a picture) and updates the size/number counters.
# $isindexfile selects the folder icon instead of the file icon.  The link
# target is built from the file-level variable $url set by list_output().
sub getinfo {
    my $thisfile = shift;
    my $isindexfile = shift;
    my ($size, $exactsize) = getsize($thisfile);
    my $entry = "";
    my $linkurl = $baseurl . ($offline ? $htmldir.$url : $url);

    if( isfileclass($thisfile, @inchtml) ) {
        $htmlsize += $exactsize;
        $htmlct++;
        my $string = load_part($thisfile);
        if( $thisfile eq "$htmldir$self" ) {    # output file itself
            $entry .= " $nolinktag$selftitle";
        } elsif( $string =~ m#<title>(.*?)</title>#is ) {   # common case
            my $title = $1;
            $entry .= $isindexfile ? " $foldertag" : " $htmltag";
            $entry .= modified_marker($thisfile);
            $entry .= " <a href=\"$linkurl\">$title</a>";
            $entry .= " ($size kB)" if( $listsize );
        } elsif( $listwithouttitle ) {          # files with no title tag
            $entry .= " $nolinktag$baseurl$url";
            $entry .= " ($size kB)" if( $listsize );
        }
        $entry .= "\n";
    } elsif( isfileclass($thisfile, @incpics) ) {
        $picsize += $exactsize;
        $picct++;
        my ($filenameonly) = ($url =~ m#.*/(.*)#);
        $entry .= " $pictag";
        $entry .= modified_marker($thisfile);
        $entry .= " <a href=\"$linkurl\">$filenameonly</a>";
        $entry .= " ($size kB)" if( $listsize );
        $entry .= "\n";
    }

    # links to dir/ or to dir/index.html (see configuration section)
    if( $indexrefs ) {
        my ($filepart) = ($thisfile =~ m#.*/(.*)#);
        $entry =~ s#href="(.*?/)"#href="$1$filepart"#i;
    }
    return $entry;
}

# Returns " <tag>" for the first of the day/3-day/week thresholds that
# $file's modification time falls into, or "" if none applies.
sub modified_marker {
    my $file = shift;
    return " $modifiedtag_day"  if( is_it_modified($file, $modifiedtime_day) );
    return " $modifiedtag_3day" if( is_it_modified($file, $modifiedtime_3day) );
    return " $modifiedtag_week" if( is_it_modified($file, $modifiedtime_week) );
    return "";
}

# Returns 1 if $filename was modified less than $modifiedtime hours ago.
# Returns 0 otherwise, and also if $modifiedtime is 0 or missing or the
# file cannot be stat'ed.
sub is_it_modified {
    my $filename = shift;
    my $modifiedtime = shift;
    return 0 if( ! $modifiedtime );
    my $mtime = (stat($filename))[9];
    return 0 if( ! defined($mtime) );
    return ( (time() - $mtime) < ($modifiedtime*60*60) ) ? 1 : 0;
}

# Returns the file size as (kB, bytes); the kB value is at least 1.
# Missing and empty files count as 0 bytes.
sub getsize {
    my $file = shift;
    my $exactsize = -s $file;
    $exactsize = 0 if( ! $exactsize );   # -s yields undef or "" here
    my $size = int($exactsize/1000);
    $size = 1 if( $size == 0 );
    return ($size, $exactsize);
}

# Returns 1 if $file ends with '.' followed by one of the given suffixes.
sub isfileclass {
    my $file = shift;
    my @fileclass = @_;
    foreach my $item (@fileclass) {
        return 1 if( $file =~ m#\.\Q$item\E$# );
    }
    return 0;
}

# Returns 1 if the last path component of $file is one of the given names.
sub isfile {
    my $file = shift;
    my @files = @_;
    foreach my $item (@files) {
        return 1 if( $file =~ m#/\Q$item\E$# );
    }
    return 0;
}

# Returns the beginning of $file up to and including the line that contains
# '</title>' (the whole file if there is no such line).  A file that cannot
# be opened is reported on STDERR and yields "", so one unreadable file does
# not abort a half-written page.
sub load_part {
    my $file = shift;
    my $string = "";
    my $fh;
    if( ! open($fh, "<", $file) ) {
        warn "Cannot open '$file': $!\n";
        return "";
    }
    while( my $line = <$fh> ) {
        $string .= $line;
        last if( $line =~ m#</title>#i );
    }
    close($fh);
    return $string;
}

# Returns the complete contents of $file, or "" if it cannot be opened or
# is empty.
sub load {
    my $file = shift;
    my $fh;
    open($fh, "<", $file) or return "";
    local $/ = undef;       # slurp mode, restored on return
    my $string = <$fh>;
    close($fh);
    return defined($string) ? $string : "";
}

# Returns 0 if the argument is < 0, else returns the argument.
sub minzero {
    my $var = shift;
    return ( $var > 0 ) ? $var : 0;
}