Home | Downloads | * SiteGen Web Site Indexer |     Share This Page

#!/usr/bin/perl -w
# sitegen.pl populates a website's pages with drop-down menus.
# this script relies on these two custom Meta tags:
# <META NAME="group" CONTENT="GROUP/SUBGROUP/SUBGROUP">
# <META NAME="groupTitle" CONTENT="Group Title for Menus">
# the "group" tag should be in each page
# the "groupTitle" tag only needs to be in
# one of each group's pages.
# This scheme also relies on pages consistently having page titles,
# and unique page titles within each group's pages.
# set group to "NOSHOW" for pages not to appear in the menus.
# also, each named group must have at least one associated page
# so it can be displayed in the menus
# if an argument of "-z" is provided to this script,
# it will remove all its content from the
# set of Web pages
# WARNING: this script replaces pages en masse.
# ALWAYS maintain a full backup of your page set.
# Copyright (c) 2005, P. Lutus, released under the GPL.
# BEGIN user settings
$source = ($ARGV[0])?$ARGV[0]:"/path/to/Web/pages";
$plainTextIndexPath = "/path/plainTextSiteIndex.txt";
$rightArrowPath = "$source/images/rightarrow.png";
$leftArrowPath  = "$source/images/leftarrow.png";
# END user settings
$startBlockTag = "<!-- SiteIndexBegin -->";
$endBlockTag   = "<!-- SiteIndexEnd -->";
$idTag = "<!-- Menus created by SiteGen, http://www.arachnoid.com/SiteGen -->";
$scriptTag =
"<script language=\"JavaScript\">\nfunction toNewPage(t) { location.href = t.options[t.selectedIndex].value; }\n</script>\n";
use File::Find;
sub processDir {
    my $path = $File::Find::name;
    # if it's a file
    if ( -f $path ) {
        # if it has an apparent HTML/PHP page suffix
        # and doesn't have a tilde at the start of its name
        if ( $path =~ /\.(htm[l|]|php)$/ && !( $path =~ /\/\~.*/ ) ) {
            push( @fileList, $path );
        }
    }
}
sub readFile {
    local $/;
    open( FILE, $_[0] );
    my $data = <FILE>;
    close FILE;
    return $data;
}
sub writeFile {
    open( FILE, ">$_[0]" );
    print FILE $_[1];
    close FILE;
}
sub readMetaTag {
    my ( $dataRef, $tag ) = @_;
    my ($value) =
      ${$dataRef} =~ /meta\s*name\s*=\s*\"$tag\"\s*content\s*=\s*\"(.*?)\"/is;
    return $value || "";
}
sub readPageTitle {
    my $dataRef = $_[0];
    my ($title) = ${$dataRef} =~ /<title>\s*(.*?)\s*<\/title>/is;
    if ( !$title ) {
        $path = $_[1];
        if ( !$path =~ /\.php$/i ) {
            print "Error: HTML page without a title: \"$path\"\n";
        }
        $title = "No Title";
    }
    return $title;
}
# relative path for HTML links
sub makeRelPath {
    my ( $from, $to ) = @_;
    my @fromlist = split( "/", $from );
    my @tolist   = split( "/", $to );
    # are the paths identical?
    if ( $from eq $to ) {
        $result = pop @tolist;
    }
    else {
        # drop path elements until unequal
        while ( $fromlist[0] eq $tolist[0] ) {
            shift @fromlist;
            shift @tolist;
        }
        # create relative path from -> to
        $result = "../" x $#fromlist;
        $result .= join( "/", @tolist );
    }
    return $result;
}
# use ' members' and ' grouptitle'
# (note leading spaces)
# to assure that these items appear
# before the subgroup list
sub create_siteindex {
    my @fileList = @{ $_[0] };
    for my $file (@fileList) {
        my $data       = readFile($file);
        my $groupName  = readMetaTag( \$data, "group" );
        my $groupTitle = readMetaTag( \$data, "groupTitle" );
        my $pageTitle  = readPageTitle( \$data, $file );
        if ( length($groupName) > 0 ) {
            $hasgroup++;
            if ( $groupName eq "NOSHOW" ) {
                # exclude these pages
                $noshow++;
            }
            else {
                my $tree = $_[1];
                my @items = split( "/", $groupName );
                for my $item (@items) {
                    $tree->{$item} = {} unless defined $tree->{$item};
                    $tree = $tree->{$item};
                }
                $tree->{' members'}{$pageTitle} = $file;
                if ( length($groupTitle) > 0 ) {
                    $tree->{' grouptitle'} = $groupTitle;
                }
            }
        }
        $total++;
    }
}
# just for debugging
sub debug_dump_hash {
    my ( $treeRef, $path ) = @_;
    for my $key ( sort keys %{$treeRef} ) {
        my $value = $treeRef->{$key};
        if ( ref($value) eq "HASH" ) {
            debug_dump_hash( \%{$value}, "$path|$key" );
        }
        else {
            print "$path|$key = $value\n";
        }
    }
}
sub makeSiteIndexBlock {
    my ( $parentRef, $keysetRef, $treeRef, $i, $sourcePath ) = @_;
    my $result = $startBlockTag . "\n" . $idTag . "\n<table><tr>";
    for ( $j = 1 ; $j <= $i ; $j++ ) {
        my $destPath = $pagePaths[$j];
        my $destName = $groupTitles[$j];
        $result .=
            "<td><a href=\""
          . makeRelPath( $sourcePath, $destPath ) . "\">"
          . $destName
          . "</a>&nbsp;|&nbsp;</td>";
    }
    $result .= "<td>\n" . $scriptTag;
    $result .= "<select onChange=\"toNewPage(this)\" title=\"Open this list to choose a page\">\n";
    # make a list of members of this grup
    # and the first members of any sibling groups
    my %optionHash = ();
    # look for members of this group
    # $key = page title, $value = page path
    for my $key ( @{$keysetRef} ) {
        my $value = $treeRef->{$key};
        $optionHash{$key} = $value;
    }
    # now look for sibling groups and
    # locate their first member pages
    for my $key ( %{$parentRef} ) {
        my $value = $parentRef->{$key};
        if ( ref($value) eq "HASH" ) {
            if ( $key ne ' members' && $key ne ' grouptitle' ) {
                $groupTitle = $value->{' grouptitle'};
                my $memlist  = $value->{' members'};
                my @keyset   = ( sort keys %{$memlist} );
                my $firstkey = $keyset[0];
                if ( !$firstkey || !defined $memlist->{$firstkey} ) {
                    print "Fatal Error: no index page for group $key.\n";
                    print "Be sure there is at least one page\n";
                    print "for each named group. Quitting now.\n";
                    exit 1;
                }
                my $firstvalue = $memlist->{$firstkey};
                $optionHash{$groupTitle} = $firstvalue;
            }
        }
    }
    # now compile the option drop-down list
    $selIndex = 0;
    $k        = 0;
    for my $key ( sort keys %{optionHash} ) {
        my $value = $optionHash{$key};
        my $relPath = makeRelPath( $sourcePath, $value );
        $sel = "";
        if ( $value eq $sourcePath ) {
            $sel      = " selected";
            $selIndex = $k;
        }
        $result .=
          "<option value=\"" . $relPath . "\"" . $sel . ">" . $key . "\n";
        $k++;
    }
    $result .= "</select>\n</td>";
    # now rescan the option array
    # to create the arrowed items
    $k = 0;
    for my $key ( sort keys %{optionHash} ) {
        my $value = $optionHash{$key};
        my $relPath = makeRelPath( $sourcePath, $value );
        if ( $k == $selIndex - 1 ) {    # left arrow
            $result .= "<td><a href=\"" . $relPath . "\" title=\"Click for prior page\">";
            $result .=
                "<img src=\""
              . makeRelPath( $sourcePath, $leftArrowPath )
              . "\" border=\"0\"></a></td>";
        }
        elsif ( $k == $selIndex + 1 ) {    # right arrow
            $result .= "<td><a href=\"" . $relPath . "\" title=\"Click for next page\">";
            $result .=
                "<img src=\""
              . makeRelPath( $sourcePath, $rightArrowPath )
              . "\" border=\"0\"></a></td>";
        }
        $k++;
    }
    $result .= "</tr></table>\n" . $endBlockTag;
}
# process all the member pages of a named group
sub process_group {
    my ( $parentRef, $keysetRef, $treeRef, $i ) = @_;
    my $groupPath = "";
    my $pagePath  = "";
    for ( $j = 1 ; $j <= $i ; $j++ ) {
        $groupPath .= "$groupTitles[$j]|";
        $pagePath  .= "$pagePaths[$j]|";
    }
    # $key = page title, $value = page path
    for my $key ( @{$keysetRef} ) {
        my $value = $treeRef->{$key};
        print INDEXFILE "$groupPath$key\t$value\n";
        my $pageData = readFile($value);
        $changed = 0;
        if ($zap) {    # if only remove index content
            if ( $pageData =~ /$startBlockTag/ ) {
                $pageData =~ s/($startBlockTag.*?$endBlockTag\s*)//isg;
                $changed = 1;
            }
        }
        else {
            # the site index block is unique to each page
            # because it uses relative page addressing
            # so it must be regenerated for each page
            my $topIndexBlock =
              makeSiteIndexBlock( $parentRef, $keysetRef, $treeRef, $i,
                $value );
            my $bottomIndexBlock = $topIndexBlock;
            # the bottom index doesn't need the Javascript code
            $bottomIndexBlock =~ s/<script.*?<\/script>\n//sg;
            # get the original blocks from the page data
            my ( $originalTop, $originalBottom ) =
              $pageData =~ /($startBlockTag.*?$endBlockTag)/isg;
            # test: have there been any changes?
            # if not, there's no need to rewrite this page
            $changed =
              (      !$originalTop
                  || !$originalBottom
                  || $topIndexBlock ne $originalTop
                  || $bottomIndexBlock ne $originalBottom ) ? 1 : 0;
            if ($changed) {
                # zap existing blocks
                if ( $pageData =~ /$startBlockTag/ ) {
                    $pageData =~ s/($startBlockTag.*?$endBlockTag\s*)//isg;
                }
                # put in new blocks
                $pageData =~ s/(<body.*?>)(\s*)/$1\n$topIndexBlock\n/is;
                $pageData =~
                  s/(.*?)(\s*)(<\/body.*?>)/$1\n$bottomIndexBlock\n$3/is;
            }
        }
        if ($changed) {
            writeFile( $value, $pageData );
            $changedPages++;
        }
    }
}
sub process_tree {
    my ( $treeRef, $i ) = @_;
    for my $key ( sort keys %{$treeRef} ) {
        my $value = $treeRef->{$key};
        if ( ref($value) eq "HASH" ) {
            if ( $key eq " members" ) {
                # it's a hash of member pages
                @keyset = ( sort keys %{$value} );
                my $firstkey   = $keyset[0];
                my $firstvalue = $value->{$firstkey};
                $pageTitles[$i] = $firstkey;
                $pagePaths[$i]  = $firstvalue;
                process_group( $treeRef, \@keyset, $value, $i );
            }
            else {
                # it's a subgroup hash
                $groupNames[$i] = $key;
                process_tree( \%{$value}, $i + 1 );
            }
        }
        else {
            # it contains the group's title
            if ( $key eq " grouptitle" ) {
                $groupTitles[$i] = $value;
            }
        }
    }
}
# the zap option removes all this script's
# content from the page set
$zap = ( $ARGV[0] && $ARGV[0] eq "-z" ) ? 1 : 0;
@fileList = ();
find( { wanted => \&processDir, no_chdir => 1 }, $source );
%tree = ();
$hasgroup     = 0;
$total        = 0;
$noshow       = 0;
$changedPages = 0;
create_siteindex( \@fileList, \%tree );
@groupTitles = ();
@groupNames  = ();
@pageTitles  = ();
@pagePaths   = ();
# debug_dump_hash(\%tree,"");
open( INDEXFILE, ">$plainTextIndexPath" );
process_tree( \%tree, 0 );
close INDEXFILE;
print "total pages = $total, changed = $changedPages, noshow = $noshow\n";
# use this for "if ./sitegen.pl; then do something; fi
# when pages are changed
exit( $changedPages == 0 );
    

Home | Downloads | * SiteGen Web Site Indexer |     Share This Page