Home | Downloads | * SiteGen Web Site Indexer |     Share This Page

#!/usr/bin/ruby -w
# sitegen.rb populates a website's pages with drop-down menus.
# This script relies on these two custom META tags:
# <META NAME="group" CONTENT="GROUP/SUBGROUP/SUBGROUP">
# <META NAME="groupTitle" CONTENT="Group Title for Menus">
# The "group" tag should be in each page.
# The "groupTitle" tag only needs to be in
# one of each group's pages.
# This scheme also relies on pages consistently having page titles,
# and on page titles being unique within each group's pages.
# Set group to "NOSHOW" for pages that should not appear in the menus.
# Also, each named group must have at least one associated page
# so it can be displayed in the menus.
# If an argument of "-z" is provided to this script,
# it will remove all its content from the
# set of Web pages.
# If an argument of "-v" is provided, the final report also lists
# the pages counted in each category.
# WARNING: this script replaces page content en masse.
# ALWAYS maintain a full backup of your page set.
# Copyright (c) 2006, P. Lutus, released under the GPL.
require 'find'
# 1. Read options
# Leading arguments that start with "-" are consumed as switches. Only the
# character right after the dash is looked at: "z" turns on zap mode and
# "v" turns on verbose reporting; anything else is silently dropped.
$zap = false
$verbose = false
until ARGV.empty? || ARGV.first[0, 1] != "-"
   switch = ARGV.shift
   flag = switch[1, 1]
   if flag == 'z'
      $zap = true
   elsif flag == 'v'
      $verbose = true
   end
end
# 2. Set paths
# The first argument left after the switches, if present, replaces the
# default site root. The arrow images are expected under <root>/images.
defaultPath = "/path/to/Web/pages"
sourcePath = ARGV[0] || defaultPath
$plainTextIndexPath = "/path/siteIndex.txt"
imageRoot = sourcePath + "/images"
$leftArrowPath  = imageRoot + "/leftarrow.png"
$rightArrowPath = imageRoot + "/rightarrow.png"
# 3. Build list of all target pages
# Walk everything below sourcePath and keep the paths ending in .htm, .html
# or .php (any letter case), except those containing "/~".
pageList = []
Find.find(sourcePath) do |path|
   next unless path =~ /\.(html?|php)$/i
   next if path =~ /\/\~/
   pageList.push path
end
# 4. Collect data from pages, build group tree
# groupTree is a nested Hash keyed by the components of each page's group
# path. Keys that begin with a space (the $...String names below) hold
# bookkeeping for the group itself rather than child groups.
groupTree = Hash.new
# error analysis lists
noGroupNameList = Array.new
noPageTitleList = Array.new
pageTitleNotUniqueList = Array.new
noShowList = Array.new
noGroupTitleList = Array.new
groupTitleRedefList = Array.new
$changedPages = Array.new
$groupTitleString      = " GroupTitle"
$groupPathString       = " GroupPath"
$noGroupTitleString    = "No Group Title!"
$memberPagesString     = " MemberPages"
$indexListString       = " IndexList"
pageList.each { |path|
   data = File.read(path)
   # Tag contents are pulled out with a real match. String#sub returns the
   # whole page unchanged when its pattern fails, and that text must not be
   # mistaken for a captured value. The leading ".*" is greedy, so the last
   # matching tag in the page is the one used.
   groupMatch = /.*<META\s*NAME\s*=\s*"group"\s*CONTENT="(.*?)">/im.match(data)
   if groupMatch.nil?
      # no usable "group" tag (absent, or not in the expected form)
      noGroupNameList.push path
      next
   end
   groupPath = groupMatch[1]
   if groupPath.empty? || groupPath =~ /NOSHOW/i
      noShowList.push path
      next
   end
   gh = groupTree
   # descend into child hashes, creating them on first sight
   groupPath.split("/").each { |node|
      if(!gh[node])
         gh[node] = Hash.new
         gh[node][$groupTitleString] = $noGroupTitleString
      end
      gh = gh[node]
   }
   gh[$groupPathString] = groupPath
   titleMatch = /.*<META\s*NAME\s*=\s*"groupTitle"\s*CONTENT=\s*"(.*?)"\s*>/im.match(data)
   if titleMatch
      groupTitle = titleMatch[1]
      if(gh[$groupTitleString] != $noGroupTitleString && gh[$groupTitleString] != groupTitle)
         puts "Error: group title redefinition in #{path}."
         groupTitleRedefList.push path
      else
         gh[$groupTitleString] = groupTitle
      end
   end
   pageMatch = /.*<title>\s*(.*?)\s*<\/title>/im.match(data)
   pageTitle = pageMatch ? pageMatch[1] : ""
   if pageTitle.empty?
      puts "Error: no page title in page #{path}"
      noPageTitleList.push path
   else
      # the member hash is created only once a titled page exists, so a
      # group never ends up with an empty member list
      gh[$memberPagesString] ||= Hash.new
      gm = gh[$memberPagesString]
      if (gm[pageTitle])
         puts "Error: page title \"#{pageTitle}\" for #{path} is not unique."
         pageTitleNotUniqueList.push path
      end
      gm[pageTitle] = path
   end
}
# Builds the relative link that leads from the page at +from+ to the file
# at +to+. Both arguments are "/"-separated paths. +from+ is treated as a
# file, so only the directories that contain it count when climbing with
# "../".
#   makeRelPath("/w/a/p.html", "/w/b/q.html")  # => "../b/q.html"
#   makeRelPath("/w/a/p.html", "/w/a/p.html")  # => "p.html"
# Returns the relative path as a String.
def makeRelPath(from,to)
   toList = to.split("/")
   # identical paths: link to the bare file name
   return toList.pop if from == to
   # directories holding the source page (its own file name is dropped)
   fromDirs = from.split("/")[0...-1]
   # Drop the shared leading directories. Stopping as soon as either list
   # runs out keeps prefix paths and trailing-slash variants from looping
   # forever or producing a negative "../" count.
   until fromDirs.empty? || toList.empty? || fromDirs.first != toList.first
      fromDirs.shift
      toList.shift
   end
   # climb out of what is left of the source directories, then descend
   ("../" * fromDirs.length) + toList.join("/")
end
# Builds the menu index for every group in the tree rooted at +hash+.
# For a group that has member pages, the index is an Array of [title, path]
# pairs stored under $indexListString: the group's own pages plus one entry
# per child group (child title => child's primary page), sorted by title,
# with the alphabetically first member page moved to the front.
# hash             - a node of the group tree (non-Hash values are ignored)
# noGroupTitleList - Array that receives the group path of every group with
#                    member pages but no title; optional so existing
#                    one-argument callers keep working
# Returns the path of the group's primary page (first index entry), or nil
# when +hash+ is not a group with member pages.
def genIndexLists(hash, noGroupTitleList = [])
   return nil unless hash.class == Hash
   unless hash[$memberPagesString]
      # no pages of its own: only visit the child groups
      hash.keys.each do |key|
         genIndexLists(hash[key], noGroupTitleList) unless key =~ /^ /
      end
      return nil
   end
   # test: is there a group title?
   if hash[$groupTitleString] == $noGroupTitleString
      noGroupTitleList.push hash[$groupPathString]
   end
   if !hash[$indexListString]
      indexList = Hash.new
      # start with all existing members
      indexList.update(hash[$memberPagesString])
      # then add each subgroup, keyed by its group title and pointing at
      # its primary page; a subgroup without pages has nothing to link to
      hash.keys.each do |key|
         next if key =~ /^ / || hash[key].class != Hash
         child = hash[key]
         primaryPage = genIndexLists(child, noGroupTitleList)
         indexList[child[$groupTitleString]] = primaryPage if primaryPage
      end
      # Hash#sort already yields an Array of [title, path] pairs
      indexList = indexList.sort
      # assure that the first listed item is a page, not a group
      firstMember = hash[$memberPagesString].sort.first
      indexList.delete(firstMember)
      indexList.unshift(firstMember)
      hash[$indexListString] = indexList
   end
   # primary page is first in alpha sort
   hash[$indexListString][0][1]
end
# 5. Generate index lists for use
#    in creating page indices
genIndexLists(groupTree, noGroupTitleList)
# Marker comments that delimit each generated menu block inside a page,
# the credit line placed in every block, and the JavaScript helper the
# drop-down list calls to jump to the chosen page.
$startBlockTag = '<!-- SiteIndexBegin -->'
$endBlockTag   = '<!-- SiteIndexEnd -->'
$idTag = '<!-- Menus created by SiteGen, http://www.arachnoid.com/SiteGen -->'
$scriptTag = [
   '<script language="JavaScript">',
   'function toNewPage(t) { location.href = t.options[t.selectedIndex].value; }',
   '</script>',
   ''
].join("\n")
# Assembles the HTML menu block for one page.
# hash   - group node; its $indexListString entry is the Array of
#          [title, path] pairs shown in the drop-down list
# source - path of the page the block is generated for
# branch - Array of [title, path] pairs rendered as links ahead of the list
# key and plainIndexFile are accepted but not used here.
# Returns the block as a String, wrapped in $startBlockTag / $endBlockTag.
def makeIndexBlock(hash,source,key,branch,plainIndexFile)
   entries = hash[$indexListString]
   html = $startBlockTag + "\n" + $idTag + "\n<table><tr>"
   # links for the enclosing groups
   branch.each do |title, target|
      html << "<td><a href=\"" << makeRelPath(source, target) << "\">" << title << "</a>&nbsp;|&nbsp;</td>"
   end
   html << "<td>\n" << $scriptTag
   html << "<select onChange=\"toNewPage(this)\" title=\"Open this list to choose a page\">\n"
   # drop-down entries; remember where the current page sits in the list
   current = 0
   entries.each_with_index do |(title, target), position|
      marker = ""
      if target == source
         current = position
         marker = " selected"
      end
      html << "<option value=\"" << makeRelPath(source, target) << "\"" << marker << ">" << title << "\n"
   end
   html << "</select>\n</td>"
   # arrow cells for the neighbours of the current page, prior one first
   arrowCell = lambda do |target, hint, imagePath|
      "<td><a href=\"" + makeRelPath(source, target) + "\" title=\"" + hint + "\">" +
         "<img src=\"" + makeRelPath(source, imagePath) + "\" border=\"0\"></a></td>"
   end
   if current > 0
      html << arrowCell.call(entries[current - 1][1], "Click for prior page", $leftArrowPath)
   end
   if current + 1 < entries.length
      html << arrowCell.call(entries[current + 1][1], "Click for next page", $rightArrowPath)
   end
   html << "</tr></table>\n" << $endBlockTag
   return html
end
# Writes one line of the plain-text site index to +file+: the titles from
# +branch+ (each followed by "|"), then the page title +key+, a tab, the
# page +path+ and a newline.
# Returns whatever file.write returns.
def addToPlainIndex(branch,key,path,file)
   trail = branch.inject("") { |soFar, item| soFar + (item[0] + "|") }
   file.write(trail + key + "\t" + path + "\n")
end
# Walks the group tree rooted at +hash+ and brings every member page up to
# date: in normal mode each page gets a menu block right after its <body>
# tag and another (without the JavaScript) right before </body>; in zap
# mode ($zap) any existing menu blocks are removed instead. A page file is
# rewritten, and its path added to $changedPages, only when its content
# actually changes. Every member page is also logged through
# addToPlainIndex.
# hash           - node of the group tree (non-Hash values are ignored)
# branch         - Array of [group title, primary page] pairs for the
#                  enclosing groups; pushed and popped during the walk
# plainIndexFile - open file receiving the plain-text index
def createPageIndexLists(hash,branch,plainIndexFile)
   return unless hash.class == Hash
   # the tags are matched literally, whatever characters they contain
   startTag = Regexp.escape($startBlockTag)
   endTag = Regexp.escape($endBlockTag)
   blockRegex = /#{startTag}.*?#{endTag}/im
   blockAndGapRegex = /#{startTag}.*?#{endTag}\s*/im
   members = hash[$memberPagesString]
   if members
      # this group joins the link trail shown on its own and its
      # subgroups' pages
      branch.push [hash[$groupTitleString],hash[$indexListString][0][1]]
      members.keys.sort.each do |key|
         path = members[key]
         addToPlainIndex(branch,key,path,plainIndexFile)
         pageData = File.read(path)
         if $zap
            # just zap existing blocks; gsub! returns nil when nothing
            # was removed, in which case the page is left alone
            changed = !pageData.gsub!(blockAndGapRegex,"").nil?
         else
            topIndex = makeIndexBlock(hash,path,key,branch,plainIndexFile)
            # the bottom block doesn't need the JavaScript
            bottomIndex = topIndex.sub(/<script.*?<\/script>\n/im,"")
            origTopIndex,origBottomIndex = pageData.scan(blockRegex)
            changed = (topIndex != origTopIndex || bottomIndex != origBottomIndex)
            if changed
               # zap existing blocks
               pageData.gsub!(blockAndGapRegex,"")
               # Put in new blocks. The block form inserts the generated
               # HTML verbatim; in a replacement *string*, backslash
               # sequences inside titles (such as "\1") would be expanded
               # as back-references.
               pageData.sub!(/(<body.*?>)(\s*)/im) { "#{$1}\n#{topIndex}\n" }
               pageData.sub!(/(.*?)(\s*)(<\/body.*?>)/im) { "#{$1}\n#{bottomIndex}\n#{$3}" }
            end
         end
         if changed
            File.open(path,"w") { |f| f.write(pageData) }
            $changedPages.push path
         end
      end
   end
   # descend into child groups (keys starting with a space are bookkeeping)
   hash.keys.sort.each do |key|
      createPageIndexLists(hash[key],branch,plainIndexFile) unless key =~ /^ /
   end
   branch.pop if members
end
# 6. Write indices to pages
# The block form of File.open closes the plain-text index file even when
# rewriting a page raises part-way through.
groupPath = Array.new
File.open($plainTextIndexPath,"w") do |plainTextIndexFile|
   createPageIndexLists(groupTree,groupPath,plainTextIndexFile)
end
# Prints one line of the final report: +tag+ left-aligned in 28 columns
# followed by the number of entries in +list+. When $verbose is set and
# +extended+ is true, the entries themselves are printed as well.
def showResults(tag,list,extended = true)
   puts format("%-28s %3d", tag, list.length)
   return unless $verbose && extended
   puts list
end
# 7. Display results
# One report line per category; the third column says whether the pages
# themselves may be listed in verbose mode.
[
   ["Total Pages",              pageList,               false],
   ["Changed Pages",            $changedPages,          true],
   ["No Show",                  noShowList,             false],
   ["No Group Name",            noGroupNameList,        true],
   ["No Group Title",           noGroupTitleList,       true],
   ["Group Title Redefinition", groupTitleRedefList,    true],
   ["No Page Title",            noPageTitleList,        true],
   ["Page Title Not Unique",    pageTitleNotUniqueList, true]
].each do |label, pages, extended|
   showResults(label, pages, extended)
end
# 8. Emit useful exit state
# exit(true) reports success, exit(false) failure: the script succeeds
# only when at least one page was rewritten.
exit(!$changedPages.empty?)
    

Home | Downloads | * SiteGen Web Site Indexer |     Share This Page