timjstewart · December 28, 2015 06:29
diff --git a/mirror b/mirror
 #! /bin/bash

 # root directory that receives every mirrored documentation set
 base_dir=~/docs

 # master index page, generated directly inside base_dir
 master_index="${base_dir}/index.html"

 # Download a single compressed archive and extract it into the current
 # directory, deleting the archive once it has been extracted.
 #
 # Arguments:
 #   $1 - URL of the archive, fetched with wget
 #   $2 - file name the archive is saved under (basename of the URL)
 #   $3 - archive extension: txz, gz, tgz or zip
 # Returns:
 #   0 on success; non-zero if the download or the extraction fails, or if
 #   the extension is not supported.
 function download_and_extract() {
    local url=$1
    local file_name=$2
    local file_ext=$3

    # There is nothing to extract when the download itself failed.
    wget -nv -np -nH "${url}" || return

    case "${file_ext}" in
        txz )
            tar Jxvf "${file_name}" || return
            ;;
        gz | tgz )
            # tgz is the short spelling of .tar.gz
            tar xvzf "${file_name}" || return
            ;;
        zip )
            unzip "${file_name}" || return
            ;;
        * )
            # diagnostics go to stderr, not stdout
            echo "don't know how to extract ${file_ext} files." >&2
            return 1
            ;;
    esac

    # Only reached after a successful extraction, so a broken archive is
    # left in place for inspection.
    rm -- "${file_name}"
 }

 # Mirror everything reachable from a URL with wget's recursive mode.
 #
 # wget is told not to ascend to the URL's parent directory (-np) and not
 # to create a host-name directory (-nH).
 #
 # Arguments:
 #   $1 - URL to start the recursive download from
 # Returns:
 #   wget's exit status.
 function download_recursively() {
    local url=$1

    # Quoted so that a URL containing IFS or glob characters (such as '?')
    # reaches wget as exactly one argument.
    wget -nv -np -nH -r "${url}"
 }

 # Truncate the temporary master index and write one line of text to it.
 #
 # Globals:
 #   master_index (read) - output goes to "${master_index}.tmp"
 # Arguments:
 #   $1 - text to write
 function write_to_index() {
    # printf with a quoted argument writes the text verbatim; an unquoted
    # echo would word-split it, expand globs and swallow a leading -n.
    printf '%s\n' "$1" > "${master_index}.tmp"
 }

 # Append one line of text to the temporary master index.
 #
 # Globals:
 #   master_index (read) - output goes to "${master_index}.tmp"
 # Arguments:
 #   $1 - text to append
 function append_to_index() {
    # printf with a quoted argument writes the text verbatim; an unquoted
    # echo would word-split it, expand globs and swallow a leading -n.
    printf '%s\n' "$1" >> "${master_index}.tmp"
 }

 # Start a fresh master index: document head, page heading and the opening
 # tag of the list that holds one <li> entry per mirrored document set.
 function write_index_header() {
    write_to_index "<html><head><title>Master Index</title></head><body>"
    append_to_index "<h3>Master Index</h3>"
    # The entries are <li> elements and the footer emits </ul>, so the
    # list has to be opened here.
    append_to_index "<ul>"
 }

 # Append one <li> entry to the master index: a link to a document set's
 # index page followed by the disk space the set occupies.
 #
 # Arguments:
 #   $1 - href of the link
 #   $2 - link text
 #   $3 - directory whose disk usage (du -h format) is shown after the link
 function write_index_file() {
    # local, so the caller's own dir/href/text variables are not clobbered
    local href=$1
    local text=$2
    local dir=$3
    local size

    # Options precede the operand (only GNU du accepts them after it) and
    # the directory is quoted so it stays a single argument.  The last line
    # of "du -c" output is the grand total; its first tab-separated field
    # is the human-readable size.
    size=$(du -chsx -- "${dir}" | tail -n 1 | cut -f 1)

    append_to_index "<li>"
    append_to_index "<a href=\"${href}\">${text}</a>"
    append_to_index "<i>(${size})</i>"
    append_to_index "</li>"
 }

 # Write the master-index link(s) for one mirrored document set.
 #
 # If the caller names an index file, link to it directly.  Otherwise
 # search the set's directory with increasing find(1) depth limits (3 up to
 # 9) and link every index.htm / index.htm? (e.g. index.html) file found at
 # the first depth limit that yields a match.
 #
 # Globals:
 #   base_dir (read) - stripped from the front of every href so that links
 #                     are relative to the master index
 # Arguments:
 #   $1 - directory holding the mirrored files
 #   $2 - document set name; '/' is shown as '-' in the link text
 #   $3 - (optional) path of the index file, relative to $1
 function write_index_files() {
    local dir=$1
    local name=$2
    local index_file=$3
    local link_text=${name//\//-}
    local maxdepth=3
    local found=0
    local index

    if [ "${index_file}" != "" ]
    then
        # Same relative form as the hrefs produced by the search below.
        local target="${dir}/${index_file}"
        write_index_file "${target#"${base_dir}"/}" "${link_text}" "${dir}"
        return
    fi

    while (( found == 0 && maxdepth < 10 ))
    do
        # NUL-delimited read: file names are neither word-split nor
        # globbed, and the caller's IFS is left untouched.
        while IFS= read -r -d '' index
        do
            write_index_file "${index#"${base_dir}"/}" "${link_text}" "${dir}"
            found=1
        done < <(find "${dir}" -maxdepth "${maxdepth}" \
                    \( -name 'index.htm' -o -name 'index.htm?' \) -print0)

        (( maxdepth += 1 ))
    done
 }

 # Finish the master index: close the entry list, add the generation
 # timestamp and the total size of the mirror, then close the document.
 #
 # Globals:
 #   base_dir (read) - directory whose total disk usage is reported
 function write_index_footer() {
    local total_size

    # Options precede the operand (only GNU du accepts them after it) and
    # base_dir is quoted so it stays a single argument.
    total_size=$(du -chsx -- "${base_dir}" | tail -n 1 | cut -f 1)

    # Close the list first: the notes below are not list items.
    append_to_index "</ul>"
    append_to_index "<br/><i>generated at: $(date) using <a href=\"https://gist.github.com/timjstewart/7457111\">mirror</a></i>"
    append_to_index "<i>(Index Size: ${total_size})</i>"
    append_to_index "</body></html>"
 }

 # Driver: rebuild the master index and mirror every document set listed in
 # ~/.mirror that has not been downloaded yet.
 #
 # The index is always rebuilt, even if nothing got downloaded.

 # The temporary index is written inside base_dir, so it must exist before
 # the header is written (it does not on a first run).
 mkdir -p "${base_dir}"

 write_index_header

 # Fields of a ~/.mirror line are separated by ';'.  IFS is set globally
 # (not only for read) so that any unquoted expansion further down splits
 # on ';' rather than on the spaces a directory name may contain.
 IFS=';'
 sort ~/.mirror | grep -ve "^#" | while read -r dir url index_file
 do
    # skip blank lines
    if [ "${dir}" == "" ]
    then
        continue
    fi

    dest_dir="${base_dir}/${dir}"

    # An existing directory is taken to mean the set is already mirrored.
    if [ ! -d "${dest_dir}" ]
    then
        mkdir -p "${dest_dir}"

        # Download only if dest_dir could be entered; otherwise the files
        # would land in whatever the current directory happens to be.
        if pushd "${dest_dir}"
        then
            file_name=$(basename "${url}")
            file_ext="${file_name##*.}"

            case "${file_ext}" in
                zip | gz | tgz | txz )
                    download_and_extract "${url}" "${file_name}" "${file_ext}"
                    ;;

                * )
                    # html, htm and anything else: mirror recursively
                    download_recursively "${url}"
                    ;;
            esac

            popd
        fi
    fi

    write_index_files "${dest_dir}" "${dir}" "${index_file}"

 done

 write_index_footer

 # Replace the previous index with the freshly generated one.
 mv "${master_index}.tmp" "${master_index}"
diff --git a/z - config file b/z - config file
 #
 # Configuration file for mirrored documentation
 #
 # NOTE: Make sure to leave a blank line at the end of the file
 #
 # Format: <doc-dir>;<doc-url>[;<index-file>]
 #
 # doc-dir - the sub-directory (under ~/docs) where this documentation 
 #           should be placed
 #
 # doc-url - the html or archive URL to download
 #
 # index-file - (optional) if mirror has trouble working out which file to
 #              link from the Master Index, give the path of the index file
 #              here, relative to doc-dir.
 #

 # Akka
 Akka/2.2.3;http://doc.akka.io/api/akka/2.2.3/

 # Scala (multiple versions)
 Scala/2.11-M5;http://scala-lang.org/files/archive/scala-docs-2.11.0-M5.txz
 Scala/2.10.3;http://scala-lang.org/files/archive/scala-docs-2.10.3.txz

 # Scala Testing
 ScalaCheck/1.11.0;http://scalacheck.org/files/scalacheck_2.10-1.11.0-javadoc.tar.gz
 ScalaTest/2.0;http://doc.scalatest.org/2.0/index.html

 # Reactive Java
 RxJava/0.14.10;http://netflix.github.io/RxJava/javadoc/index.html

 # Play Framework
 Play/2.0;http://www.playframework.com/documentation/2.0/api/scala/index.html

 # Emacs
 Elisp;https://www.gnu.org/software/emacs/manual/elisp.html_node.tar.gz
 Emacs;https://www.gnu.org/software/emacs/manual/emacs.html_node.tar.gz

 # Shell Utilities
 gawk;https://www.gnu.org/software/gawk/manual/gawk.html_node.tar.gz
 git;https://www.kernel.org/pub/software/scm/git/docs/

 # Java API
 Java/1.7;http://docs.oracle.com/javase/7/docs/;javase/7/docs/api/index.html

 # Google Libraries
 guice/4.0beta;https://google-guice.googlecode.com/git/latest-javadoc/packages.html

 # JodaTime
 JodaTime/2.4;http://www.joda.org/joda-time/apidocs/index.html

 # EasyMock
 EasyMock/3.1;http://www.easymock.org/api/easymock/3.1/index.html

 # JUnit
 JUnit;http://junit.sourceforge.net/javadoc/

 # Cassandra
 Cassandra Java Driver;http://www.datastax.com/drivers/java/2.0/apidocs/

 # Bash Manual
 Bash;https://www.gnu.org/software/bash/manual/bash.html_node.tar.gz

 # Pandas
 Pandas;http://pandas.pydata.org/pandas-docs/stable/

 # IPython
 IPython;http://ipython.org/ipython-doc/stable/index.html
	#! /bin/bash

	# root directory that receives every mirrored documentation set
	base_dir=~/docs

	# master index page, generated directly inside base_dir
	master_index="${base_dir}/index.html"

	# Download a single compressed archive and extract it into the current
	# directory, deleting the archive once it has been extracted.
	#
	# Arguments:
	#   $1 - URL of the archive, fetched with wget
	#   $2 - file name the archive is saved under (basename of the URL)
	#   $3 - archive extension: txz, gz, tgz or zip
	# Returns:
	#   0 on success; non-zero if the download or the extraction fails, or if
	#   the extension is not supported.
	function download_and_extract() {
	    local url=$1
	    local file_name=$2
	    local file_ext=$3

	    # There is nothing to extract when the download itself failed.
	    wget -nv -np -nH "${url}" || return

	    case "${file_ext}" in
	        txz )
	            tar Jxvf "${file_name}" || return
	            ;;
	        gz | tgz )
	            # tgz is the short spelling of .tar.gz
	            tar xvzf "${file_name}" || return
	            ;;
	        zip )
	            unzip "${file_name}" || return
	            ;;
	        * )
	            # diagnostics go to stderr, not stdout
	            echo "don't know how to extract ${file_ext} files." >&2
	            return 1
	            ;;
	    esac

	    # Only reached after a successful extraction, so a broken archive is
	    # left in place for inspection.
	    rm -- "${file_name}"
	}

	# Mirror everything reachable from a URL with wget's recursive mode.
	#
	# wget is told not to ascend to the URL's parent directory (-np) and not
	# to create a host-name directory (-nH).
	#
	# Arguments:
	#   $1 - URL to start the recursive download from
	# Returns:
	#   wget's exit status.
	function download_recursively() {
	    local url=$1

	    # Quoted so that a URL containing IFS or glob characters (such as '?')
	    # reaches wget as exactly one argument.
	    wget -nv -np -nH -r "${url}"
	}

	# Truncate the temporary master index and write one line of text to it.
	#
	# Globals:
	#   master_index (read) - output goes to "${master_index}.tmp"
	# Arguments:
	#   $1 - text to write
	function write_to_index() {
	    # printf with a quoted argument writes the text verbatim; an unquoted
	    # echo would word-split it, expand globs and swallow a leading -n.
	    printf '%s\n' "$1" > "${master_index}.tmp"
	}

	# Append one line of text to the temporary master index.
	#
	# Globals:
	#   master_index (read) - output goes to "${master_index}.tmp"
	# Arguments:
	#   $1 - text to append
	function append_to_index() {
	    # printf with a quoted argument writes the text verbatim; an unquoted
	    # echo would word-split it, expand globs and swallow a leading -n.
	    printf '%s\n' "$1" >> "${master_index}.tmp"
	}

	# Start a fresh master index: document head, page heading and the opening
	# tag of the list that holds one <li> entry per mirrored document set.
	function write_index_header() {
	    write_to_index "<html><head><title>Master Index</title></head><body>"
	    append_to_index "<h3>Master Index</h3>"
	    # The entries are <li> elements and the footer emits </ul>, so the
	    # list has to be opened here.
	    append_to_index "<ul>"
	}

	# Append one <li> entry to the master index: a link to a document set's
	# index page followed by the disk space the set occupies.
	#
	# Arguments:
	#   $1 - href of the link
	#   $2 - link text
	#   $3 - directory whose disk usage (du -h format) is shown after the link
	function write_index_file() {
	    # local, so the caller's own dir/href/text variables are not clobbered
	    local href=$1
	    local text=$2
	    local dir=$3
	    local size

	    # A real pipeline: the pipes had been backslash-escaped, which handed
	    # literal '|' words to du instead of piping its output.  Options
	    # precede the operand (only GNU du accepts them after it) and the
	    # directory is quoted so it stays a single argument.  The last line of
	    # "du -c" output is the grand total; its first tab-separated field is
	    # the human-readable size.
	    size=$(du -chsx -- "${dir}" | tail -n 1 | cut -f 1)

	    append_to_index "<li>"
	    append_to_index "<a href=\"${href}\">${text}</a>"
	    append_to_index "<i>(${size})</i>"
	    append_to_index "</li>"
	}

	# Write the master-index link(s) for one mirrored document set.
	#
	# If the caller names an index file, link to it directly.  Otherwise
	# search the set's directory with increasing find(1) depth limits (3 up to
	# 9) and link every index.htm / index.htm? (e.g. index.html) file found at
	# the first depth limit that yields a match.
	#
	# Globals:
	#   base_dir (read) - stripped from the front of every href so that links
	#                     are relative to the master index
	# Arguments:
	#   $1 - directory holding the mirrored files
	#   $2 - document set name; '/' is shown as '-' in the link text
	#   $3 - (optional) path of the index file, relative to $1
	function write_index_files() {
	    local dir=$1
	    local name=$2
	    local index_file=$3
	    # Parameter expansion replaces the former "echo \| tr" line, whose
	    # escaped pipe printed the words instead of translating them.
	    local link_text=${name//\//-}
	    local maxdepth=3
	    local found=0
	    local index

	    if [ "${index_file}" != "" ]
	    then
	        # Same relative form as the hrefs produced by the search below.
	        local target="${dir}/${index_file}"
	        write_index_file "${target#"${base_dir}"/}" "${link_text}" "${dir}"
	        return
	    fi

	    while (( found == 0 && maxdepth < 10 ))
	    do
	        # NUL-delimited read: file names are neither word-split nor
	        # globbed, and the caller's IFS is left untouched.
	        while IFS= read -r -d '' index
	        do
	            write_index_file "${index#"${base_dir}"/}" "${link_text}" "${dir}"
	            found=1
	        done < <(find "${dir}" -maxdepth "${maxdepth}" \
	                    \( -name 'index.htm' -o -name 'index.htm?' \) -print0)

	        (( maxdepth += 1 ))
	    done
	}

	# Finish the master index: close the entry list, add the generation
	# timestamp and the total size of the mirror, then close the document.
	#
	# Globals:
	#   base_dir (read) - directory whose total disk usage is reported
	function write_index_footer() {
	    local total_size

	    # A real pipeline: the pipes had been backslash-escaped, which handed
	    # literal '|' words to du.  Options precede the operand (only GNU du
	    # accepts them after it) and base_dir is quoted so it stays a single
	    # argument.
	    total_size=$(du -chsx -- "${base_dir}" | tail -n 1 | cut -f 1)

	    # Close the list first: the notes below are not list items.
	    append_to_index "</ul>"
	    append_to_index "<br/><i>generated at: $(date) using <a href=\"https://gist.github.com/timjstewart/7457111\">mirror</a></i>"
	    append_to_index "<i>(Index Size: ${total_size})</i>"
	    append_to_index "</body></html>"
	}

	# Driver: rebuild the master index and mirror every document set listed in
	# ~/.mirror that has not been downloaded yet.
	#
	# The index is always rebuilt, even if nothing got downloaded.

	# The temporary index is written inside base_dir, so it must exist before
	# the header is written (it does not on a first run).
	mkdir -p "${base_dir}"

	write_index_header

	# Fields of a ~/.mirror line are separated by ';'.  IFS is set globally
	# (not only for read) so that any unquoted expansion further down splits
	# on ';' rather than on the spaces a directory name may contain.
	IFS=';'
	# Plain pipes here and plain '|' alternation in the case patterns below:
	# the backslash-escaped forms were a syntax error.
	sort ~/.mirror | grep -ve "^#" | while read -r dir url index_file
	do
	    # skip blank lines
	    if [ "${dir}" == "" ]
	    then
	        continue
	    fi

	    dest_dir="${base_dir}/${dir}"

	    # An existing directory is taken to mean the set is already mirrored.
	    if [ ! -d "${dest_dir}" ]
	    then
	        mkdir -p "${dest_dir}"

	        # Download only if dest_dir could be entered; otherwise the files
	        # would land in whatever the current directory happens to be.
	        if pushd "${dest_dir}"
	        then
	            file_name=$(basename "${url}")
	            file_ext="${file_name##*.}"

	            case "${file_ext}" in
	                zip | gz | tgz | txz )
	                    download_and_extract "${url}" "${file_name}" "${file_ext}"
	                    ;;

	                * )
	                    # html, htm and anything else: mirror recursively
	                    download_recursively "${url}"
	                    ;;
	            esac

	            popd
	        fi
	    fi

	    write_index_files "${dest_dir}" "${dir}" "${index_file}"

	done

	write_index_footer

	# Replace the previous index with the freshly generated one.
	mv "${master_index}.tmp" "${master_index}"
	#
	# Configuration file for mirrored documentation
	#
	# NOTE: Make sure to leave a blank line at the end of the file
	#
	# Format: <doc-dir>;<doc-url>[;<index-file>]
	#
	# doc-dir - the sub-directory (under ~/docs) where this documentation
	# should be placed
	#
	# doc-url - the html or archive URL to download
	#
	# index-file - (optional) if mirror has trouble working out which
	# file to link from the Master Index, give the path of the index file
	# here, relative to doc-dir.
	#

	# Akka
	Akka/2.2.3;http://doc.akka.io/api/akka/2.2.3/

	# Scala (multiple versions)
	Scala/2.11-M5;http://scala-lang.org/files/archive/scala-docs-2.11.0-M5.txz
	Scala/2.10.3;http://scala-lang.org/files/archive/scala-docs-2.10.3.txz

	# Scala Testing
	ScalaCheck/1.11.0;http://scalacheck.org/files/scalacheck_2.10-1.11.0-javadoc.tar.gz
	ScalaTest/2.0;http://doc.scalatest.org/2.0/index.html

	# Reactive Java
	RxJava/0.14.10;http://netflix.github.io/RxJava/javadoc/index.html

	# Play Framework
	Play/2.0;http://www.playframework.com/documentation/2.0/api/scala/index.html

	# Emacs
	Elisp;https://www.gnu.org/software/emacs/manual/elisp.html_node.tar.gz
	Emacs;https://www.gnu.org/software/emacs/manual/emacs.html_node.tar.gz

	# Shell Utilities
	gawk;https://www.gnu.org/software/gawk/manual/gawk.html_node.tar.gz
	git;https://www.kernel.org/pub/software/scm/git/docs/

	# Java API
	Java/1.7;http://docs.oracle.com/javase/7/docs/;javase/7/docs/api/index.html

	# Google Libraries
	guice/4.0beta;https://google-guice.googlecode.com/git/latest-javadoc/packages.html

	# JodaTime
	JodaTime/2.4;http://www.joda.org/joda-time/apidocs/index.html

	# EasyMock
	EasyMock/3.1;http://www.easymock.org/api/easymock/3.1/index.html

	# JUnit
	JUnit;http://junit.sourceforge.net/javadoc/

	# Cassandra
	Cassandra Java Driver;http://www.datastax.com/drivers/java/2.0/apidocs/

	# Bash Manual
	Bash;https://www.gnu.org/software/bash/manual/bash.html_node.tar.gz

	# Pandas
	Pandas;http://pandas.pydata.org/pandas-docs/stable/

	# IPython
	IPython;http://ipython.org/ipython-doc/stable/index.html