Created
July 24, 2015 01:18
-
-
Save mdaniel/f18d16e84e161ce54753 to your computer and use it in GitHub Desktop.
Mirror a Confluence repo, one dir at a time
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/bash
# Mirror a Confluence repo, one dir at a time.
#
# Abort on the first failing command (-e) and on use of an unset variable (-u).
set -eu

# NOTE(review): the sentence that introduces the URL template below is missing
# from the original note; presumably it names the Confluence URL to start
# mirroring from -- confirm.
#
#   `http://<confluenceURL>/plugins/servlet/confluence/default`
#
# For example if your Confluence URL is
#   `http://foo.atlassian.net/wiki`
# you would enter:
#   `http://foo.atlassian.net/wiki/plugins/servlet/confluence/default`
#
# This script is designed to action "index.html", so to bootstrap the
# process:
#
#   cat >index.html<<HTML
#   <ul><li><a href="http://...">who cares</a></li></ul>
#   HTML
# Listing page whose links are mirrored; it must already exist in the
# current working directory.
LIST_HTML=index.html
if [ ! -f "$LIST_HTML" ]; then
  echo "I was expecting \"$LIST_HTML\" in this directory" >&2
  exit 1
fi

# Optional command prefix placed in front of the sleep and download commands
# (e.g. N=echo prints them instead of running them). Defaults to empty so the
# commands run normally and `set -u` does not trip on an unset N.
N=${N:-}

# Absolute directory containing this script. Everything is quoted so a path
# with spaces or glob characters survives; CDPATH is cleared so `cd` cannot
# print the directory itself and pollute the captured value.
MYDIR=$(dirname -- "$0")
MYDIR=$(CDPATH='' cd -- "$MYDIR" && pwd)

# Cookie jar kept next to the script; it is both read and rewritten by the
# downloads, and must exist before we start.
CK=$MYDIR/cookies.txt
if [ ! -f "$CK" ]; then
  echo "Bogus, expected cookies but found none" >&2
  exit 1
fi
#######################################
# Percent-decode one line of text.
#
# Reads the first line from stdin, strips leading/trailing whitespace,
# replaces every %XX escape (XX = two hex digits, either case) with the byte
# it encodes, and prints the result followed by a newline. A '%' that is not
# followed by two hex digits is passed through unchanged, and '+' is NOT
# turned into a space.
#
# Implemented in pure Bash so it does not depend on which Python (if any)
# is installed as `python`.
#
# Arguments: none
# Inputs:    stdin - the text to decode (only the first line is used)
# Outputs:   stdout - the decoded line
# Returns:   0
#######################################
unquote() {
  local line='' decoded='' byte=''

  # Only the first line matters; tolerate input without a trailing newline
  # (read returns non-zero at EOF but still fills "line").
  IFS= read -r line || true

  # Trim surrounding whitespace.
  line="${line#"${line%%[![:space:]]*}"}"
  line="${line%"${line##*[![:space:]]}"}"

  # Consume the input left to right, one '%' at a time.
  while [[ "$line" == *%* ]]; do
    decoded+=${line%%"%"*}   # literal text before the next '%'
    line=${line#*"%"}        # drop that text and the '%' itself
    if [[ "$line" =~ ^[0-9A-Fa-f]{2} ]]; then
      # Valid escape: turn the two hex digits into the byte they name.
      printf -v byte '%b' "\\x${line:0:2}"
      decoded+=$byte
      line=${line:2}
    else
      # Malformed escape: keep the '%' as-is.
      decoded+='%'
    fi
  done

  printf '%s\n' "${decoded}${line}"
}
# Download every link found in $LIST_HTML into the current directory.
#
# the %2f business is because the "../" hyperlink is *entirely* url encoded
# https%3a%2f%2f... so not only do we not care about .. we for damn sure
# don't want to basename on that monstrosity
# (matched in either hex case, since %2f and %2F encode the same character)
#
# Links are read one per line on fd 3 rather than via an unquoted command
# substitution, so an href containing glob characters ("?", "*", "[") or
# spaces is used verbatim instead of being split or expanded against local
# files. fd 3 keeps the link list away from the stdin of the commands run
# inside the loop.
while IFS= read -r i <&3 || [ -n "$i" ]; do
  # skip blank lines in the extracted link list
  [ -n "$i" ] || continue

  # $bn will only have a value if we have been through the loop once
  # (and the previous link was not skipped), so this throttles back-to-back
  # downloads without delaying the first one.
  if [ -n "${bn:-}" ]; then
    # shellcheck disable=SC2086 # $N is an optional command prefix; splitting is intended
    $N sleep 10
  fi

  # Local name: last path component of the link, percent-decoded.
  bn=$(basename -- "$i" | unquote)

  if [[ "$i" == */ ]]; then
    # A trailing slash marks a directory link: create the directory and store
    # the fetched page as its index.html.
    if [ ! -d "$bn" ]; then
      mkdir -v -- "$bn"
    fi
    outf="$bn/index.html"
  else
    outf="$bn"
  fi

  if [ -e "$outf" ]; then
    echo "I will not overwrite \"$outf\"" >&2
    # clear this to avoid the sleeps
    unset bn
    continue
  fi

  # Fetch the link, reading cookies from and writing them back to $CK.
  # shellcheck disable=SC2086 # $N is an optional command prefix; splitting is intended
  $N curl-ua-firefox -vo "$outf" \
    --cookie "@${CK}" --cookie-jar "$CK" \
    "$i"
done 3< <(pup 'li a attr{href}' <"$LIST_HTML" | sed -e '/%2[fF]/d')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment