Skip to content

Instantly share code, notes, and snippets.

@blha303
Last active August 29, 2015 14:16
Show Gist options
  • Save blha303/2a6cc9c13a4033a277fc to your computer and use it in GitHub Desktop.
Save blha303/2a6cc9c13a4033a277fc to your computer and use it in GitHub Desktop.
Script to mirror a directory exposed through Apache directory indexing. Creates a batch file containing the URLs and the output location for each file.
#!/usr/bin/env python2
# Usage: ./apache_mirror.py [BASE URL] [NUMBER OF COLUMNS]
# If the target site has more (or fewer) than three columns in the directory index, the second parameter is required.
import sys
import urllib2
from bs4 import BeautifulSoup as Soup
def do_iterate(curdir="", BASE="", COL="", fh=None):
    """Walk a directory index page and write one curl command per file.

    Fetches ``BASE + curdir``, treats every ``<tr>`` after the first as a
    listing row, recurses into entries whose link ends in ``/`` and writes a
    ``curl <url> --create-dirs -o "<path>"`` line to *fh* for every other
    entry.

    Args:
        curdir: Path of the directory being listed, relative to BASE
            ("" for the starting directory; sub-directory hrefs are appended
            as they are discovered).
        BASE: URL prefix that every fetched and emitted URL starts with.
        COL: Column count supplied by the caller. It is only forwarded to
            the recursive calls; nothing in this function reads it.
        fh: Writable file-like object that receives the curl commands.

    Errors raised by ``urllib2.urlopen`` are not caught here.
    """
    url = BASE + curdir
    # Single-argument parenthesised print works the same as the Python 2
    # print statement this script was written with.
    print("Entering {}".format(url))

    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        # Close the connection before recursing so nested listings do not
        # pile up open sockets.
        response.close()

    # The first <tr> is skipped, as in the original (presumably the column
    # header row of the index table).
    for tr in Soup(html).findAll('tr')[1:]:
        icon = tr.img
        link = tr.a
        # Rows without both an icon and a link (e.g. <hr> separator rows)
        # cannot be classified as an entry; skip them instead of crashing
        # on a None lookup.
        if icon is None or link is None:
            continue
        if icon.get("alt") == "[PARENTDIR]":
            continue
        href = link.get("href")
        if not href:
            continue

        path = curdir + href
        if href.endswith("/"):
            do_iterate(path, BASE, COL, fh)
        else:
            print("Adding {}".format(BASE + path))
            # Only %20 is decoded for the local output path; every other
            # escape sequence is kept verbatim.
            fh.write("curl {0}{1} --create-dirs -o \"{2}\"\n".format(BASE, path, path.replace("%20", " ")))
def main():
    """Build ``getrepo.bat`` for the directory index named on the command line.

    Command-line arguments:
        sys.argv[1]: Base URL of the index to mirror
            (default ``http://repo.blha303.biz/``).
        sys.argv[2]: Number of columns in the index table, parsed as an int
            (default 3). It is handed to ``do_iterate`` unchanged.

    Returns:
        0, which the caller passes on as the process exit status.

    Raises:
        ValueError: if the second argument is not an integer.
    """
    BASE = sys.argv[1] if len(sys.argv) > 1 else "http://repo.blha303.biz/"
    # do_iterate joins BASE and the listing hrefs by plain concatenation, so
    # a base without a trailing slash would fuse the last path segment with
    # the first entry name ("http://host/dir" + "file" -> ".../dirfile").
    if not BASE.endswith("/"):
        BASE += "/"
    COL = int(sys.argv[2]) if len(sys.argv) > 2 else 3
    with open("getrepo.bat", "w") as fw:
        do_iterate("", BASE, COL, fw)
    return 0
# Script entry point: run main() and hand its return value to the
# interpreter as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
# Rewrite getrepo.bat in place:
#   * every "%" is doubled to "%%";
#   * in everything after the "-o" token (the output path), "/" becomes "\".
# NOTE(review): this may be a separate file of the gist that was concatenated
# here. If it really lives below the `sys.exit(main())` guard in the same
# file, it is never reached when the file is run as a script -- confirm.
with open("getrepo.bat") as src:
    commands = [line.strip().replace("%", "%%").split(" ") for line in src.readlines()]

rewritten = []
for tokens in commands:
    # Everything up to and including "-o" is kept as-is; only the tokens
    # after it get their slashes flipped.
    split_at = tokens.index("-o") + 1
    head = tokens[:split_at]
    tail = [token.replace("/", "\\") for token in tokens[split_at:]]
    rewritten.append(" ".join(head + tail))

with open("getrepo.bat", "w") as dst:
    dst.write("\n".join(rewritten))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment