Skip to content

Instantly share code, notes, and snippets.

@EntityReborn
Created December 1, 2011 19:24
Show Gist options
  • Save EntityReborn/1419167 to your computer and use it in GitHub Desktop.
Downloader for STaD
import urllib2, os, urlparse, re
from BeautifulSoup import BeautifulSoup
# User variables
# Folder (relative to the working directory) all downloads are written into.
DESTFOLDER = "STAD"
# Re-download HTML pages even when a local copy already exists.
OVERWRITEHTML = True
# Re-download images even when a local copy already exists.
OVERWRITEPICS = False
# Highest chapter number to fetch; chapter 0 is the prologue/intro.
LASTCHAPTER = 81
# Shouldn't need to modify these constants unless downloading another story
# Root URL that every relative file name below is resolved against.
BASEURL = "http://six.clubetchi.com/6times/"
# printf-style pattern for chapter file names, e.g. "std-part05.htm".
FILEPATTERN = "std-part%02d.htm"
# Non-HTML support files (web fonts, font license, stylesheet); these are
# fetched once and never overwritten (see main()).
EXTRAFILES = [
"CoveredByYourGrace.ttf",
"DancingScript.ttf",
"GiveYouGlory.ttf",
"OvertheRainbow.ttf",
"CoveredByYourGrace.eot",
"DancingScript.eot",
"GiveYouGlory.eot",
"OvertheRainbow.eot",
"GoogleOpenFontLicense.txt",
"std-read.css"
]
# Additional HTML pages downloaded alongside the numbered chapters.
EXTRAPAGES = [
"std-characters.htm",
"std-backstory.htm",
"std-artwork.htm",
"6times.htm"
]
# href-rewriting rules applied by dlHtml(): compiled regex -> replacement.
# A replacement may be a plain string, or a callable taking the original
# href and returning the new one.
LINKRENAMES = {
re.compile("^(\./)?contents"): "6times.htm",
re.compile("^(\./)?characters"): "std-characters.htm",
re.compile("^(\./)?part-.*"):
lambda x: "std-{0}.htm".format(x.replace("-","")),
}
def main():
# Make sure our destination folder is available.
if not os.path.exists(DESTFOLDER):
os.makedirs(DESTFOLDER)
# Support files.
for f in EXTRAFILES:
fpath = localFile(f)
if not os.path.exists(fpath):
print "Downloading %s..." % f
u = urllib2.urlopen(remoteFile(f))
data = u.read()
with open(fpath, "w") as fl:
fl.write(data)
# Supporting pages.
# Be aware that for these extrapages, some links might not work.
for page in EXTRAPAGES:
dlWithPix(localFile(page), remoteFile(page))
# The actual story! Yay!
for x in range(0, LASTCHAPTER+1): # #0 is the prologue/intro
dlWithPix(localFile(FILEPATTERN%x), remoteFile(FILEPATTERN%x))
def localFile(fname):
    """Map a remote file name to its path inside the destination folder."""
    dest = os.path.join(DESTFOLDER, fname)
    return dest
def remoteFile(fname):
    """Build the absolute URL for fname, resolved against BASEURL."""
    url = urlparse.urljoin(BASEURL, fname)
    return url
def dlHtml(dest, url, updatecallback=None):
    """Download (or load the cached copy of) an HTML page and return it
    as a BeautifulSoup document.

    When dest does not exist, or OVERWRITEHTML is set, the page is fetched
    from url, known links are rewritten via LINKRENAMES so the saved pages
    work offline, and the result is written to dest. Otherwise the existing
    local file is parsed and returned unchanged.

    updatecallback, when callable, receives a progress string; any exception
    it raises is deliberately swallowed (best-effort progress reporting).
    """
    if not os.path.exists(dest) or OVERWRITEHTML:
        if callable(updatecallback):
            try:
                updatecallback("Downloading %s" % dest)
            except Exception:
                pass
        u = urllib2.urlopen(url)
        try:
            soup = BeautifulSoup(u.read())
        finally:
            u.close()  # fix: don't leak the HTTP connection
        # Rewrite matching hrefs so saved pages link to the local filenames.
        for before, after in LINKRENAMES.iteritems():
            links = soup.findAll("a", href=before)
            for link in links:
                # Allow for lambdas/callables.
                if callable(after):
                    link["href"] = after(link["href"])
                else:
                    link["href"] = after
        with open(dest, "w") as f:
            f.write(soup.prettify())
    else:
        with open(dest, "r") as f:
            soup = BeautifulSoup(f.read())
    return soup
def dlPic(dest, url, updatecallback=None):
    """Download one image from url to dest, creating parent directories
    as needed.

    The download is skipped when dest already exists and OVERWRITEPICS is
    False. updatecallback behaves as in dlHtml: called best-effort with a
    progress string, errors ignored.
    """
    if not os.path.exists(dest) or OVERWRITEPICS:
        if callable(updatecallback):
            try:
                updatecallback("Downloading %s" % dest)
            except Exception:
                pass
        u = urllib2.urlopen(url)
        try:
            pic = u.read()
        finally:
            u.close()  # don't leak the HTTP connection
        # Bug fix: the original split dest on "/" (wrong separator on
        # Windows) and called os.path.join(path[:-1]) with a *list* as its
        # only argument, which returns the list unchanged -- so
        # os.makedirs(dirs[0]) only ever created the first path component.
        # dirname/makedirs handles any nesting depth on any platform.
        dirs = os.path.dirname(dest)
        if dirs and not os.path.isdir(dirs):
            try:
                os.makedirs(dirs)
            except OSError:
                pass  # already created (race); open() below reports real errors
        with open(dest, "wb") as f:
            f.write(pic)
def dlWithPix(dest, url):
def callback(msg):
print msg
html = dlHtml(dest, url, callback)
imgs = [image["src"] for image in html.findAll("img")]
for fname in imgs:
url = remoteFile(fname)
dest = localFile(fname)
dlPic(dest, url, callback)
if __name__ == "__main__":
try:
main()
except KeyboardInterrupt:
print "\nGoodbye."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment