Created
September 9, 2013 11:33
-
-
Save blha303/6494407 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 4chan thread grabber: saves a thread as a local HTML page and downloads its images.
import codecs
import json
import urllib
import urllib2
# Thread JSON endpoint; formatted with (board name, thread id).
# NOTE(review): the former api.4chan.org/<board>/res/<id>.json address is no
# longer the documented endpoint; the 4chan API docs list a.4cdn.org instead.
API_URL = "https://a.4cdn.org/%s/thread/%s.json"
# Image location; formatted with (board name, image file name).
# NOTE(review): images.4chan.org/<board>/src/<file> was likewise replaced by
# i.4cdn.org/<board>/<file> in the 4chan API docs.
IMAGE_URL = "https://i.4cdn.org/%s/%s"
def getThreadInfo(board, id):
    """Fetch a thread from the JSON API and return it as parsed JSON.

    board is the board name (g, for example); id is the thread id. Both are
    formatted into API_URL.

    Returns the decoded JSON object, or None when the request or the JSON
    decoding fails.
    """
    try:
        response = urllib2.urlopen(API_URL % (board, id))
        try:
            return json.loads(response.read())
        finally:
            # Release the connection even when reading or decoding fails.
            response.close()
    except Exception:
        # Best-effort lookup: any fetch/parse failure is reported as None.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate to the caller.
        return None
def getImageList(thread):
    """Collect attachment details for every post that carries a file.

    thread is the thread json produced by getThreadInfo. Posts without a
    "tim" key are skipped. The result maps each remaining post's "no" to a
    dict holding its "filename" and "ext" unchanged, plus "id", which is the
    post's "tim" converted to a string.
    """
    attachments = {}
    for post in thread["posts"]:
        if "tim" not in post:
            continue
        attachments[post["no"]] = dict(
            filename=post["filename"],
            ext=post["ext"],
            id=str(post["tim"]),
        )
    return attachments
def downloadImage(board, imagename, filename=None):
    """Download one image and store it on disk.

    board and imagename are formatted into IMAGE_URL to build the remote
    address. filename is the local destination; when it is omitted (or
    otherwise falsy) imagename is used as the local name too.

    Returns whatever urllib.urlretrieve returns.
    """
    target = filename or imagename
    source = IMAGE_URL % (board, imagename)
    return urllib.urlretrieve(source, target)
def _renderPost(post, image, thread_id, sp):
    """Return the HTML fragment for one post; image is its attachment entry or None."""
    out = sp + "<a name=\"p{id}\">\n".format(id=str(post["no"]))
    if image is not None:
        out += sp + " <div class=\"post image\" onmouseover=\"this.style.backgroundColor = '#FFBBBB';\" onmouseout=\"this.style.backgroundColor = '#BBBBBB';\">\n"
        out += sp + " <a href=\"{filename}\"><img src=\"{filename}\" align=\"left\" style=\"height: 100px; width: 125px;\"></a>\n".format(filename=image["id"] + image["ext"])
    else:
        out += sp + " <div class=\"post\" onmouseover=\"this.style.backgroundColor = '#FFBBBB';\" onmouseout=\"this.style.backgroundColor = '#BBBBBB';\">\n"
    # A post may carry no "com" key (e.g. an image-only post); render it
    # with an empty body instead of failing with KeyError.
    comment = post.get("com", "")
    # Drop the thread id from link targets that start with it, leaving only
    # the remainder of the href.
    out += sp + sp + comment.replace("href=\"" + thread_id, "href=\"") + "\n"
    out += sp + " </div>\n"
    out += sp + "</a><hr>\n"
    return out


def downloadThread(board, id, showimages=True, getimages=True):
    """Save a thread as "<id>.html" in the current directory.

    board is the board name and id is the thread id (a string or an integer).
    showimages controls whether posts with an attachment get an image link in
    the page. getimages controls whether those images are also downloaded via
    downloadImage; it is only consulted when showimages is true.

    Raises IOError when the thread could not be fetched or parsed.
    """
    # The id is concatenated into the file name and into href prefixes, so
    # normalise it to a string up front; integer ids work as well this way.
    id = str(id)
    thread = getThreadInfo(board, id)
    if thread is None:
        # getThreadInfo reports every failure as None; fail with a clear
        # message instead of an opaque TypeError on the subscript below.
        raise IOError("could not fetch thread %s from board %s" % (id, board))
    first = thread["posts"][0]
    # Prefer the subject; fall back to the comment text, which may be absent.
    title = first["sub"] if "sub" in first else first.get("com", "")
    images = getImageList(thread)
    sp = " "
    # Write through a UTF-8 encoder so non-ASCII post text does not raise
    # UnicodeEncodeError on Python 2's default ASCII codec.
    with codecs.open(id + ".html", "w", encoding="utf-8") as out_file:
        out_file.write("""<html>
<head>
<meta charset="utf-8">
<style>
.post
{
background-color: #BBBBBB;
}
.image
{
min-height: 100px;
}
</style>
<title>%s</title>
</head>
<body>
<h2>%s</h2>
""" % (title, title))
        for post in thread["posts"]:
            image = images.get(post["no"]) if showimages else None
            if image is not None and getimages:
                downloadImage(board, image["id"] + image["ext"])
            out_file.write(_renderPost(post, image, id, sp))
        out_file.write("""
</body>
</html>""")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment