Skip to content

Instantly share code, notes, and snippets.

@denniskupec
Created October 8, 2017 21:06
Show Gist options
  • Save denniskupec/267c3596e773814cbe6f7dad4350c4e4 to your computer and use it in GitHub Desktop.
Save denniskupec/267c3596e773814cbe6f7dad4350c4e4 to your computer and use it in GitHub Desktop.
4chan thread archiver
import requests, sys, re, os, shutil
# 4chan thread archiver: downloads every sufficiently large image/webm
# attached to a thread into downloads/<board>/<thread id>/.
#
# Usage: 4rchive.py <thread URL>

# Allowed file extensions (text-only posts and other attachment types are
# ignored).
ALLOWED_EXTS = (".webm", ".gif", ".jpg", ".jpeg", ".png")

# Posts whose width + height is below this are treated as postage-stamp
# sized images and skipped.
MIN_DIMENSION_SUM = 800

# Seconds to wait on the network before giving up, so a stalled connection
# cannot hang the script forever.
TIMEOUT = 30

# Captures (board, thread number) from URLs such as
# https://boards.4chan.org/g/thread/12345/optional-slug#p12346
THREAD_URL_RE = re.compile(r"4chan(?:nel)?\.org/(\w+)/\w+/(\d+)/?")


def parse_thread_url(url):
    """Return ``(board, thread_id)`` parsed from *url*, or None if no match."""
    match = THREAD_URL_RE.search(url)
    if match is None:
        return None
    return match.group(1), match.group(2)


def media_names(post):
    """Return ``(remote_name, local_name)`` for a post's attachment.

    Returns None when the post has no attachment, the attachment's extension
    is not in ALLOWED_EXTS, or the image is too small to be worth keeping.

    *remote_name* is the name the image server stores the file under
    (``<tim><ext>``); the uploader's original filename is not part of the
    URL. *local_name* keeps the original filename for readability, with
    characters that are unsafe in a path replaced by underscores.
    """
    if 'tim' not in post or post.get('ext') not in ALLOWED_EXTS:
        return None
    if post.get('w', 0) + post.get('h', 0) < MIN_DIMENSION_SUM:
        return None
    stamp = str(post['tim'])
    ext = post['ext']
    # The original name is user-supplied: never let it introduce a path
    # separator or other characters that are illegal in file names.
    safe_name = re.sub(r'[\\/:*?"<>|\x00-\x1f]', "_", post.get('filename', ''))
    return stamp + ext, stamp + safe_name + ext


def download(url, dest):
    """Stream *url* into the file *dest*; return True on success.

    Data is written to ``dest + ".part"`` first and renamed only after the
    transfer completes, so an interrupted run never leaves a truncated file
    that a later run would mistake for a finished download.
    """
    try:
        r = requests.get(url, stream=True, timeout=TIMEOUT)
    except requests.RequestException as err:
        print("  failed: {}".format(err))
        return False
    partial = dest + ".part"
    try:
        if r.status_code != 200:
            # Do not save an error page under a media file name.
            print("  failed: HTTP {}".format(r.status_code))
            return False
        # Have urllib3 undo any Content-Encoding before the bytes hit disk.
        r.raw.decode_content = True
        with open(partial, 'wb') as outf:
            shutil.copyfileobj(r.raw, outf)
    finally:
        r.close()
    # The caller has already checked that dest does not exist.
    os.rename(partial, dest)
    return True


def main(argv):
    """Archive the thread named by ``argv[1]``; exits with a message on error."""
    if len(argv) < 2:
        sys.exit("Usage: 4rchive.py <URL>")

    parsed = parse_thread_url(argv[1])
    if parsed is None:
        sys.exit("Not a 4chan thread URL: " + argv[1])
    board, thread_id = parsed

    path = "downloads/{}/{}/".format(board, thread_id)
    if not os.path.exists(path):
        os.makedirs(path)

    # Build the API URL from the parsed pieces rather than by string
    # replacement, so trailing slashes, slugs and #fragments do not break it.
    api_url = "https://a.4cdn.org/{}/thread/{}.json".format(board, thread_id)
    r = requests.get(api_url, timeout=TIMEOUT)
    if r.status_code != 200:
        sys.exit("4chan API failure")

    posts = r.json()['posts']
    total = len(posts)
    print("Downloading " + str(total) + " post(s)")

    for i, post in enumerate(posts):
        names = media_names(post)
        if names is None:
            continue
        remote_name, local_name = names

        iurl = "https://i.4cdn.org/{}/{}".format(board, remote_name)
        print("[{}/{}] ".format(i + 1, total) + iurl)

        if os.path.exists(path + local_name):
            continue
        download(iurl, path + local_name)


if __name__ == "__main__":
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment