Created
February 11, 2016 10:48
-
-
Save pralhadstha/bc8171e5c942bfd0440f to your computer and use it in GitHub Desktop.
4chan images downloader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright (c) 2016, Alexis Nootens <[email protected]>
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
# LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
# OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
import sys, os, json, argparse, urllib.request as req | |
class Post:
    """One post of a thread, reduced to the fields the downloader reads.

    Attributes:
        no: value of the post's ``no`` field.
        tim: value of the ``tim`` field; used to build the image URL and,
            by default, the local file name.
        ext: value of the ``ext`` field, appended to ``tim`` / ``filename``.
        filename: value of the ``filename`` field, or ``None`` when absent.
    """

    def __init__(self, no, tim=None, ext=None, filename=None):
        self.no, self.tim = no, tim
        self.ext, self.filename = ext, filename

    def hasimage(self):
        """Return ``True`` when the post has a ``filename``, else ``False``."""
        if self.filename is None:
            return False
        return True
def getdocument(url):
    """Open *url* and return the response object produced by ``urlopen``.

    On failure a diagnostic is written to standard error and the process
    terminates with exit status 1, so callers never receive ``None``.

    Args:
        url: the address to open.

    Returns:
        The open response object; the caller is responsible for closing it.

    Raises:
        SystemExit: with code 1 when *url* is malformed or cannot be fetched.
    """
    try:
        return req.urlopen(url)
    except ValueError:
        # urlopen rejects strings it cannot interpret as a URL this way.
        print("Not a valid URL", file=sys.stderr)
    except req.URLError as e:
        print(e.reason, file=sys.stderr)
    # Only reached when one of the handlers above ran.
    sys.exit(1)
def download(argv, board, post):
    """Fetch the image attached to *post* on *board* and save it locally.

    Args:
        argv: parsed command-line namespace. ``argv.f`` selects the post's
            ``filename`` instead of ``post.tim`` as the local file stem;
            ``argv.directory`` is the target directory or ``None`` for the
            current working directory.
        board: board identifier used in the image URL.
        post: object exposing ``tim``, ``ext`` and ``filename``.
    """
    url = "http://i.4cdn.org/{}/{}{}".format(board, post.tim, post.ext)

    stem = post.filename if argv.f else post.tim
    # The name parts come from the remote JSON. Keep only the final path
    # component so a crafted value ("../../x") cannot write outside the
    # target directory; fall back to ``tim`` if nothing usable remains.
    filename = os.path.basename("{}{}".format(stem, post.ext)) or str(post.tim)

    if argv.directory is not None:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(argv.directory, exist_ok=True)
        filename = os.path.join(argv.directory, filename)

    print("Downloading:", filename)
    req.urlretrieve(url, filename)
def parse(argv, board, thread):
    """Download every image found in *thread* of *board*.

    The thread's JSON document is fetched, each entry of its ``posts`` list
    is turned into a ``Post``, and ``download`` is called for every post
    that has an image. A ``KeyboardInterrupt`` stops the loop with a short
    message instead of a traceback.

    Args:
        argv: parsed command-line namespace, forwarded to ``download``.
        board: board identifier.
        thread: thread identifier.
    """
    url = "http://a.4cdn.org/{}/thread/{}.json".format(board, thread)

    # Close the HTTP response as soon as the body has been read.
    with getdocument(url) as res:
        payload = json.loads(res.read().decode())

    # Per the 4chan API the posts live under the top-level "posts" key;
    # tolerate an unexpected document shape by treating it as empty.
    entries = payload.get('posts', []) if isinstance(payload, dict) else []
    posts = [
        Post(entry.get('no'), entry.get('tim'),
             entry.get('ext'), entry.get('filename'))
        for entry in entries
        if isinstance(entry, dict)
    ]

    try:
        for post in posts:
            if post.hasimage():
                download(argv, board, post)
    except KeyboardInterrupt:
        print("\nInterrupted by user")
    finally:
        # Remove temporary files urlretrieve may have left, even on error.
        req.urlcleanup()
def main(argv):
    """Validate the thread URL in ``argv.thread`` and download its images.

    Args:
        argv: parsed command-line namespace; ``argv.thread`` holds the
            thread URL and the whole namespace is forwarded to ``parse``.
    """
    url = argv.thread

    # Probe the URL up front so a bad address is reported before any
    # parsing; close the response right away so the connection is not leaked.
    probe = getdocument(url)
    if probe is not None:
        probe.close()

    # Board and thread id are taken from path segments 3 and 5 of the URL
    # (presumably "scheme://host/<board>/thread/<id>").
    parts = url.split('/')
    try:
        board, thread = parts[3], parts[5]
    except IndexError:
        print("Not a valid URL thread")
        return

    # Drop a trailing "#p123"-style fragment so it does not end up in the
    # JSON URL built by parse().
    thread = thread.split('#', 1)[0]

    # Called outside the try so an IndexError raised while parsing or
    # downloading is not misreported as an invalid URL.
    parse(argv, board, thread)
if __name__ == "__main__":
    # Command-line interface: one positional thread URL plus two options,
    # then hand the parsed namespace to main().
    cli = argparse.ArgumentParser(
        description='Download images from an 4chan thread',
    )
    cli.add_argument(
        '-f', '--filename',
        action='store_true',
        dest='f',
        help='preserve filenames (default: no)',
    )
    cli.add_argument(
        '-d',
        dest='directory',
        metavar='DIRECTORY',
        help='set the directory to save the files (default: current)',
    )
    cli.add_argument('thread', help='the thread url')

    options = cli.parse_args()
    main(options)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment