| """ | |
| ash_ffffind.py | |
| v1.1 (September 14, 2015) | |
| by [email protected] | |
| Automatically downloads all images from ffffound saved by a specific user. | |
| Will first try to download the image from the original source (to get the highest quality possible). | |
| If that fails, it'll download the cached version from ffffound. | |
| Prerequisities: | |
| Beautiful Soup (http://www.crummy.com/software/BeautifulSoup/) | |
| Usage: | |
| python ffffind.py username | |
| """ | |
| import os, sys, urllib, imghdr | |
| from BeautifulSoup import BeautifulSoup | |
| from urlparse import urlparse | |
| from posixpath import basename, dirname | |
| class URLOpener(urllib.FancyURLopener): | |
| version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11' | |
| urllib._urlopener = URLOpener() | |
| def main(user): | |
| offset = 0 | |
| page = 1 | |
| while True: | |
| print "Capturing page "+str(page)+" ..." | |
| f = urllib.urlopen("http://ffffound.com/home/"+user+"/found/?offset="+str(offset)) | |
| s = f.read() | |
| f.close() | |
| if "<div class=\"description\">" in s: | |
| images = [] | |
| offset += 25 | |
| count = 0 | |
| soup = BeautifulSoup(s) | |
| for i in soup.findAll("div", { "class" : "description" }): | |
| images.append({"url": urlparse("http://" + str(i).split("<br />")[0].replace("<div class=\"description\">", ""))}) | |
| for i in soup.findAll("img"): | |
| if str(i).find("_m.") != -1: | |
| images[count]["backup"] = str(i).split("src=\"")[1].split("\"")[0] | |
| count += 1 | |
| for i in images: | |
| if os.path.exists(user+"/"+basename(i["url"].path)): | |
| print basename(i["url"].path) + " exists, stopping." | |
| sys.exit() | |
| else: | |
| print "Downloading " + basename(i["url"].path), | |
| try: | |
| urllib.urlretrieve(i["url"].geturl(), user+"/"+basename(i["url"].path)) | |
| print "... done." | |
| if not imghdr.what(user+"/"+basename(i["url"].path)) in ["gif", "jpeg", "png", None]: | |
| print "... unfortunately, it seems to be a bad image.\nDownloading backup", | |
| try: | |
| urllib.urlretrieve(i["backup"], user+"/"+basename(i["url"].path)) | |
| print "... which seems to have worked." | |
| except: | |
| print "... which also failed." | |
| if os.path.getsize(user+"/"+basename(i["url"].path)) < 5000: | |
| raise | |
| except: | |
| print "... failed. Downloading backup", | |
| try: | |
| urllib.urlretrieve(i["backup"], user+"/"+basename(i["url"].path)) | |
| print "... which seems to have worked." | |
| except: | |
| print "... which also failed." | |
| page += 1 | |
| else: | |
| print "Reached the end of the list, stopping." | |
| break | |
| if __name__ == '__main__': | |
| print("ffffound image downloader") | |
| if len(sys.argv) < 2: | |
| print "Usage:\n\t python ffffind.py username" | |
| else: | |
| try: | |
| if not os.path.exists("./"+sys.argv[1]): | |
| os.mkdir(sys.argv[1]) | |
| except: | |
| print "Error creating directory." | |
| sys.exit() | |
| user = sys.argv[1] | |
| print "Downloading all pictures from user '"+user+"'" | |
| main(user) |
@ashildebrandt Hi, I have installed BS4 via pip and I get this error when I run your script:

$ python ffffind.py monsieurm
Traceback (most recent call last):
  File "ffffind.py", line 22, in <module>
    from BeautifulSoup import BeautifulSoup
ImportError: No module named BeautifulSoup

However:

$ pip install beautifulsoup4
Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python2.7/site-packages

Any idea?
OK, just install the previous version of BeautifulSoup:
pip install beautifulsoup, not pip install beautifulsoup4
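If you'd rather keep beautifulsoup4 installed, a small import shim should also work; this is an untested sketch (in BeautifulSoup 4, findAll still exists as an alias for find_all, so the rest of the script should run unchanged):

try:
    from bs4 import BeautifulSoup  # beautifulsoup4 (BS4)
except ImportError:
    from BeautifulSoup import BeautifulSoup  # legacy BeautifulSoup 3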
Hey. I've never worked with Python before, but I want to save my ffffounds so badly. Is there any instruction on what to do with this script? I downloaded Python already and tried to run the code, but I failed. Please, can you help?
BG - Phil :)
I updated the script to also create HTML pages in which you can view your images, plus fixed a couple of edge cases I came across: https://github.com/philgyford/ffffound-export
Anyone have a torrent of the html page dump? I totally learned about this too late :(
@Triune the entirety of ffffound.com is in fact available, via torrent, from archive.org:
https://archive.org/details/ffffound.com-warc-archive-2017-05-07
Enjoy!
Hi Aaron,
I am getting the same problem that jwmacias is experiencing. Is there a way to fix the file extension issue? That would be great.
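For anyone else hitting this, one possible workaround (an untested sketch, not part of the original script; fix_extension is a hypothetical helper): after a download, ask imghdr what the file actually contains and rename it to a matching extension.

import imghdr, os

def fix_extension(path):
    # imghdr.what returns e.g. 'jpeg', 'png', 'gif', or None if unrecognized.
    kind = imghdr.what(path)
    ext = {"jpeg": ".jpg", "png": ".png", "gif": ".gif"}.get(kind)
    root, current = os.path.splitext(path)
    if ext and current.lower() != ext:
        os.rename(path, root + ext)
        return root + ext
    return path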