Ffffind (downloads every image from a given FFFFOUND! account)
""" | |
ash_ffffind.py | |
v1.1 (September 14, 2015) | |
by [email protected] | |
Automatically downloads all images from ffffound saved by a specific user. | |
Will first try to download the image from the original source (to get the highest quality possible). | |
If that fails, it'll download the cached version from ffffound. | |
Prerequisities: | |
Beautiful Soup (http://www.crummy.com/software/BeautifulSoup/) | |
Usage: | |
python ffffind.py username | |
""" | |
import os, sys, urllib, imghdr
from BeautifulSoup import BeautifulSoup
from urlparse import urlparse
from posixpath import basename, dirname


class URLOpener(urllib.FancyURLopener):
    version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'

urllib._urlopener = URLOpener()


def main(user):
    offset = 0
    page = 1

    while True:
        print "Capturing page "+str(page)+" ..."
        f = urllib.urlopen("http://ffffound.com/home/"+user+"/found/?offset="+str(offset))
        s = f.read()
        f.close()

        if "<div class=\"description\">" in s:
            images = []
            offset += 25
            count = 0

            soup = BeautifulSoup(s)

            for i in soup.findAll("div", {"class": "description"}):
                images.append({"url": urlparse("http://" + str(i).split("<br />")[0].replace("<div class=\"description\">", ""))})

            for i in soup.findAll("img"):
                if str(i).find("_m.") != -1:
                    images[count]["backup"] = str(i).split("src=\"")[1].split("\"")[0]
                    count += 1

            for i in images:
                if os.path.exists(user+"/"+basename(i["url"].path)):
                    print basename(i["url"].path) + " exists, stopping."
                    sys.exit()
                else:
                    print "Downloading " + basename(i["url"].path),
                    try:
                        urllib.urlretrieve(i["url"].geturl(), user+"/"+basename(i["url"].path))
                        print "... done."
                        if not imghdr.what(user+"/"+basename(i["url"].path)) in ["gif", "jpeg", "png", None]:
                            print "... unfortunately, it seems to be a bad image.\nDownloading backup",
                            try:
                                urllib.urlretrieve(i["backup"], user+"/"+basename(i["url"].path))
                                print "... which seems to have worked."
                            except:
                                print "... which also failed."
                        if os.path.getsize(user+"/"+basename(i["url"].path)) < 5000:
                            raise
                    except:
                        print "... failed. Downloading backup",
                        try:
                            urllib.urlretrieve(i["backup"], user+"/"+basename(i["url"].path))
                            print "... which seems to have worked."
                        except:
                            print "... which also failed."

            page += 1
        else:
            print "Reached the end of the list, stopping."
            break


if __name__ == '__main__':
    print "ffffound image downloader"
    if len(sys.argv) < 2:
        print "Usage:\n\t python ffffind.py username"
    else:
        try:
            if not os.path.exists("./"+sys.argv[1]):
                os.mkdir(sys.argv[1])
        except:
            print "Error creating directory."
            sys.exit()
        user = sys.argv[1]
        print "Downloading all pictures from user '"+user+"'"
        main(user)
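Note that the script above targets Python 2 and BeautifulSoup 3, both long past end of life. As a rough sketch only, the core idea of its download loop (try the original source first, fall back to the ffffound cache, and treat very small files as failed downloads) could be written in Python 3 roughly as follows; fetch_with_fallback and its parameters are illustrative names, not part of the original script.

import os
import urllib.request

def fetch_with_fallback(original_url, backup_url, dest_path, min_bytes=5000):
    # Try the original source first, then the cached copy; a file smaller than
    # min_bytes is treated as a failed download, mirroring the original heuristic.
    headers = {"User-Agent": "Mozilla/5.0"}  # some hosts reject Python's default user agent
    for url in (original_url, backup_url):
        try:
            req = urllib.request.Request(url, headers=headers)
            with urllib.request.urlopen(req, timeout=30) as resp, open(dest_path, "wb") as out:
                out.write(resp.read())
            if os.path.getsize(dest_path) >= min_bytes:
                return True
        except Exception:
            pass  # fall through to the next URL
    return False

The User-Agent header here plays the same role as the URLOpener subclass in the original script: presenting a browser-like identity so image hosts don't reject the request.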
@Triune the entirety of ffffound.com is in fact available, via torrent, from archive.org:
https://archive.org/details/ffffound.com-warc-archive-2017-05-07
Enjoy!
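For anyone scripting against that mirror rather than torrenting it by hand, a minimal sketch using the third-party internetarchive package (an assumption; the gist itself does not use it) would be:

from internetarchive import download

# Item identifier taken from the archive.org URL above.
download("ffffound.com-warc-archive-2017-05-07", verbose=True)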