Last active
October 4, 2019 08:57
-
-
Save brantb/1614537 to your computer and use it in GitHub Desktop.
Crawls interfacelift.com and downloads wallpapers. Not written by me.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""Crawl interfacelift.com listing pages and download the wallpapers with wget.

The script walks ``index1.html``, ``index2.html``, ... under ``url``, extracts
every wallpaper link from each page and hands it to wget.  It stops when a
page contains no wallpaper links, when an already-downloaded file is found
(if ``stoponfind`` is ``'1'``), or when wget reports a failure.

Runs on both Python 2 and Python 3.
"""
import os
import random
import re
import subprocess
import sys
import time

try:  # Python 2
    import urllib2
except ImportError:  # Python 3: the same Request/urlopen API lives here
    import urllib.request as urllib2

# -- Changeable Variables
url = 'http://interfacelift.com/wallpaper/downloads/date/widescreen/2560x1440/'
# Plain filesystem path.  Do NOT shell-escape spaces: the path is passed to
# os.path and to wget as a single argument, never through a shell.
directory = '/Users/brant/Pictures/Wallpaper/InterfaceLift 2560x1440/'
# '1' stops at the first wallpaper that already exists locally (i.e. where the
# previous run left off); '0' keeps walking every page (wget is run with -nc,
# so existing files are still not overwritten).
stoponfind = '1'
wgetpath = 'wget'  # Default on linux systems; /usr/local/bin/wget on freebsd

# -- Should not need to edit below here unless something stops working --
# Fake useragent since wget is blocked
useragent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.3) Gecko/20100402 Firefox/3.6.3 (.NET CLR 3.5.30729)'
# Regex used to look up picture url paths.  [^"]* (instead of .*) keeps a
# match inside one href even when several links share a line.
pattern = r'(?<=<a href=")/wallpaper/[^"]*jpg(?=">)'
# Regex that pulls the file name (last path component) out of a picture path.
picturepattern = '[^/]*$'
site = 'http://interfacelift.com'

BANNER = '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
BANNER_TITLE = '!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! WGET OUTPUT !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
RULE = '----------------------------------------------------------------------------------'


def to_text(raw):
    """Return *raw* as the native ``str`` type (decodes bytes on Python 3)."""
    if isinstance(raw, str):
        return raw
    return raw.decode('utf-8', 'replace')


def find_picture_paths(html):
    """Return the site-relative wallpaper paths linked from a listing page."""
    return re.findall(pattern, html)


def picture_filename(picture):
    """Return the file-name part of a site-relative picture path."""
    return re.search(picturepattern, picture).group()


def build_wget_command(picture):
    """Return the wget argument list that downloads *picture* into ``directory``.

    A list (not a shell string) is used so that spaces in ``directory`` and
    any odd characters in the scraped path cannot be interpreted by a shell.
    """
    return [wgetpath, '-P', directory, '--random-wait', '-nc',
            '-U', useragent, site + picture]


def fetch_page(pagenumber):
    """Download listing page number *pagenumber* and return its HTML text."""
    request = urllib2.Request(url + "index" + str(pagenumber) + ".html",
                              None, {'User-Agent': useragent})
    response = urllib2.urlopen(request)
    try:
        return to_text(response.read())
    finally:
        response.close()


def run_wget(picture):
    """Run wget for *picture*; return ``(exit_status, combined_output)``.

    If wget cannot be started at all, status 127 and the OS error text are
    returned instead of raising.
    """
    try:
        process = subprocess.Popen(build_wget_command(picture),
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.STDOUT)
    except OSError as err:
        return 127, str(err)
    output = process.communicate()[0]
    return process.returncode, to_text(output)


def report_failure(wallpapercount, status, output):
    """Print the wget output of a failed download inside a visible banner."""
    print(BANNER)
    print(BANNER_TITLE)
    print(BANNER)
    print(RULE)
    print(output)
    print(RULE)
    print(BANNER)
    print(BANNER)
    print(BANNER)
    print(str(wallpapercount) + '. DOWNLOAD FAILED check wget output above for reason.')
    print('Exiting script ... wget returned non 0 exit status code: ' + str(status))


def main():
    """Walk the listing pages and download wallpapers.

    Returns the process exit status: 0 on normal completion, 1 when wget
    failed.
    """
    wallpapercount = 0
    count = 1
    while count < 9999999:
        pictures = find_picture_paths(fetch_page(count))
        if not pictures:
            # A page without wallpaper links means we ran past the last page.
            print("Downloaded " + str(wallpapercount) + " wallpaper from InterfaceLift.")
            return 0
        for picture in pictures:
            picturefile = picture_filename(picture)
            if stoponfind == "1" and os.path.exists(os.path.join(directory, picturefile)):
                print('Directory up to date. Downloaded ' + str(wallpapercount) + ' new wallpaper.')
                return 0
            status, output = run_wget(picture)
            if status != 0:
                report_failure(wallpapercount, status, output)
                return 1
            print(str(wallpapercount) + '. Downloaded ' + site + picture + ' ...')
            wallpapercount += 1
        count += 1
        # Pause a random amount between listing pages.
        randomnum = random.randint(10, 30)
        print('Sleeping for :' + str(randomnum))
        time.sleep(randomnum)
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment