Skip to content

Instantly share code, notes, and snippets.

@Raimondi
Created February 17, 2014 22:26
Show Gist options
  • Save Raimondi/9060450 to your computer and use it in GitHub Desktop.
Save Raimondi/9060450 to your computer and use it in GitHub Desktop.
scraper for /r/wallpapers @http://pastebin.com/8RKrFifq
#!/usr/bin/python
#imports
import urllib2
import os
from array import array
# Settings read by the later stages of the script.
path = "/home/hojlind/Pictures/Wallpapers"  # directory the images and changer.xml are written to
time = 600  # value written into the <duration> of every <static> slideshow entry

# Working state for the page scan.
index = indexstart = indexend = 0  # cursor and anchor bounds inside the page source
sourceholder = holder = ""  # page source; `holder` is not referenced by the visible script
imagepaths = []  # filled with anchor markup first, later reduced to image URLs
# Download the HTML of the /r/wallpapers listing page into `sourceholder`.
# The User-Agent is attached as an "unredirected" header on the request.
listing_url = 'http://www.reddit.com/r/wallpapers'
req = urllib2.Request(listing_url)
req.add_unredirected_header('User-Agent', 'Hojlinds wallpaper getter')
listing_response = urllib2.urlopen(req)
sourceholder = listing_response.read()
# Collect the markup of every post-title anchor (<a class="title " ...>...</a>)
# whose text mentions an image extension or imgur. Each match is appended to
# `imagepaths` including its closing </a>.
#
# Fixes over the previous version:
#  * the filter tested for the single letter "g" (matching almost any anchor)
#    where "png" was intended;
#  * the closing tag is now searched from the start of the title anchor, so
#    the loop jumps from title to title instead of visiting every </a> on the
#    page;
#  * the scan cursor is initialised here rather than relying on a distant
#    module-level assignment.
index = 0
while index < len(sourceholder):
    indexstart = sourceholder.find("<a class=\"title \"", index)
    if indexstart == -1:
        # no further title anchors on the page
        break
    indexend = sourceholder.find("</a>", indexstart)
    if indexend == -1:
        # unterminated anchor: nothing usable is left
        break
    anchor = sourceholder[indexstart : indexend + 4]
    if "png" in anchor or "jpg" in anchor or "imgur" in anchor:
        imagepaths.append(anchor)
    # resume scanning right after this anchor's closing tag
    index = indexend + 4
# Reduce each collected anchor to a direct image URL and drop the anchors that
# do not link straight to a .jpg/.png file.
def _direct_image_url(anchor):
    """Return the direct image URL found in *anchor*'s href, or "" if none.

    The extension is searched only inside the href value, so a post title
    that merely mentions "jpg" or "png" can no longer leak title text into
    the URL. As before, "jpg" is preferred over "png" and the URL is cut
    right after the extension (dropping any trailing query string).
    """
    href_at = anchor.find("href=")
    if href_at == -1:
        return ""
    # skip past `href=` and the opening double quote
    value_start = href_at + 6
    value_end = anchor.find('"', value_start)
    if value_end == -1:
        # no closing quote: fall back to the rest of the anchor
        value_end = len(anchor)
    href = anchor[value_start:value_end]
    for extension in ("jpg", "png"):
        ext_at = href.find(extension)
        if ext_at != -1:
            return href[:ext_at + 3]
    return ""

# A list comprehension (rather than filter()) keeps `imagepaths` a real list
# on every Python version, which the later len()/indexing code relies on.
imagepaths = [url for url in map(_direct_image_url, imagepaths) if url]
# Download every image into `path` as wallpaper<N>.jpg / wallpaper<N>.png,
# where N is the image's position in `imagepaths`.
if not os.path.exists(path):
    os.makedirs(path)
for i, image_url in enumerate(imagepaths):
    extension = ".jpg" if image_url.find("jpg") != -1 else ".png"
    filename = "wallpaper" + str(i) + extension
    reqImage = urllib2.Request(image_url)
    reqImage.add_unredirected_header('User-Agent', 'Hojlinds wallpaper getter')
    # Fetch before opening the destination so a failed download does not
    # leave an empty file behind, and always release the connection.
    response = urllib2.urlopen(reqImage)
    try:
        image_data = response.read()
    finally:
        response.close()
    # Image payloads are binary: "wb" avoids newline translation / text
    # encoding, and `with` closes the file even if the write fails.
    with open(os.path.join(path, filename), "wb") as f:
        f.write(image_data)
# Write changer.xml, the slideshow description that cycles through the
# downloaded wallpapers: a fixed <starttime>, then for every image a <static>
# entry preceded by a <transition> from the previous image, and finally a
# transition from the last image back to the first.
def _wallpaper_file(position):
    """Return the full path of the saved file for imagepaths[position]."""
    if imagepaths[position].find("jpg") != -1:
        extension = ".jpg"
    else:
        extension = ".png"
    return os.path.join(path, "wallpaper" + str(position) + extension)


def _transition(source, target):
    """Return the <transition> element fading from *source* to *target*."""
    return ("<transition>\n<duration>5.0</duration>\n<from>" + source
            + "</from>\n<to>" + target + "</to>\n</transition>")


filename = "changer.xml"
# `with` guarantees the XML is flushed and closed; previously the handle was
# never closed explicitly.
with open(os.path.join(path, filename), "w") as f:
    f.write("<background>\n<starttime>\n<year>2009</year>\n<month>08</month>\n<day>04</day>\n<hour>00</hour>\n<minute>00</minute>\n<second>00</second>\n</starttime>")
    for i in range(len(imagepaths)):
        current = _wallpaper_file(i)
        if i > 0:
            f.write(_transition(_wallpaper_file(i - 1), current))
        f.write("<static>\n<duration>" + str(time) + "</duration>\n<file>" + current + "</file>\n</static>")
    # Close the loop: fade from the last image back to the FIRST one. The old
    # code built this target from index i-1 combined with image 0's extension,
    # which pointed at the wrong (or a non-existent) file. With a single image
    # there is nothing to fade to, so the element is skipped.
    if len(imagepaths) > 1:
        f.write(_transition(_wallpaper_file(len(imagepaths) - 1), _wallpaper_file(0)))
    f.write("</background>")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment