Skip to content

Instantly share code, notes, and snippets.

@spudfkc
Last active December 24, 2015 12:38
Show Gist options
  • Save spudfkc/6798684 to your computer and use it in GitHub Desktop.
Save spudfkc/6798684 to your computer and use it in GitHub Desktop.
wow very script such code so python wow lol shibe. Grabs the images from the front page of /r/supershibe and writes them to "links.html". Requires BeautifulSoup4.
import urllib2
from bs4 import BeautifulSoup as bs
def _full_size_link(src):
    """Turn an ``<img>`` ``src`` value into the link stored for that image.

    Returns ``None`` when *src* is missing or has no ``.`` to split on.
    """
    if not src:
        return None
    stem, dot, extension = src.rpartition('.')
    if not dot:
        return None
    # Protocol-relative sources ("//host/path") need a scheme to be usable
    # from a local HTML file; sources that already carry one are left alone.
    if stem.startswith('//'):
        stem = 'http:' + stem
    # Drop the single character right before the extension.
    # NOTE(review): presumably imgur's thumbnail-size suffix -- confirm
    # against the live markup.
    return ''.join([stem[:-1], dot, extension])


def scrape_site(url):
    """Collect ``(title, link)`` pairs for the images on the page at *url*.

    Only ``<img>`` tags that have an ``alt`` attribute are considered, and
    tags whose ``src`` is missing or has no file extension are skipped.
    The title is the tag's ``title`` attribute up to (not including) the
    first ``<p>``; it is ``''`` when the attribute is absent.

    Args:
        url: Address of the page to download.

    Returns:
        A list of ``(title, link)`` tuples in document order.

    Raises:
        Whatever ``urllib2.urlopen`` raises when the page cannot be fetched.
    """
    from contextlib import closing

    # closing() releases the HTTP response even if read() fails.
    with closing(urllib2.urlopen(url)) as response:
        html = response.read()

    result = []
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    for img in bs(html, 'html.parser').find_all('img'):
        if not img.has_attr('alt'):
            continue
        link = _full_size_link(img.get('src'))
        if link is None:
            continue
        # partition() keeps the whole title when there is no '<p>' in it,
        # where index() would raise ValueError and abort the scrape.
        title = (img.get('title') or '').partition('<p>')[0]
        result.append((title, link))
    return result
def write_html(filename, links):
    """Append one HTML snippet per ``(title, link)`` pair to *filename*.

    Each pair becomes a paragraph holding the title followed by an
    ``<img>`` pointing at the link. Existing file content is preserved, so
    the function can be called repeatedly to accumulate output.

    Args:
        filename: Path of the HTML file to append to (created if missing).
        links: Iterable of ``(title, link)`` text pairs.
    """
    from xml.sax.saxutils import escape

    template = (u'<br> <p>{}</p>'
                u'<img src="{}" style="max-width: 100%;"></img><br>')
    # Titles and links come from scraped pages, so escape them before
    # embedding; the link sits inside a double-quoted attribute and needs
    # '"' escaped as well.
    snippets = [
        template.format(escape(title), escape(link, {u'"': u'&quot;'}))
        for (title, link) in links
    ]
    # 'ab' rather than 'wa': 'wa' is not a valid append mode (it is rejected
    # by Python 3 and treated as a truncating 'w' by common C runtimes), and
    # binary mode lets the UTF-8 bytes be written unchanged on any version.
    with open(filename, 'ab') as f:
        f.write(u''.join(snippets).encode('utf8'))
def main():
    """Scrape every configured gallery and write the results to one file.

    The output file is emptied first; then each gallery page is scraped
    with ``scrape_site`` and the resulting pairs are handed to
    ``write_html``.
    """
    output_path = 'link2.html'
    galleries = [
        'http://imgur.com/r/shibe',
        'http://imgur.com/r/supershibe',
        'http://imgur.com/r/dogecoin',
        'http://imgur.com/r/doge',
        'http://imgur.com/r/shiba',
        # 'http://shibe-doge.tumblr.com'  (left disabled)
    ]

    # Start from an empty file so output from earlier runs does not linger.
    open(output_path, 'w').close()

    for gallery in galleries:
        write_html(output_path, scrape_site(gallery))
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment