Created November 11, 2018 19:32
-
-
Save jeffreyroberts/0eed5c723be6deb0e8b01e8d2569b5cf to your computer and use it in GitHub Desktop.
WallPaper Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ssl
import urllib.request
from contextlib import closing
from urllib.parse import urljoin
from urllib.request import urlopen

from bs4 import BeautifulSoup
from requests import get
from requests.exceptions import RequestException
def simple_get(url, timeout=10):
    """
    Attempt to get the content at `url` by making an HTTP GET request.

    Args:
        url: Address to request.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a stalled connection would block the scraper forever.

    Returns:
        The raw response body (`resp.content`) when `is_good_response`
        accepts the response, otherwise None. None is also returned when
        the request itself fails; the failure is reported via `log_error`.
    """
    try:
        # closing() guarantees the streamed connection is released even if
        # reading the body raises.
        with closing(get(url, stream=True, timeout=timeout)) as resp:
            if is_good_response(resp):
                return resp.content
            return None
    except RequestException as e:
        # Timeouts are RequestException subclasses, so they land here too.
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None
def is_good_response(resp):
    """
    Return True if the response seems to be HTML, False otherwise.

    A response qualifies when its status code is 200 and its Content-Type
    header contains 'html' (case-insensitive). A response without a
    Content-Type header is treated as not HTML instead of raising KeyError.

    Args:
        resp: Response-like object exposing `headers` (a mapping) and
            `status_code`.
    """
    # .get() rather than indexing: the header may be absent, and the
    # `or ''` also covers a header explicitly stored as None.
    content_type = (resp.headers.get('Content-Type') or '').lower()
    return resp.status_code == 200 and 'html' in content_type
def log_error(e):
    """
    Report an error.

    The current implementation simply prints the string form of `e` to
    standard output; swap the body for real logging if needed.
    """
    message = str(e)
    print(message)
# --- Scraper configuration ---------------------------------------------------
# Listing pages of the "Fractal Wallpapers" sub-category; the page number is
# appended to this URL.
LISTING_URL = 'https://wall.alphacoders.com/by_sub_category.php?id=170808&name=Fractal+Wallpapers&page='
# Base that the (relative) detail-page links on a listing page are resolved against.
SITE_ROOT = 'https://wall.alphacoders.com/'
# Number of listing pages to walk (page numbers 0 .. PAGE_COUNT - 1).
PAGE_COUNT = 70
# Directory the images are written into; it must already exist.
OUTPUT_DIR = '/Users/jlroberts/Projects/Python/Wallpapers/files/'
# Browser-like User-Agent sent with the image download request.
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'
# Seconds to wait on the image server before giving up on one download.
DOWNLOAD_TIMEOUT = 30


def _fetch_soup(url):
    """Fetch `url` via simple_get and parse it; return None if nothing usable came back."""
    raw_html = simple_get(url)
    if raw_html is None:
        # simple_get returns None for failed requests and non-HTML responses.
        log_error('No HTML returned for {0}'.format(url))
        return None
    return BeautifulSoup(raw_html, 'html.parser')


def _find_image_url(detail_url):
    """Return the href of the first link in the 'img-container-desktop' div, or None."""
    page = _fetch_soup(detail_url)
    if page is None:
        return None
    container = page.find('div', {'class': 'img-container-desktop'})
    link = container.find('a') if container is not None else None
    href = link.get('href') if link is not None else None
    if not href:
        log_error('No wallpaper link found on {0}'.format(detail_url))
        return None
    return href


def _save_wallpaper(image_url, context):
    """Download `image_url` into OUTPUT_DIR, named after the last URL path segment."""
    # Take the last path segment instead of slicing off a fixed-length prefix,
    # so the name stays correct if the host or directory length changes.
    filename = image_url.split('?', 1)[0].rsplit('/', 1)[-1]
    if not filename:
        log_error('Cannot derive a file name from {0}'.format(image_url))
        return
    print(filename)
    req = urllib.request.Request(image_url, data=None, headers={'User-Agent': USER_AGENT})
    try:
        # Context managers close both the HTTP response and the file even
        # when the transfer fails part-way.
        with urlopen(req, timeout=DOWNLOAD_TIMEOUT, context=context) as res, \
                open(OUTPUT_DIR + filename, 'wb') as out:
            out.write(res.read())
    except OSError as e:
        # URLError, socket timeouts and file-system errors are all OSError
        # subclasses; log and carry on so one bad image does not end the run.
        log_error('Error downloading {0} : {1}'.format(image_url, e))


# NOTE(review): certificate verification is disabled through a private ssl
# helper, exactly as the original script did. Prefer
# ssl.create_default_context() unless the image host really needs this.
# The context is built once instead of once per image.
_unverified_context = ssl._create_unverified_context()

for x in range(PAGE_COUNT):
    listing = _fetch_soup(LISTING_URL + str(x))
    if listing is None:
        continue
    for d in listing.find_all('div', class_="boxgrid"):
        # NOTE(review): the original indentation was lost; its trailing
        # `break` is assumed to end the per-anchor loop, i.e. only the first
        # link in each thumbnail box is followed. Confirm against the author's copy.
        a = d.find('a')
        href = a.get('href') if a is not None else None
        if not href:
            continue
        image_url = _find_image_url(urljoin(SITE_ROOT, href))
        if image_url is not None:
            _save_wallpaper(image_url, _unverified_context)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.