Last active
January 4, 2018 20:51
-
-
Save OneGneissGuy/ee9e7fd5937be47a413777c22b517fe0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*-
"""
Webscraping local air quality index forecast from https://sparetheair.com/
@author: saraceno
@email: [email protected]
@github: onegneissguy
code adapted from https://medium.com/python-pandemonium/6-things-to-develop-an-efficient-web-scraper-in-python-1dffa688793c
"""
def pull_aqi(tag_id="todayAQIBar", document=None):
    """Return the first element of the parsed page whose ``id`` is *tag_id*.

    Args:
        tag_id: Value of the HTML ``id`` attribute to look up.
        document: Parsed page to search (any object exposing a
            BeautifulSoup-style ``find``). When omitted, the module-level
            ``soup`` object is used, which must already exist.

    Returns:
        The first matching element.

    Raises:
        IndexError: If the page contains no element with that id.
    """
    page = soup if document is None else document
    tag = page.find(id=tag_id)
    if tag is None:
        # Keep the exception type callers would have seen from indexing an
        # empty result list, but say which id was missing.
        raise IndexError(
            "no element with id {!r} found in the page".format(tag_id))
    return tag
def process_aqi(result):
    """Split an AQI element's text into its numeric value and description.

    The text is expected to start with the integer AQI value followed by the
    category words, e.g. ``"101 Unhealthy for Sensitive Groups"`` (format
    assumed from how the text is parsed here; confirm against the live page).

    Args:
        result: Object with a ``text`` attribute holding the element's text.

    Returns:
        A ``(value, description)`` tuple: the leading integer and up to the
        last four words that follow it, joined by single spaces.

    Raises:
        ValueError: If the text is empty or does not start with an integer.
    """
    # split() with no argument tolerates newlines and repeated spaces, which
    # split(' ') would turn into empty or glued-together tokens.
    tokens = result.text.split()
    if not tokens:
        raise ValueError("AQI element contains no text")
    aqi_value = int(tokens[0])
    # Drop the numeric token before taking the trailing words so that short
    # categories ("45 Good") do not carry the number into the description.
    aqi_text = ' '.join(tokens[1:][-4:])
    return aqi_value, aqi_text
from bs4 import BeautifulSoup | |
import requests | |
# Fetch the Spare the Air home page, read today's and tomorrow's AQI bars and
# print a one-line comparison of the two forecasts.
url = 'http://www.sparetheair.com'
# Browser-like user agent sent with the request.
headers = {
    'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
}
# HTML ids of the elements holding each day's AQI forecast.
today_tag = "todayAQIBar"
tomorrow_tag = "tomorrowAQIBar"

try:
    page = requests.get(url, headers=headers, timeout=5)
    # Turn 4xx/5xx responses into requests.HTTPError (a RequestException)
    # instead of trying to parse an error page.
    page.raise_for_status()
    # pull_aqi() looks the page up through this module-level name.
    soup = BeautifulSoup(page.content, 'html.parser')
    today = process_aqi(pull_aqi(tag_id=today_tag))
    tomorrow = process_aqi(pull_aqi(tag_id=tomorrow_tag))
    # Each result is (numeric AQI, description); a lower number is better.
    if tomorrow[0] < today[0]:
        print('The air quality will improve tomorrow and is forecasted to be {}'.format(tomorrow[1].lower()))
    elif tomorrow[0] > today[0]:
        print('The air quality will be worse tomorrow and is forecasted to be {}'.format(tomorrow[1].lower()))
    else:
        print('The air quality will remain the same and is forecasted to be {}'.format(tomorrow[1].lower()))
except requests.ConnectionError as e:
    print("OOPS!! Connection Error. Make sure you are connected to Internet. Technical Details given below.\n")
    print(str(e))
except requests.Timeout as e:
    print("OOPS!! Timeout Error")
    print(str(e))
except requests.RequestException as e:
    print("OOPS!! General Error")
    print(str(e))
except (IndexError, ValueError) as e:
    # Raised when an AQI element is missing from the page or its text does
    # not start with a number.
    print("OOPS!! Could not read the AQI forecast from the page")
    print(str(e))
except KeyboardInterrupt:
    print("Someone closed the program")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment