Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save liquidgenius/274607ffb7596c9b9ac40160ca82aa59 to your computer and use it in GitHub Desktop.
Save liquidgenius/274607ffb7596c9b9ac40160ca82aa59 to your computer and use it in GitHub Desktop.
Python script for scraping pollution data from the BBC / EarthSense website
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import time
import csv
# Scrape the pollution rating shown for each school's postcode by the lookup
# widget embedded in a BBC News article, appending one result row per school
# to 'postcode_score.csv'.
#
# Input : 'schoolsGM_min.csv' - the first four columns of each row are read as
#         school name, URN, postcode, local authority (in that order).
# Output: 'postcode_score.csv' - semicolon-delimited, every field quoted:
#         "schoolname";"URN";"postcode";"score";"localauth"
path_to_chromedriver = '/Users/Jamie/Projects/Propolis_Stuff/chromedriver'  # change path as needed
url = 'http://www.bbc.co.uk/news/science-environment-42566393'

# NOTE(review): the `executable_path` keyword is kept for compatibility with
# the Selenium version this script was written for; newer Selenium releases
# expect a Service object instead - confirm against the installed version.
browser = webdriver.Chrome(executable_path=path_to_chromedriver)
try:
    browser.get(url)

    # The postcode widget is looked up inside the 'pym-iframe' frame, so the
    # driver has to switch into that frame before locating its elements.
    browser.switch_to.frame(browser.find_element(By.CLASS_NAME, 'pym-iframe'))

    # Give the input box up to 5 seconds to become visible. If it never does,
    # the TimeoutException propagates and no scraping is attempted (running
    # the loop anyway would only fail later with a less helpful error).
    WebDriverWait(browser, 5).until(
        EC.visibility_of_element_located((By.ID, 'postcode-input'))
    )

    with open('schoolsGM_min.csv') as schoolcsv, \
            open('postcode_score.csv', 'a') as p:
        reader = csv.reader(schoolcsv)
        # csv.writer reproduces the original "a";"b";... layout while also
        # escaping any double quotes that appear inside a field.
        writer = csv.writer(
            p, delimiter=';', quoting=csv.QUOTE_ALL, lineterminator='\n'
        )
        for row in reader:
            # Blank or truncated lines cannot be looked up; skip them rather
            # than abort the whole run with an IndexError.
            if len(row) < 4:
                continue
            schoolname, URN, postcode, localauth = row[:4]

            postcode_input = browser.find_element(By.ID, 'postcode-input')
            postcode_input.clear()
            postcode_input.send_keys(postcode)
            postcode_input.send_keys(Keys.ENTER)

            # Fixed pause before reading the rating element.
            # NOTE(review): presumably this lets the widget refresh for the
            # new postcode; a slow response could leave the previous value
            # on screen - confirm whether an explicit wait is needed.
            time.sleep(0.4)
            score = browser.find_element(By.CLASS_NAME, 'ffa__summary-rating').text

            writer.writerow([schoolname, URN, postcode, score, localauth])
finally:
    # Always shut the browser down so no Chrome/chromedriver process is left
    # running, whether the scrape finished or failed part-way through.
    browser.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment