Created
December 1, 2017 02:05
-
-
Save Radcliffe/0d1a8cff9301e0e8842c459153e71296 to your computer and use it in GitHub Desktop.
Python web scraper to get a list of 7-Eleven locations in the US
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Download a list of all 7-Eleven stores in the United States.
# WARNING: The source data is inaccurate!
# Presented at PyMNtos, 2017-11-30
import csv
import time

import bs4
import requests
def main():
    """Scrape every store and write the results to a CSV file.

    The file ``7-eleven-locations-usa.csv`` is created (or overwritten) in
    the current working directory, with a header row followed by one row
    per store yielded by ``get_all_stores()``.
    """
    fieldnames = ['name', 'street', 'city', 'state', 'phone']
    # newline='' lets the csv module control line endings itself; without it
    # every row is followed by a blank line on Windows. The explicit encoding
    # keeps the output independent of the platform's locale default.
    with open('7-eleven-locations-usa.csv', 'w', newline='',
              encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        # get_all_stores() is a generator, so it is consumed lazily here.
        writer.writerows(get_all_stores())
def get_all_stores():
    """Yield a dict for every store found below the 7-Eleven index page.

    The index page is fetched first. Each URL it lists is treated as a
    state page: its stores are yielded, and then every URL listed on that
    state page is fetched as a city page and its stores are yielded too.
    """
    index = get_page('https://www.hoursguide.com/7-eleven/')
    for state_link in get_urls(index):
        state_page = get_page(state_link)
        yield from get_stores(state_page)
        for city_link in get_urls(state_page):
            yield from get_stores(get_page(city_link))
def get_page(url):
    """Fetch *url* and return it parsed as a BeautifulSoup document.

    Failed requests are retried indefinitely with exponential backoff
    (1 s, 2 s, 4 s, ... capped at 60 s), so the call blocks until a
    response arrives. The HTTP status code is not inspected.

    Raises:
        requests.exceptions.RequestException: for errors that are also
            ``ValueError`` (malformed URL, missing or unsupported scheme),
            since retrying those can never succeed.
    """
    headers = {'user-agent':
               'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:57.0) Gecko/20100101 Firefox/57.0'}
    max_wait = 60
    wait = 1
    while True:
        try:
            response = requests.get(url, timeout=1, headers=headers)
        except requests.exceptions.RequestException as exc:
            # Catch only request failures: a bare ``except`` would also trap
            # KeyboardInterrupt and hide programming errors.
            if isinstance(exc, ValueError):
                # MissingSchema / InvalidSchema / InvalidURL: a permanent
                # problem with the URL itself, so do not loop forever.
                raise
            time.sleep(wait)
            wait = min(wait * 2, max_wait)
        else:
            break
    # Short pause after every successful request (presumably to go easy on
    # the server).
    time.sleep(.2)
    html = response.text
    return bs4.BeautifulSoup(html, 'lxml')
def get_urls(page):
    """Yield the ``href`` of the first link in each state-list item.

    Items are the ``li`` elements matched by
    ``div.states > ul.state_list > li`` on *page*. Items that contain no
    ``<a>`` element, or whose anchor has no (or an empty) ``href``, are
    skipped instead of raising.
    """
    for element in page.select('div.states > ul.state_list > li'):
        anchor = element.find('a')
        if anchor is None:
            continue
        href = anchor.get('href')
        if href:
            yield href
def get_stores(page):
    """Yield one dict per store listed on *page*.

    Each ``li`` under ``div.store_list > ul.listing_list`` is expected to
    hold an ``a.food`` element whose text reads ``name - city - state``, a
    ``span.size`` element with the street and a ``span.price`` element with
    the phone number. Every store dict is also printed as it is produced.

    Yields:
        dict with the keys ``name``, ``city``, ``state``, ``street`` and
        ``phone``.
    """
    for element in page.select('div.store_list > ul.listing_list > li'):
        title = element.select('a.food')[0].text
        # Split from the right so that a store name which itself contains
        # ' - ' stays intact instead of breaking the three-way unpacking.
        name, city, state = title.rsplit(' - ', 2)
        street = element.select('span.size')[0].text
        phone = element.select('span.price')[0].text
        store = {
            'name': name,
            'city': city,
            'state': state,
            'street': street,
            'phone': phone,
        }
        print(store)
        yield store
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment