Skip to content

Instantly share code, notes, and snippets.

@Radcliffe
Created December 1, 2017 02:05
Show Gist options
  • Save Radcliffe/0d1a8cff9301e0e8842c459153e71296 to your computer and use it in GitHub Desktop.
Python web scraper to get a list of 7-Eleven locations in the US
# Download a list of all 7-Eleven stores in the United States.
# WARNING: The source data is inaccurate!
# Presented at PyMNtos, 2017-11-30
import requests
import csv
import time
import bs4
def main():
    """Scrape every US 7-Eleven listing and write it to a CSV file.

    Writes '7-eleven-locations-usa.csv' in the current directory with
    one row per store yielded by get_all_stores().
    """
    fieldnames = ['name', 'street', 'city', 'state', 'phone']
    # newline='' is required by the csv module to avoid doubled line
    # endings on Windows; an explicit encoding avoids platform defaults.
    with open('7-eleven-locations-usa.csv', 'w', newline='',
              encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(get_all_stores())
def get_all_stores():
    """Yield a store dict for every listing reachable from the index page.

    Walks the 7-Eleven index page, then each state page (which lists
    stores directly and links to city pages), then each city page.
    """
    index_page = get_page('https://www.hoursguide.com/7-eleven/')
    for state_link in get_urls(index_page):
        state_page = get_page(state_link)
        yield from get_stores(state_page)
        # State pages also link to per-city listing pages.
        for city_link in get_urls(state_page):
            yield from get_stores(get_page(city_link))
def get_page(url):
    """Fetch *url* and return it parsed as a BeautifulSoup document.

    Retries indefinitely on network errors with exponential backoff,
    and sleeps briefly after each success to throttle the scrape.
    """
    headers = {'user-agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:57.0) Gecko/20100101 Firefox/57.0'}
    response = None
    wait = 1
    while response is None:
        try:
            response = requests.get(url, timeout=1, headers=headers)
            time.sleep(.2)  # polite delay between successful requests
        except requests.exceptions.RequestException:
            # Catch only network/HTTP failures. The original bare
            # `except:` also swallowed KeyboardInterrupt/SystemExit,
            # making the infinite retry loop impossible to interrupt.
            time.sleep(wait)
            wait *= 2  # exponential backoff
    return bs4.BeautifulSoup(response.text, 'lxml')
def get_urls(page):
    """Yield the target href of each entry in the page's location list.

    *page* is a BeautifulSoup document; both the index page and state
    pages use the same div.states > ul.state_list markup for links.
    """
    for entry in page.select('div.states > ul.state_list > li'):
        link = entry.find('a')
        yield link['href']
def get_stores(page):
    """Yield one store dict per listing found on *page*.

    *page* is a BeautifulSoup document of a state or city listing page.
    Each dict has keys: name, city, state, street, phone.
    """
    for element in page.select('div.store_list > ul.listing_list > li'):
        # Listing titles look like "Name - City - State". Split from the
        # right so a store name that itself contains " - " still parses
        # (plain split(' - ') raised ValueError on such names).
        parts = element.select('a.food')[0].text.rsplit(' - ', 2)
        if len(parts) != 3:
            # Source data is known to be dirty; skip malformed listings
            # instead of aborting the whole scrape.
            continue
        name, city, state = parts
        store = {
            'name': name,
            'city': city,
            'state': state,
            'street': element.select('span.size')[0].text,
            'phone': element.select('span.price')[0].text,
        }
        print(store)  # progress feedback for the long-running scrape
        yield store
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment