Skip to content

Instantly share code, notes, and snippets.

@quandyfactory
Created July 29, 2014 23:46
Show Gist options
  • Select an option

  • Save quandyfactory/c1b2432721bdf1ab6ced to your computer and use it in GitHub Desktop.

Select an option

Save quandyfactory/c1b2432721bdf1ab6ced to your computer and use it in GitHub Desktop.
get_fringe_plays.py
#!/usr/bin/env python
"""Code I used to download all the details for the 2014 Hamilton Fringe Festival plays and convert them into a JSON file."""
from bs4 import BeautifulSoup as bs
import json
import requests
import xlrd
# Index page of the festival site; every individual show page lives beneath it.
base_url = 'http://hamiltonfringe.ca/shows/'

# Download and parse the index page once, when the module is loaded.
r = requests.get(base_url)
data = r.text
soup = bs(data)

# Keep the href of every anchor that points strictly below base_url
# (the bare index URL itself and anchors without an href are dropped).
links = [
    href
    for href in (anchor.get('href') for anchor in soup.find_all('a'))
    if href is not None
    and len(href) > len(base_url)
    and href.startswith(base_url)
]

# Module-level placeholder list; nothing in this part of the file fills it.
output = []
def get_soup(link):
    """Download the page at ``link`` and return it as a BeautifulSoup tree.

    The response body is taken as text and handed to BeautifulSoup without
    naming a parser, so the parser choice is left to BeautifulSoup.
    """
    response = requests.get(link)
    return bs(response.text)
def process_soup(soup):
    """Extract the title and a combined description from a parsed show page.

    The description is assembled from three divs, identified by CSS class:
    ``company-info`` (newlines removed), ``show-billing`` (tabs removed and a
    space inserted before each newline) and ``show-description`` (verbatim).

    Returns a dict with the keys ``"title"`` and ``"description"``.
    Raises IndexError when one of the three divs is not present.
    """
    def first_div_text(css_class):
        # Text of the first div whose class attribute contains css_class.
        candidates = []
        for div in soup.find_all('div'):
            classes = div.get('class')
            if classes is not None and css_class in classes:
                candidates.append(div)
        return candidates[0].text

    page_title = soup.title.text
    billing_text = first_div_text('show-billing').replace('\t', '').replace('\n', ' \n')
    company_text = first_div_text('company-info').replace('\n', '')
    body_text = first_div_text('show-description')

    combined = '%s\n%s\n\n%s' % (company_text, billing_text, body_text)
    return {"title": page_title, "description": combined}
def save_plays():
    """Scrape every show page in ``links`` and write them to fringe-shows.json.

    Each show becomes a dict with ``title``, ``description`` and ``link``.
    The list is serialised to JSON and the resulting *text* is then tidied:
    escaped tabs are removed, runs of three or more escaped newlines are
    reduced to two, and the escaped right single quotation mark (\\u2019)
    is replaced with a plain apostrophe.
    """
    shows = []
    for url in links:
        record = process_soup(get_soup(url))
        record['link'] = url
        shows.append(record)
        print('Link %s added to shows.' % (url))

    # These replacements act on the serialised JSON, so the patterns are the
    # literal two-character escape sequences, not real tab/newline characters.
    text = json.dumps(shows).replace('\\t', '')
    triple_newline, double_newline = '\\n\\n\\n', '\\n\\n'
    while triple_newline in text:
        text = text.replace(triple_newline, double_newline)
    text = text.replace('\\u2019', "'")

    with open('fringe-shows.json', 'w') as handle:
        handle.write(text)
    print('File saved as fringe-shows.json')
def get_showtimes():
    """Load the ``shows`` sheet of shows.xls.

    Returns a ``(headers, data)`` tuple: ``headers`` holds the values of the
    first row, ``data`` is a list with the values of every following row.
    """
    book = xlrd.open_workbook('shows.xls')
    sheet = book.sheet_by_name(u'shows')
    header_row = sheet.row_values(0)
    # Row 0 is the header, so the data starts at row 1.
    body_rows = [sheet.row_values(index) for index in range(1, sheet.nrows)]
    return header_row, body_rows
def get_plays():
    """Return the decoded contents of fringe-shows.json from the working directory."""
    with open('fringe-shows.json', 'r') as handle:
        return json.load(handle)
# Columns 4 through 14 of a showtime row hold the start time for each
# festival day, in order, from July 17 to July 27, 2014.
_FIRST_TIME_COLUMN = 4
_FESTIVAL_DATES = tuple('2014-07-%02d' % day for day in range(17, 28))


def _sql_quote(value):
    """Return ``value`` as text with every single quote doubled for a SQL literal."""
    return ('%s' % (value,)).replace("'", "''")


def generate_sql(plays, headers, showtimes):
    """Print an ``insert into wots`` statement with one tuple per performance.

    plays     -- list of dicts with ``title``, ``description`` and ``link``.
    headers   -- unused; accepted so existing callers keep working.
    showtimes -- list of rows: column 0 is the venue, column 1 the street
                 address, column 2 the play title, and columns 4-14 the start
                 time on each day from 2014-07-17 to 2014-07-27 ('' when the
                 play is not performed that day).

    A row is matched to the first play whose stripped title equals the row's
    stripped title; rows without a matching play produce no output. Every
    value is written as a single-quoted literal with embedded single quotes
    doubled. (The earlier ``replace("'", "\\'")`` was a no-op, so apostrophes
    in titles or descriptions broke the statement.)
    NOTE(review): backslashes are not escaped; confirm whether the target
    database treats them specially inside string literals.

    Each tuple is followed by ', ' as before, including the last one.
    """
    print('insert into wots (title, date, time, location, address, details, website, username) values ')
    username = 'Ryan'

    # Index plays by stripped title once; setdefault keeps the first play for
    # a given title, matching the previous "stop at the first match" lookup.
    plays_by_title = {}
    for play in plays:
        plays_by_title.setdefault(play['title'].strip(), play)

    for showtime in showtimes:
        title = showtime[2].strip()
        play = plays_by_title.get(title)
        if play is None:
            continue

        location = showtime[0]
        address = '%s, Hamilton, ON' % (showtime[1])
        details = play['description']
        website = play['link']

        # Slicing (rather than indexing) tolerates rows with fewer columns.
        day_times = showtime[_FIRST_TIME_COLUMN:_FIRST_TIME_COLUMN + len(_FESTIVAL_DATES)]
        for date, start_time in zip(_FESTIVAL_DATES, day_times):
            if start_time == '':
                continue  # no performance on this date
            values = (title, date, start_time, location, address, details, website, username)
            print('(%s), ' % ', '.join("'%s'" % _sql_quote(value) for value in values))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment