Skip to content

Instantly share code, notes, and snippets.

@explodecomputer
Last active August 29, 2015 14:28
Show Gist options
  • Save explodecomputer/812903b2696c52f5610b to your computer and use it in GitHub Desktop.
Save explodecomputer/812903b2696c52f5610b to your computer and use it in GitHub Desktop.
Parse Bristol's gig listings
import sys
import urllib2
from bs4 import BeautifulSoup, NavigableString, Tag
def get_headfirstbristol():
    """Scrape the Headfirst Bristol gig listings page.

    Each ``div.date_container.date_font`` element starts a group. The
    siblings that follow it are scanned, until the sibling chain ends or
    ``check_nextelem`` returns False, for tags carrying both the ``event``
    and ``z`` classes.

    Returns:
        A list ``[band, place, description, dates]`` of four parallel lists
        with one entry per event found. ``band`` and ``place`` are the first
        two ``@``-separated parts of the event's link text (``place`` is an
        empty string when the text contains no ``@``), ``description`` is
        the first child of the event's first ``<p>``, and ``dates`` is the
        first child of the date element the event was found under.
    """
    web_page = urllib2.urlopen("http://www.headfirstbristol.co.uk/gig-listings").read()
    soup = BeautifulSoup(web_page)
    band = []
    place = []
    description = []
    dates = []
    event_classes = set(['event', 'z'])
    for tag in soup.select("div.date_container.date_font"):
        nextelem = tag.next_sibling
        # Stop at the end of the sibling chain or when check_nextelem
        # rejects the element.
        while nextelem is not None and check_nextelem(nextelem):
            # .get() so that a tag with no class attribute is simply skipped
            # instead of raising KeyError.
            if isinstance(nextelem, Tag) and event_classes <= set(nextelem.get('class', [])):
                parts = [s.strip() for s in nextelem.find("a").contents[0].split('@')]
                band.append(parts[0])
                # Keep the lists aligned even when no venue follows an '@'.
                place.append(parts[1] if len(parts) > 1 else '')
                description.append(nextelem.find("p").contents[0])
                dates.append(tag.contents[0])
            nextelem = nextelem.next_sibling
    # The collected lists used to be discarded; hand them back to the caller.
    return [band, place, description, dates]
def check_nextelem(elem):
    """Tell whether sibling scanning should continue past *elem*.

    Args:
        elem: A parse-tree node (tag or text node) or ``None``.

    Returns:
        False when *elem* is a ``Tag`` carrying both the ``date_container``
        and ``date_font`` classes, which are the classes
        ``get_headfirstbristol`` selects date elements by. True for
        everything else, including non-``Tag`` values.
    """
    if not isinstance(elem, Tag):
        return True
    # .get() rather than [] so a tag without a class attribute does not
    # raise KeyError.
    classes = elem.get('class') or []
    # Subset test instead of exact list equality: class order and extra
    # classes no longer matter, which matches the CSS selector used to find
    # the date elements.
    return not set(['date_container', 'date_font']) <= set(classes)
def get_thekla():
    """Scrape artist names and dates from the Thekla live listings page.

    Returns:
        A list ``[band, date]`` of two parallel lists with one entry per
        ``div`` of class ``details_wrapper_inner``: the first child of that
        div's first ``h4.gig_artist`` and of its first ``h3.gig_date``.
    """
    web_page = urllib2.urlopen("http://www.theklabristol.co.uk/live").read()
    soup = BeautifulSoup(web_page)
    gigs = soup.find_all("div", attrs={"class": "details_wrapper_inner"})
    band = [gig.select("h4.gig_artist")[0].contents[0] for gig in gigs]
    date = [gig.select("h3.gig_date")[0].contents[0] for gig in gigs]
    # The lists used to be built and then dropped; return them in the same
    # [band, date] shape that get_ents uses.
    return [band, date]
def get_colstonhall():
    """Fetch the Colston Hall what's-on page and collect its summary headings.

    Returns:
        The ``find_all`` result for ``h2`` elements whose ``itemprop``
        attribute is ``summary``. No text is extracted from the elements.
    """
    web_page = urllib2.urlopen("http://www.colstonhall.org/whats-on/").read()
    soup = BeautifulSoup(web_page)
    # The matches used to be bound to a local and discarded; return them.
    return soup.find_all("h2", attrs={"itemprop": "summary"})
def get_ents(url):
    """Scrape names and date strings from the event listing page at *url*.

    Args:
        url: Address of the page to download and parse.

    Returns:
        A list ``[band, date]`` of two lists with one entry per ``div`` of
        class ``event-list-item-container``. ``band`` holds the last child
        of each item's first ``h3``; ``date`` holds the ``title`` attribute
        of each item's first ``abbr`` of class ``dtstart``.
    """
    html = urllib2.urlopen(url).read()
    page = BeautifulSoup(html)
    items = page.find_all("div", attrs={"class": "event-list-item-container"})

    # First pass: the name is the trailing child of the item's heading.
    band = []
    for item in items:
        heading = item.find("h3")
        band.append(heading.contents[-1])

    # Second pass: the date string lives in the abbr's title attribute.
    date = []
    for item in items:
        start = item.find("abbr", attrs={"class": "dtstart"})
        date.append(start['title'])

    return [band, date]
# Fetch the listings for two venues at module load time. Each result is the
# [band, date] pair of lists returned by get_ents.
thekla = get_ents(
    url="https://www.ents24.com/bristol-events/thekla",
)
colston = get_ents(
    url="https://www.ents24.com/bristol-events/colston-hall",
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment