Created
November 7, 2017 09:42
-
-
Save neoplacer/ff01f8a7e4a0587e8421562f6238c4c2 to your computer and use it in GitHub Desktop.
IT-SA Event extractor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io
import re
import urllib2

from bs4 import BeautifulSoup
html_page = urllib2.urlopen("https://www.it-sa.de/de/events/tagesuebersicht/2016-10-19") | |
soup = BeautifulSoup(html_page, 'html') | |
JSdata = {} | |
print '{0:10} ; {1:10} ; {2:10}'.format('Time', 'Thema', 'Ort') | |
f = open('event-10-19.txt', 'w+') | |
f.write('{0:10} ; {1:10} ; {2:10}'.format('Time', 'Thema', 'Ort')) | |
f.write('\n') | |
for aaa in soup.findAll('div', attrs={'class':'cdb-lecturelist__entry-inner'}): | |
try: | |
eventTime = aaa.find('div', attrs={'class','cdb-lecturelist__time'}).span.strong | |
getEventData = aaa.find('div', attrs={'cdb-lecturelist__content'}).find('h3').span.a | |
getOrt = aaa.find('div', attrs={'cdb-lecturelist__content'}).findAll('p') | |
# print eventTime.string | |
# print getEventData.contents #.string | |
# print getOrt[1].string #.find('strong') | |
# print '' | |
print '{0:10} ; {1:10} ; {2:10}'.format(eventTime.string, getEventData.contents, getOrt[1].string) | |
f.write('{0:10} ; {1:10} ; {2:10}'.format(eventTime.string, getEventData.contents, getOrt[1].string)) | |
f.write('\n') | |
except Exception: | |
continue | |
# insanerTag = BeautifulSoup(aaa) | |
#for getTimeOb in insanerTag.findAll('div', attrs={'class','cdb-lecturelist__time'}): | |
# print getTimeOb.span[0].strong | |
#for getEventData in insanerTag.findAll('div', attrs={'cdb-lecturelist__content'}): | |
# print getEventData.h3.span[0].a | |
f.close() | |
# print soup.div['cdb-lecturelist__entry-inner'] | |
#for link in soup.findAll('a', attrs={'href': re.compile("^http://")}): | |
# print link.get('href') | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment