Created
June 20, 2023 14:08
-
-
Save cboulanger/3e41bb05ff074e29b95e6d66d23cf64d to your computer and use it in GitHub Desktop.
Create an HTML overview of conference sessions, grouped by track, from an easychair.org conference programme page
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import dateparser | |
import re | |
import requests | |
from bs4 import BeautifulSoup | |
import csv | |
from collections import defaultdict | |
# Placeholder configuration — replace the "..." values with the real
# easychair.org URLs and the desired page title before running.
url = ".../program.html"  # easychair programme page (dates + sessions)
track_url = ".../tracks.html"  # easychair page listing the track titles
css_url = "..../program.css"  # stylesheet linked from the generated HTML page
page_title = "XXX - Tracks und Sessions"  # <title> of the generated HTML page
def download_session_data(url, csv_path='conference_data.csv'):
    """Download an easychair programme page and extract its sessions to CSV.

    The programme page interleaves ``<div class="date">`` headers with
    ``<div class="session ...">`` entries; each session div carries an
    ``<a name="session:ID">`` anchor and a heading of the form
    ``<interval> Session <id>: Track <nr>: <title>`` (assumed from the
    regex below — confirm against the actual easychair markup).

    Args:
        url: URL of the easychair ``program.html`` page.
        csv_path: Output CSV file. Default preserves the original
            hard-coded filename for backward compatibility.

    Writes columns: session_id, session, date, interval, track, title.
    """
    response = requests.get(url)
    # Fail loudly on HTTP errors instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')

    with open(csv_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["session_id", "session", "date", "interval", "track", "title"])
        # Date divs precede the sessions they apply to, so carry the most
        # recently seen date forward; stays None until the first date div.
        date = None
        for div in soup.find_all('div'):
            if div.get('class') == ['date']:
                parsed = dateparser.parse(div.text)
                # dateparser returns None on unparseable text; keep the
                # previous date rather than crashing on .strftime().
                if parsed is not None:
                    date = parsed.strftime('%d.%m.%Y')
            elif div.get('class') and 'session' in div.get('class'):
                anchor = div.find('a')
                heading_div = div.find('div', class_='heading')
                if anchor is None or heading_div is None:
                    # Malformed session div: skip instead of raising
                    # AttributeError on .get()/.text.
                    continue
                session_id = (anchor.get('name') or '').replace('session:', '')
                matches = re.search(
                    r'(.+)\s*Session\s*(\w+)\s*:\s*Track\s*(\w+):\s*(.+)',
                    heading_div.text)
                if matches:
                    interval, session, track, title = matches.groups()
                    writer.writerow([session_id, session, date,
                                     interval.strip(), track, title])
def create_track_overview(track_url, program_url, page_title, css_url,
                          csv_path='conference_data.csv',
                          html_path='sessions_by_track.html'):
    """Build an HTML overview of sessions grouped by track.

    Reads the CSV produced by :func:`download_session_data`, groups the
    sessions by track number, and writes an HTML page with one table per
    track, each session linking back into the programme page.

    Args:
        track_url: URL of the easychair ``tracks.html`` page (track titles).
        program_url: URL of the programme page the session links target.
        page_title: Contents of the generated page's ``<title>``.
        css_url: Stylesheet URL linked from the generated page.
        csv_path: CSV input; default matches ``download_session_data``.
        html_path: HTML output file; default preserves the original name.
    """
    # Download and parse the track page.
    response = requests.get(track_url)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')

    # Track titles appear as <h3><a>...</a></h3>; number them 1..n in
    # document order — assumed to match easychair's track numbering
    # (TODO confirm against the actual tracks page).
    tracks = {}
    i = 1
    for h3 in soup.find_all('h3'):
        a = h3.find('a')
        if a:
            tracks[str(i)] = a.text.strip()
            i += 1

    # Group the CSV rows by track number; skip rows with no id/track.
    sessions = defaultdict(list)
    with open(csv_path, 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader)  # skip header
        for row in reader:
            session_id, session, date, interval, track, title = row
            if session_id != "" and track != "":
                sessions[track].append((date, interval, title, session_id, session))

    # Sort chronologically. BUG FIX: dates are 'dd.mm.yyyy' strings, so a
    # plain lexicographic sort orders by day before month/year (e.g.
    # 01.07.2023 before 30.06.2023). Reversing the split date to
    # (yyyy, mm, dd) yields a correct chronological key; the remaining
    # fields keep the original tie-breaking order.
    for track in sessions:
        sessions[track].sort(key=lambda e: (e[0].split('.')[::-1],) + e[1:])

    # Assemble the HTML page (list + join avoids quadratic string +=).
    parts = ['<html><head>',
             '<meta charset="UTF-8">',
             f'<title>{page_title}</title>',
             f'<link rel="stylesheet" type="text/css" href="{css_url}">',
             '<style>td { padding-right: 20px; }</style>',
             '</head><body>']
    for track in sorted(sessions.keys()):
        # Fall back to the raw track number if tracks.html had no title
        # for it, instead of raising KeyError.
        parts.append(f'<h2>{tracks.get(track, track)}</h2>')
        parts.append('<table style="">')
        for date, interval, title, session_id, session in sessions[track]:
            parts.append(
                f'<tr><td>{date}</td><td>{interval}</td><td>{session}</td>'
                f'<td><a href="{program_url}#session:{session_id}">{title}</a></td></tr>')
        parts.append('</table>')
    parts.append('</body></html>')

    # Write the HTML to a file.
    with open(html_path, 'w', encoding='utf-8') as file:
        file.write(''.join(parts))
# Run as a script: scrape the programme into the CSV, then build the
# per-track HTML overview from it. Guarded so importing this module for
# its functions does not trigger network I/O.
if __name__ == "__main__":
    download_session_data(url)
    create_track_overview(track_url, url, page_title=page_title, css_url=css_url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment