Skip to content

Instantly share code, notes, and snippets.

@nlitsme
Last active July 25, 2022 12:07
Show Gist options
  • Save nlitsme/186dfa7e000579ed0d6521f43526232c to your computer and use it in GitHub Desktop.
Save nlitsme/186dfa7e000579ed0d6521f43526232c to your computer and use it in GitHub Desktop.
python script which I used to generate my mch2022 program overview
"""
Author: [email protected]
"""
import html.parser
import html
import urllib.request
import json
from datetime import datetime, timedelta
import re
import os
from xml.parsers.expat import ParserCreate
import sys
sys.path += ["/home/itsme/myprj/wikitool"]
# code for parsing media wiki text.
from wikiinfo import parseWikitext, findAllTemplates, parseInfobox, findTemplate, extractValue
def loadhtml(url):
    """
    Fetch `url` and return the response body as text.

    A body that starts with the gzip magic bytes is decompressed first;
    the result is decoded as UTF-8.
    """
    # zlib is not imported at module level; without this import the gzip
    # branch below fails with a NameError.
    import zlib
    with urllib.request.urlopen(url) as response:
        text = response.read()
    if text.startswith(b"\x1f\x8b"):
        # wbits=47 (32+15): let zlib auto-detect the gzip/zlib header.
        text = zlib.decompress(text, wbits=47)
    return text.decode('utf-8')
def loadjson(url):
    """
    Fetch `url` and return the response body decoded as JSON.

    A body that starts with the gzip magic bytes is decompressed first.
    """
    # zlib is not imported at module level; without this import the gzip
    # branch below fails with a NameError.
    import zlib
    with urllib.request.urlopen(url) as response:
        text = response.read()
    if text.startswith(b"\x1f\x8b"):
        # wbits=47 (32+15): let zlib auto-detect the gzip/zlib header.
        text = zlib.decompress(text, wbits=47)
    return json.loads(text)
def get(j, *path):
    """
    Walk `path` into the nested structure `j` and return what is found.

    String steps are looked up with `.get()`, integer steps by indexing;
    steps of any other type leave the current value unchanged.
    The walk stops at the first falsy value, which is then returned as-is.
    """
    node = j
    for step in path:
        kind = type(step)
        if kind is str:
            node = node.get(step)
        elif kind is int:
            node = node[step]
        if not node:
            return node
    return node
def fromiso(t):
    """
    Normalize a timestamp string and parse it with datetime.fromisoformat.

    A trailing 'Z', 'CET' or '+0200' is replaced by '+02:00'; a 19 character
    string gets '+02:00' appended; otherwise a string containing a space is
    rewritten by replacing the first space with 'T' and appending
    ':00+02:00'. Returns None for an empty value.
    """
    if not t:
        return
    for suffix in ('Z', 'CET', '+0200'):
        if t.endswith(suffix):
            t = t[:-len(suffix)] + '+02:00'
            break
    else:
        if len(t) == 19:
            t = t + '+02:00'
        else:
            sep = t.find(' ')
            if sep > 0:
                datepart, timepart = t[:sep], t[sep+1:]
                t = datepart + 'T' + timepart + ':00+02:00'
    return datetime.fromisoformat(t)
def mchxml(fh):
    """
    Collect (title, text) pairs from a wiki export read from `fh`.

    `fh` is handed to expat's ParseFile. A pair is emitted once both a
    <title> element and a following <text> element have been closed;
    a new <title> discards any pending text.
    """
    open_tags = []
    pages = []
    text_level = title_level = -1
    body = heading = None
    closed = 0

    def on_start(tag, attrs):
        nonlocal body, heading, closed, text_level, title_level
        depth = len(open_tags)
        open_tags.append(tag)
        if tag == 'text':
            body = ""
            text_level = depth
        elif tag == 'title':
            # a new title starts a new page: forget anything pending
            heading = ""
            title_level = depth
            body = None
            text_level = -1
            closed = 0

    def on_end(tag):
        nonlocal body, heading, closed, text_level, title_level
        if open_tags.pop() != tag:
            print("warn: %s" % tag)
        depth = len(open_tags)
        if tag == 'text' and text_level == depth:
            text_level = -1
            closed += 1
        elif tag == 'title' and title_level == depth:
            title_level = -1
            closed += 1
        if closed == 2:
            # both title and text are complete
            pages.append((heading, body))
            heading = body = None
            closed = 0

    def on_chars(data):
        nonlocal body, heading
        if text_level >= 0:
            body += data
        elif title_level >= 0:
            heading += data

    xmlparser = ParserCreate()
    xmlparser.StartElementHandler = on_start
    xmlparser.EndElementHandler = on_end
    xmlparser.CharacterDataHandler = on_chars
    xmlparser.ParseFile(fh)
    return pages
def get_latest_wiki_export():
    """
    Return the name of the '.xml' file in the current directory
    which sorts last by name.
    """
    names = [entry.name for entry in os.scandir('.')
             if entry.is_file() and entry.name.endswith('.xml')]
    names.sort()
    return names[-1]
def getsessions():
    """
    Yield one dict per wiki page that has both a "Session" and an "Event"
    template, read from the latest wiki export file.

    Each dict holds the page title under "title", updated with the parsed
    Session infobox and then the parsed Event infobox.
    Pages that fail to parse are reported on stderr and skipped.
    """
    xmlfilename = get_latest_wiki_export()
    with open(xmlfilename, "rb") as fh:
        for title, txt in mchxml(fh):
            try:
                tree = parseWikitext(txt)
                a = findTemplate(tree, "Session")
                if a:
                    a = parseInfobox(a)
                b = findTemplate(tree, "Event")
                if b:
                    b = parseInfobox(b)
                if a and b:
                    result = { "title": title }
                    result.update(a)
                    result.update(b)
                    yield result
            except Exception as e:
                # Best effort: keep going, but report on stderr - stdout
                # carries the generated program and must stay clean.
                print("warn: %s: %s" % (title, e), file=sys.stderr)
class FromDict:
    """ Base class: turns every key of a dict into an instance attribute. """
    def __init__(self, t):
        for key in t:
            setattr(self, key, t[key])
class Room(FromDict):
    """
    A room from the program.

    Attributes used here:
      id   - room id
      name - room name; a dict value is replaced by its 'en' entry
    """
    def __init__(self, t):
        super().__init__(t)
        name = self.name
        if type(name) is dict:
            self.name = name.get('en')

    def asjson(self):
        """ Return name and id as a JSON object string. """
        fields = {'name': self.name, 'id': self.id}
        return json.dumps(fields)

    def __repr__(self):
        return "Room:" + self.asjson()
class Track(FromDict):
    """
    A track from the program.

    Attributes used here:
      id    - track id
      name  - track name; a dict value is replaced by its 'en' entry
      color - '#xxxxxx' color string
    """
    def __init__(self, t):
        super().__init__(t)
        name = self.name
        if type(name) is dict:
            self.name = name.get('en')

    def asjson(self):
        """ Return name, id and color as a JSON object string. """
        fields = {'name': self.name, 'id': self.id, 'color': self.color}
        return json.dumps(fields)
class Speaker(FromDict):
    """
    A speaker from the program.

    Attributes used here:
      code   - speaker code
      name   - speaker name
      avatar - avatar url
    """
    def __init__(self, t):
        super().__init__(t)

    def url(self):
        """ Return the speaker's page on the program site. """
        return "https://program.mch2022.org/mch2022/speaker/" + f"{self.code}" + "/"

    def asjson(self):
        """ Return name, code and avatar as a JSON object string. """
        fields = {'name': self.name, 'code': self.code, 'avatar': self.avatar}
        return json.dumps(fields)
class Talk(FromDict):
    """
    A single entry in the schedule.

    Attributes (missing ones get a default):
      code       - talk id                  (default None)
      title      - text; a dict value is replaced by its 'en' entry
      abstract   - text                     (default None)
      speakers   - list of speaker codes    (default [])
      track      - track id                 (default 0)
      room       - room id                  (default 0)
      start, end - timestamps; strings are decoded with fromiso
    """
    def __init__(self, t):
        super().__init__(t)
        # built per call, so every Talk gets its own empty speakers list
        defaults = (
            ('track', 0), ('room', 0),
            ('code', None), ('title', None), ('abstract', None),
            ('start', None), ('end', None),
            ('speakers', []),
        )
        for attr, fallback in defaults:
            if not hasattr(self, attr):
                setattr(self, attr, fallback)
        for attr in ('start', 'end'):
            stamp = getattr(self, attr)
            if type(stamp) is str:
                setattr(self, attr, fromiso(stamp))
        if type(self.title) is dict:
            self.title = self.title.get('en')

    def date(self):
        """ Start date as yyyy-mm-dd. """
        return format(self.start, "%Y-%m-%d")

    def frm(self):
        """ Start time as HH:MM. """
        return format(self.start, "%H:%M")

    def unt(self):
        """ End time as HH:MM. """
        return format(self.end, "%H:%M")

    def url(self):
        """ Return the talk's page on the program site. """
        return "https://program.mch2022.org/mch2022/talk/" + f"{self.code}" + "/"

    def asjson(self):
        """ Return the talk as a JSON object string, timestamps in iso format. """
        fields = {
            'title': self.title,
            'code': self.code,
            'abstract': self.abstract,
            'speakers': self.speakers,
            'track': self.track,
            'start': self.start.isoformat(),
            'end': self.end.isoformat(),
            'room': self.room,
        }
        return json.dumps(fields)

    def ordering(self):
        """ Sort key: by start time, then by room. """
        return (self.start, self.room)
def is_day_break(prev, cur):
    """ True when `cur` starts more than one hour after `prev` has ended. """
    latest_same_day = prev.end + timedelta(hours=1)
    return cur.start > latest_same_day
class Resolver:
    """
    Resolves track ids, room ids and speaker codes to their objects.

    The constructor indexes tracks and rooms by `.id` and speakers by
    `.code`. When an index ends up smaller than its input list (duplicate
    keys), the before/after counts are printed.
    """
    def __init__(self, tracks, rooms, speakers):
        self.tracks = { _.id:_ for _ in tracks }
        if len(tracks) != len(self.tracks):
            print("tracks: %d -> %d" % (len(tracks), len(self.tracks)))
        self.rooms = { _.id:_ for _ in rooms }
        if len(rooms) != len(self.rooms):
            print("rooms: %d -> %d" % (len(rooms), len(self.rooms)))
        # Must be a dict keyed by code: the original built a set here,
        # which made speaker() fail with AttributeError on `.get`.
        self.speakers = { _.code:_ for _ in speakers }
        if len(speakers) != len(self.speakers):
            print("speakers: %d -> %d" % (len(speakers), len(self.speakers)))

    def room(self, id):
        """ Return the room with this id, or None when unknown. """
        return self.rooms.get(id)

    def speaker(self, code):
        """ Return the speaker with this code, or None when unknown. """
        return self.speakers.get(code)

    def track(self, id):
        """ Return the track with this id, or None when unknown. """
        return self.tracks.get(id)
def astext(e, r):
    """
    Format event `e` as one line of the text table.

    `r` resolves the event's room id; an unknown room (e.g. the default
    room 0) or a room without a name yields an empty room column
    instead of an error.
    """
    roomname = getattr(r.room(e.room), "name", None) or ""
    return f"{e.frm()}-{e.unt()} {roomname:<20} {e.title}"
def ashtml(e, r):
    """
    Format event `e` as one html table row.

    The row holds: from, until, room name, track name, title. The title is
    a link to the talk page when the event has a code. `r` resolves room
    and track ids; unknown ids and missing names/titles give empty cells.
    The result should be wrapped in htmlheader and htmlfooter.
    """
    roomname = getattr(r.room(e.room), "name", None) or ""
    trackname = getattr(r.track(e.track), "name", None) or ""
    title = e.title or ""
    items = [ e.frm(), e.unt(), roomname, trackname ]
    if e.code:
        items.append( (title, e.url()) )
    else:
        items.append( title )

    def encode(x):
        if isinstance(x, tuple):
            name, url = x
            # the url lands in an attribute value, so it needs escaping too
            return f'<a href="{html.escape(url)}">{html.escape(name)}</a>'
        return html.escape(x)

    return "<tr>" + "".join("<td>%s</td>" % encode(_) for _ in items) + "</tr>"
# Opening markup of the static html output: document head, an empty
# 'mode' div, and the start of the table (id 'tab') that the generated
# rows are printed into.
htmlheader = """
<html>
<head>
<title>MCH2022 programma</title>
<meta charset=utf-8 />
</head>
<body>
<div id=mode></div>
<table border=1 cellspacing=0 cellpadding=0 id=tab>
"""
# Closing markup of the static html output, plus the client-side script.
# The script colors rows by how soon they start, runs a 'demo mode' clock
# when the festival has not started yet, and adds a checkbox column whose
# state is kept in localStorage under "choice-<talk code>".
# The talk code is taken from the row's link, which has the form
# .../mch2022/talk/<code>/ - so the regex must match "/talk/"; the former
# "/talks/" never matched and all rows shared the key "choice-null".
htmlfooter = """
</table>
<script>
function decoderange(date, t0, t1) {
var t0 = Date.parse(date+" "+t0)
var t1 = Date.parse(date+" "+t1)
if (t0 > t1) {
t1 += 24*3600*1000;
}
return [t0, t1];
}
var tstart = Date.parse("2022-07-22 08:00");
var tend = Date.parse("2022-07-26 17:00");
var now = Date.now();
var timetext;
var demomode = false;
// demomode when mch is still in the future
if (now < tstart) {
demomode = true;
now = tstart;
var div = document.getElementById('mode');
div.append(document.createTextNode("Demo mode - tijd loopt door. Tijdens het festival gaat dit vanzelf uit"));
div.append(document.createElement("br"));
timetext = document.createTextNode("");
div.append(timetext);
div.append(document.createElement("br"));
}
function update() {
var date;
var table = document.getElementById('tab');
for (var tr of table.rows) {
if (tr.cells.length <= 1) {
if (tr.cells[0].textContent.length > 4) {
date = tr.cells[0].textContent;
}
continue;
}
/*
0 - from
1 - until
2 - room
3 - track
4 - choice
5 - title
*/
var [t0, t1] = decoderange(date, tr.cells[0].textContent, tr.cells[1].textContent);
// determine row color based on how far in the future the event is.
var color = "";
if (t0 <= now && now < t1) {
// playing now
color = "cyan";
}
else if (t0 <= now+30*60000 && now+30*60000 < t1) {
// playing in 30 minutes
color = "lightgreen";
}
else if (t0 <= now+60*60000 && now+60*60000 < t1) {
// playing in 60 minutes
color = "lightblue";
}
// apply color
for (var i=0 ; i<4 ; i++) {
tr.cells[i].style.background = color;
}
}
if (demomode) {
now += 15*60*1000;
timetext.nodeValue = new Date(now).toISOString().substr(0, 19);
if (now>tend) { now = tstart; }
setTimeout(update, 100);
}
else {
setTimeout(update, 60000);
}
}
// creates checkbox, and sets title-td background accordingly.
function make_choice(talk, talktd, ch)
{
var check = document.createElement('input');
check.type = "checkbox";
if (ch) {
// row is selected
check.click();
talktd.style.background = "lightpink";
talktd.style.height = 50;
}
check.onclick = (e) => {
// toggle selection
localStorage.setItem("choice-"+talk, e.target.checked);
talktd.style.background = e.target.checked ? "lightpink" : "";
talktd.style.height = e.target.checked ? 50 : 10;
};
var td = document.createElement('td');
td.appendChild(check);
return td;
}
function add_choices()
{
// apply choices stored in localstorage.
var table = document.getElementById('tab');
var re = new RegExp("/talk/([^/]+)/");
for (var tr of table.rows) {
if (tr.cells.length <= 1) continue;
var talkhref = tr.cells[4].firstChild.href;
var talk = re.exec(talkhref);
if (talk) { talk = talk[1]; }
var choice;
if (talk) {
choice = localStorage.getItem("choice-"+talk);
if (choice)
choice = JSON.parse(choice);
}
tr.insertBefore(make_choice(talk, tr.cells[4], choice), tr.cells[4]);
}
}
add_choices();
update();
</script>
</body>
</html>
"""
def listasjs(lst, key, name):
    """
    Print `lst` as a javascript object literal assigned to variable `name`.

    Each entry is keyed by its `key` attribute: a number when key is 'id',
    a quoted string otherwise. The value is the item's asjson() output.
    """
    if key == 'id':
        entry = " %d: %s,"
    else:
        entry = " \"%s\": %s,"
    print("var %s = {" % name)
    for item in lst:
        print(entry % (getattr(item, key), item.asjson()))
    print("};")
def listasjslist(lst, key, name):
    """
    Print `lst` as a javascript array literal assigned to variable `name`.

    Every item contributes its asjson() output; `key` is not used.
    """
    print("var %s = [" % name)
    for item in lst:
        encoded = item.asjson()
        print(" %s," % (encoded,))
    print("];")
def outputjs(tracks, rooms, speakers, talks, versions):
    """
    Print the javascript based version of the program.

    Output is: jsheader, the tracks/rooms/speakers objects, the talks
    array, the two version variables taken from `versions.program` and
    `versions.wiki`, and finally jsfooter.
    """
    def jsstring(value):
        # The versions come from a web page and a file name, so encode them
        # as proper string literals instead of pasting them between quotes.
        # "</" is split up so the text can never close the <script> element.
        return json.dumps(str(value)).replace("</", "<\\/")

    print(jsheader)
    listasjs(tracks, "id", "tracks")
    listasjs(rooms, "id", "rooms")
    listasjs(speakers, "code", "speakers")
    listasjslist(talks, "code", "talks")
    print("var programversion = %s;" % jsstring(versions.program))
    print("var wikiversion = %s;" % jsstring(versions.wiki))
    print(jsfooter)
# First part of the javascript based output: the document head and the
# start of a <script> element that is left open. It defines the helpers
# txt(), el(), fmtdate() and fmttime(); outputjs prints the data
# variables after this text, followed by jsfooter.
jsheader = """
<html>
<head>
<title>MCH2022 programma</title>
<meta charset=utf-8 />
<script>
function txt(t) { return document.createTextNode(t); }
function el()
{
// create an html element by tagname, optionally adding children and attributes.
// arguments of:
// - instance 'Element' are added as children,
// - type string, are passed to replaceChildren, and converted to text
// - objects and Map are treated as attribute lists.
if (arguments.length==0)
throw "el needs at least one argument";
var tag = arguments[0];
var e = document.createElement(tag);
var args = [];
var attr;
for (var i=1 ; i<arguments.length ; i++)
{
var arg = arguments[i];
if (arg instanceof Element) // children
args.push(arg);
else if (arg instanceof Map) // attributes for the tag
arg.forEach( (v,k)=>e.setAttribute(k, v) );
else if (typeof(arg) == "string") // text child element
args.push(arg);
else if (typeof(arg) == "object") // attributes for the tag.
Object.entries(arg).forEach(([k,v])=>e.setAttribute(k, v))
else
throw "unsupported el argument";
}
if (args)
e.replaceChildren(...args);
return e;
}
function fmtdate(t)
{
t = new Date(t);
// format a time as yyyy-mm-dd
var y = t.getFullYear();
var m = (t.getMonth()+1).toLocaleString('en-US', {minimumIntegerDigits: 2, useGrouping:false});
var d = t.getDate().toLocaleString('en-US', {minimumIntegerDigits: 2, useGrouping:false});
return y+"-"+m+"-"+d;
}
function fmttime(t)
{
t = new Date(t);
var h = t.getHours().toLocaleString('en-US', {minimumIntegerDigits: 2, useGrouping:false});
var m = t.getMinutes().toLocaleString('en-US', {minimumIntegerDigits: 2, useGrouping:false});
return h+":"+m;
}
"""
# Last part of the javascript based output: the rest of the script (table
# construction, row coloring, selection checkboxes, page header) and the
# closing markup. It is printed after the data variables by outputjs.
# In makerow the speaker separator is only added when the cell already has
# children: `sp.children` is a collection object and therefore always
# truthy, which put a <br> in front of the first speaker as well.
jsfooter = """
function maketable()
{
var tab = el('table');
tab.cellSpacing = 0;
tab.cellPadding = 5;
tab.border = true;
createheader(tab.createTHead());
var tprev;
var tbody = el('tbody');
for (var [code, t] of Object.entries(talks)) {
if (!tprev || is_next_day(tprev, t)) {
tbody.append(el('tr', {'colspan':6}));
tbody.append(el('tr', {'colspan':6}, el('td', fmtdate(t.start))));
tbody.append(el('tr', {'colspan':6}));
}
tbody.append(makerow(t));
tprev = t;
}
tab.replaceChildren(tab.tHead, tbody);
return tab;
}
function is_next_day(a, b)
{
a = new Date(a.end);
b = new Date(b.start);
return (b - a > 3600000)
}
function createheader(e)
{
// todo
}
function maketalklink(t)
{
if (t.code) {
if (t.code.length > 10) {
// session
return el('a', {href:"https://wiki.mch2022.org/"+t.title}, t.title);
}
else {
// talk
return el('a', {href:"https://program.mch2022.org/mch2022/talk/"+t.code+"/"}, t.title);
}
}
else {
return t.title;
}
}
function makesplink(s)
{
return el('a', {href:"https://program.mch2022.org/mch2022/speaker/"+s+"/"}, speakers[s].name);
}
function makerow(t)
{
var tr = el('tr');
tr.append(el('td', fmttime(t.start)));
tr.append(el('td', fmttime(t.end)));
tr.append(el('td', rooms[t.room].name));
tr.append(el('td', tracks[t.track].name));
var sp = el('td');
for (var s of t.speakers) {
if (sp.children.length)
sp.appendChild(el('br'));
sp.appendChild(makesplink(s));
}
tr.append(sp);
var choice = localStorage.getItem("choice-"+t.code);
if (choice)
choice = JSON.parse(choice);
tr.append(make_choice(t, choice));
tr.append(el('td', maketalklink(t)));
return tr;
}
function decoderange(date, t0, t1) {
var t0 = Date.parse(date+" "+t0)
var t1 = Date.parse(date+" "+t1)
if (t0 > t1) {
t1 += 24*3600*1000;
}
return [t0, t1];
}
var tstart;
var tend;
var demomode = false;
var timetext;
function update() {
var now = Date.now();
var date; // keep track of the current date
var tables = document.getElementsByTagName('table');
var scrollto; // which row to scroll to
for (var tr of tables[0].rows) {
if (tr.cells.length <= 1) {
if (tr.cells.length && tr.cells[0].textContent.length > 4) {
date = tr.cells[0].textContent;
}
continue;
}
/*
//
0 - from
1 - until
2 - room
3 - track
4 - speakers
5 - choice
6 - title
*/
var [t0, t1] = decoderange(date, tr.cells[0].textContent, tr.cells[1].textContent);
// determine row color based on how far in the future the event is.
var color = "";
if (t0 <= now && now < t1) {
// playing now
color = "cyan";
}
else if (t0 <= now+30*60000 && now+30*60000 < t1) {
// playing in 30 minutes
color = "lightgreen";
}
else if (t0 <= now+60*60000 && now+60*60000 < t1) {
// playing in 60 minutes
color = "lightblue";
}
// apply color
for (var i=0 ; i<4 ; i++) {
tr.cells[i].style.background = color;
}
// remember where to scroll to
if (!scrollto && t0+60*60000 >= now)
scrollto = tr;
}
if (demomode) {
now += 15*60*1000;
timetext.nodeValue = new Date(now).toISOString().substr(0, 19);
if (now>tend) { now = tstart; }
setTimeout(update, 100);
}
if (scrollto)
scrollto.scrollIntoView();
}
// creates checkbox, and sets background accordingly.
function make_choice(talk, ch)
{
var td = el('td');
var check = el('input');
check.type = "checkbox";
if (ch) {
// row is selected
check.click();
td.style.background = "lightpink";
td.style.height = 50;
}
check.onclick = (e) => {
// toggle selection
localStorage.setItem("choice-"+talk.code, e.target.checked);
td.style.background = e.target.checked ? "lightpink" : "";
td.style.height = e.target.checked ? 50 : 10;
};
td.appendChild(check);
return td;
}
function inittimer()
{
tstart = Date.parse("2022-07-22 08:00");
tend = Date.parse("2022-07-26 17:00");
var now = Date.now();
demomode = false;
// demomode when mch is still in the future
if (now < tstart) {
demomode = true;
now = tstart;
var div = document.getElementById('mode');
div.append(txt("Demo mode - tijd loopt door. Tijdens het festival gaat dit vanzelf uit"));
div.append(el("br"));
timetext = txt("");
div.append(timetext);
div.append(el("br"));
}
}
function makeheader()
{
return el('span', "Program version: "+programversion, ", ",
"Wiki version: "+wikiversion, ". By: ", el('a', {href:"mailto:[email protected]"}, "[email protected]"), el('br'),
"Colors indicate which events are current, or in half an hour. Selections are remembered.", el('br'),
"Generated using ", el('a', {href:"https://gist.github.com/nlitsme/186dfa7e000579ed0d6521f43526232c"}, "this python script")
);
}
function start()
{
document.body.append(makeheader());
document.body.append(maketable());
inittimer();
update();
}
</script>
</head>
<body onload="start()">
</body>
</html>
"""
def getroomid(rooms, roomname):
    """
    Return the id of the room named `roomname`, allocating one if needed.

    When no room in `rooms` has that name, a new Room is appended to
    `rooms` with an id one higher than the highest existing id
    (1 when the list is empty) and that id is returned.
    """
    for room in rooms:
        if room.name == roomname:
            return room.id
    # default=0 keeps this working when there are no rooms at all
    newid = max((room.id for room in rooms), default=0) + 1
    rooms.append(Room(dict(id=newid, name=roomname)))
    return newid
def gencode(title, tstart):
    """
    Derive a talk-code for a session: the md5 hex digest of
    str(tstart) followed by the title, both utf-8 encoded.
    """
    from hashlib import md5
    material = str(tstart).encode('utf-8') + title.encode('utf-8')
    return md5(material).hexdigest()
class DecodeProgramHtml(html.parser.HTMLParser):
    """
    Html parser which captures the attributes of the 'pretalx-schedule' tag.

    After feeding a page, `pgm` is a dict of that tag's attributes,
    or None when no such tag was seen.
    """
    # example of the tag this looks for:
    # <pretalx-schedule event-url="/mch2022/" version="DayThreeNoonUpdate" locale="en" timezone="Europe/Amsterdam" style="--pretalx-clr-primary: #491D88" ></pretalx-schedule>
    def __init__(self):
        super().__init__()
        self.pgm = None

    def handle_starttag(self, tag, attrs):
        if tag != 'pretalx-schedule':
            return
        self.pgm = {key: value for key, value in attrs}
def get_program_url():
    """
    Find the current program on the program site's main page.

    Returns a tuple (url, version): the url of the schedule widget json
    for the current version, and the url-unquoted version name.
    Raises RuntimeError when the page has no usable 'pretalx-schedule' tag.
    """
    # imported explicitly; it used to be available only as a side effect
    # of `import urllib.request`.
    import urllib.parse
    url = "https://program.mch2022.org"
    pgmhtml = loadhtml(f"{url}/")
    p = DecodeProgramHtml()
    p.feed(pgmhtml)
    pgm = p.pgm or {}
    # has url-quoted string
    version = pgm.get('version')
    eventurl = pgm.get('event-url')
    if version is None or eventurl is None:
        raise RuntimeError("no pretalx-schedule tag with version and event-url found on %s/" % url)
    return f"{url}{eventurl}schedule/v/{version}/widget/v2.json", urllib.parse.unquote(version)
def main():
    """
    Generate the program overview and print it to stdout.

    Loads the current program from the program site (falling back to the
    local file "program-v2.json" when the download fails), adds the
    sessions found in the latest wiki export, sorts everything by start
    time and room, and prints it as javascript (--js), html (--html) or
    plain text (the default). Diagnostics go to stderr so that stdout
    contains only the generated document.
    """
    import argparse
    parser = argparse.ArgumentParser(description='generate mch program')
    parser.add_argument('--html', action='store_true', help='generate html version of the program')
    parser.add_argument('--js', action='store_true', help='generate javascript version of the program')
    parser.add_argument('--text', action='store_true', help='generate text version of the program ( the default )')
    args = parser.parse_args()

    # get schedule name from "https://program.mch2022.org/"
    url, pgmversion = get_program_url()
    try:
        pgm = loadjson(url)
    except Exception as e:
        # download failed: report it and use the locally saved copy
        print(e, file=sys.stderr)
        with open("program-v2.json", "rb") as f:
            pgm = json.load(f)

    wikipath = get_latest_wiki_export()

    class Versions:
        pass
    v = Versions()
    v.program = pgmversion
    v.wiki = wikipath[:-4]      # file name without the '.xml'

    def loadobjects(cls, items):
        return [ cls(_) for _ in items ]

    tracks = loadobjects(Track, pgm.get('tracks'))
    # track 0 is what Talk uses when an entry has no track,
    # track 999 is used below for the wiki sessions.
    tracks.append(Track({'id':0, 'name':'Break', 'color':'#000000'}))
    tracks.append(Track({'id':999, 'name':'Session', 'color':'#000000'}))
    rooms = loadobjects(Room, pgm.get('rooms'))
    speakers = loadobjects(Speaker, pgm.get('speakers'))
    talks = loadobjects(Talk, pgm.get('talks'))

    for s in getsessions():
        lname = get(s, 'Has session location', 0) or get(s, 'Location', 0) or ""
        lid = getroomid(rooms, lname)
        tstart = fromiso(get(s, 'Has start time', 0))
        try:
            nminutes = int(get(s, 'Has duration', 0))
        except (TypeError, ValueError):
            # missing or non-numeric duration
            nminutes = 0
        tend = tstart
        if tstart and nminutes:
            tend = tstart+timedelta(minutes=nminutes)
        title = get(s, 'title')
        description = get(s, 'Has description', 0)
        if tstart:
            talks.append(Talk(dict(track=999, code=gencode(title, tstart), title=title, abstract=description, room=lid, start=tstart, end=tend)))
        else:
            print("???no starttime: ", title, file=sys.stderr)

    talks = sorted(talks, key=lambda e:e.ordering())

    if args.js:
        outputjs(tracks, rooms, speakers, talks, v)
        return

    resolver = Resolver(tracks, rooms, speakers)
    if args.html:
        print(htmlheader)
    eprev = None
    for e in talks:
        if not eprev or is_day_break(eprev, e):
            # start of a new day: print a date heading
            if args.html:
                print("<tr><td colspan=6>&nbsp;</td></tr>")
                print("<tr><td colspan=6>%s</td></tr>" % e.date())
                print("<tr><td colspan=6>&nbsp;</td></tr>")
            else:
                print("== %s" % e.date())
        if args.html:
            print(ashtml(e, resolver))
        else:
            print(astext(e, resolver))
        eprev = e
    if args.html:
        print(htmlfooter)
# Run the generator only when executed as a script, not when imported.
if __name__=='__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment