Export reading list from ACM SIGCHI Android App
#!/usr/bin/python3
# written at 7am before coffee. don't @ me.
import os
import re
import tarfile
import sqlite3
import subprocess
import urllib.request
dbfile = "apps/org.sigchi/db/conference_db"
cachepath = "cache.html"
# conference proceedings (proc. and adjunct)
# example ids here are for UIST 2018
# "https://dl.acm.org/citation.cfm?id=3266037&preflayout=flat",
# "https://dl.acm.org/citation.cfm?id=3242587&preflayout=flat"
confdata = [
    "http://uist.acm.org/uist2018/pages/toc.html",
    "http://uist.acm.org/uist2018/pages/toca.html"
]
# create the backup file
subprocess.run("adb backup -f org.sigchi.ab org.sigchi",shell=True,check=True)
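# note: adb backup asks for confirmation on the phone's screen; leave the
# backup password empty, otherwise the payload is encrypted and the gzip
# header trick below won't work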
# convert to gzip by changing header
with open("org.sigchi.ab","rb") as abfile:
    data = abfile.read()
    #print("Original header:" + str(data[0:24]))
header = bytes([ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00 ])
with open("org.sigchi.tgz","wb") as tgzfile:
    tgzfile.write(header+data[24:])
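# why this works (assumption about the .ab layout): an unencrypted adb backup
# is a 24-byte "ANDROID BACKUP" text header followed by a deflate-compressed
# tar stream, so prepending the gzip magic bytes yields a file tarfile can open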
# untar https://docs.python.org/3.4/library/tarfile.html?highlight=tar
tar = tarfile.open("org.sigchi.tgz")
tar.extract(dbfile)
tar.close()
# get reading list from dbfile
# https://sebastianraschka.com/Articles/2014_sqlite_in_python_tutorial.html
papers = []
db = sqlite3.connect(dbfile)
cur = db.cursor()
cur.execute("SELECT * FROM MY_READING_MODEL")
rows = cur.fetchall()
for row in rows:
    pid = '"'+row[1]+"-"+row[2]+'"'
    cur.execute("SELECT TITLE,TYPE,EXTERNAL_ID,SIMPLE_AUTHOR_LIST FROM PAPER_MODEL WHERE ID="+pid)
    result = cur.fetchall()[0]
    papers.append(result)
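# note: PAPER_MODEL.ID appears to be the two MY_READING_MODEL columns joined
# with "-"; inside the loop above, a parameterized query would be a safer
# sketch of the same lookup, e.g.
#   cur.execute("SELECT TITLE,TYPE,EXTERNAL_ID,SIMPLE_AUTHOR_LIST FROM PAPER_MODEL WHERE ID=?", (row[1]+"-"+row[2],))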
# get the proceedings index from urls or cache (if available)
html = ""
if os.path.isfile(cachepath):
    with open(cachepath,"r") as cachefile:
        html = cachefile.read()
else:
    for url in confdata:
        with urllib.request.urlopen(url) as response:
            html += response.read().decode("utf-8")
    with open(cachepath,"w") as cachefile:
        cachefile.write(html)
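# note: delete cache.html to force a fresh download of the proceedings pages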
# match titles to DOIs/URLs via proceedings index
for paper in papers:
    print(paper[0]+" -> ",end="")
    regexp = 'href="(.*?)".*?'+paper[0]
    paper_url = re.search(regexp,html)
    print(paper_url.group(1) if paper_url else "")
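# note: titles containing regex metacharacters (e.g. "?" or parentheses) may
# fail to match; using re.escape(paper[0]) in the pattern above would be a
# more robust variant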