thedod · December 10, 2015 16:58
diff --git a/README.md b/README.md
diff --git a/cablesearch2mrns.py b/cablesearch2mrns.py
 #!/usr/bin/env python
 # Print the id of every item returned by a cablesearch.org query, one per
 # line, following the API's paging until it stops reporting more results.
 #
 # Usage: cablesearch2mrns.py 'some cablesearch query'
 #
 # stdout carries only ids (callers may redirect it to a file); the URL of
 # each request and any usage message go to stderr.
 import sys, urllib2, simplejson as json
 CABLESEARCH_API_URL_TEMPLATE = 'http://cablesearch.org/cable/api/search?q={0}&o={1}'
 # Raised when a reply's info.response field is anything other than 'OK'.
 class cablesearchError(Exception): pass

 if len(sys.argv) != 2:
     # Misuse is reported on stderr with a non-zero status so a redirected
     # stdout is not polluted and calling scripts can detect the failure.
     sys.stderr.write("Usage: {0} 'some cablesearch query'\n".format(sys.argv[0]))
     sys.exit(1)

 query = urllib2.quote(sys.argv[1])
 offset = 0

 while True:
     url = CABLESEARCH_API_URL_TEMPLATE.format(query, offset)
     sys.stderr.write('{0}\n'.format(url))
     response = urllib2.urlopen(url)
     try:
         data = json.load(response)
     finally:
         # Release the connection even if the body is not valid JSON.
         response.close()
     info = data.get('info', {})
     if info.get('response', 'meh') != 'OK':
         raise cablesearchError(json.dumps(data.get('info', 'malformed API reply'), indent=4))
     # 'items' may be missing or null; treat both as an empty page.
     for item in data.get('items', None) or []:
         print(item.get('id', 'meh'))
     # The API signals a further page with the string 'true' in info.more.
     if info.get('more', 'meh') != 'true':
         break
     offset = info['nextoffset']
diff --git a/count-mrn-graphs.py b/count-mrn-graphs.py
 #!/usr/bin/env python
 # Read MRNs from stdin (one per line), look each one up in a JSON mapping
 # whose values are iterated as graph names, count how many times every graph
 # is hit, and print one HTML link per graph, highest count first.
 #
 # Usage: count-mrn-graphs.py /path/to/mrn2graph.json < /path/to/list-of-mrns
 import sys,simplejson as json

 if len(sys.argv) != 2:
     sys.stderr.write("Usage: {0} /path/to/mrn2graph.json < /path/to/list-of-mrns\n".format(sys.argv[0]))
     sys.exit(1)

 # 'with' closes the mapping file as soon as it has been parsed.
 with open(sys.argv[1]) as mapping_file:
     m2g = json.load(mapping_file)

 histogram = {}

 for mrn in sys.stdin:
     mrn = mrn.strip()
     # MRNs missing from the mapping contribute nothing.
     for g in m2g.get(mrn, []):
         histogram[g] = histogram.get(g, 0) + 1

 # Stable descending sort on the count; graphs with equal counts keep the
 # dict's iteration order.
 for graph, count in sorted(histogram.items(), key=lambda pair: pair[1], reverse=True):
     print("""<a href="html/{0}.html">{0} ({1})</a><br>""".format(graph, count))
diff --git a/example-result.html b/example-result.html
 <a href="html/db4e669e8796e247aa2d9f17a736702b.graphml.html">db4e669e8796e247aa2d9f17a736702b.graphml (21)</a><br>
 <a href="html/778519516837147060135d87121dd3ca.graphml.html">778519516837147060135d87121dd3ca.graphml (4)</a><br>
 <a href="html/06c575f8c3e5c4f7e5b1fc1fe42a23c4.graphml.html">06c575f8c3e5c4f7e5b1fc1fe42a23c4.graphml (4)</a><br>
 <a href="html/e22d1992150e02231b9163c80d10b09d.graphml.html">e22d1992150e02231b9163c80d10b09d.graphml (2)</a><br>
 <a href="html/2682f04a0e249929ec10535164aa39cc.graphml.html">2682f04a0e249929ec10535164aa39cc.graphml (2)</a><br>
 <a href="html/6892fdd5fb0fb345c0c1629a0314f405.graphml.html">6892fdd5fb0fb345c0c1629a0314f405.graphml (2)</a><br>
 <a href="html/c6607264fc5280485b0e23337b455019.graphml.html">c6607264fc5280485b0e23337b455019.graphml (1)</a><br>
 <a href="html/fc3858f05a5b54e0009c22e74ddaf927.graphml.html">fc3858f05a5b54e0009c22e74ddaf927.graphml (1)</a><br>
 <a href="html/ce35ffd2f2188825f62060ea015618fc.graphml.html">ce35ffd2f2188825f62060ea015618fc.graphml (1)</a><br>
 <a href="html/fc9044e0be38cf184f7e89249bd06238.graphml.html">fc9044e0be38cf184f7e89249bd06238.graphml (1)</a><br>
 <a href="html/4207fdfe4dedc5c2d52b58e3f3ae6fe7.graphml.html">4207fdfe4dedc5c2d52b58e3f3ae6fe7.graphml (1)</a><br>
diff --git a/make-mrn2graphs.py b/make-mrn2graphs.py
 #!/usr/bin/env python
 # Read "graph|mrn" lines from stdin and write to stdout a JSON object that
 # maps every mrn to the list of distinct graphs it appeared with.
 import sys,simplejson as json
 d={}
 for l in sys.stdin:
     l = l.strip()
     if not l:
         # Blank lines carry no pair; skip them instead of failing the unpack.
         continue
     # Split on the first '|' only, so a '|' later in the line stays in mrn.
     # A line without any '|' still raises ValueError (malformed input).
     graph, mrn = l.split('|', 1)
     d.setdefault(mrn, set()).add(graph)
 for k in d:
     # Sets are not JSON-serialisable; sorting also makes the output reproducible.
     d[k] = sorted(d[k])
 json.dump(d, sys.stdout, indent=1)
diff --git a/preprocess.sh b/preprocess.sh
 #!/bin/sh
 # Interactively (re)build the contents of an output folder: the full graph,
 # its subgraphs, the html files, and finally mrn2graphs.json.
 # Must be run from the directory that holds c2g, splitgraph, g2svg, g2idx
 # and make-mrn2graphs.py, since they are invoked relative to it.
 if [ ! -n "$1" ]; then
     echo "Usage: $0 path/to/output/folder"
     exit 1
 fi
 # Remember the starting directory so make-mrn2graphs.py can still be found
 # after the cd below, whatever the depth of the output folder.
 tools_dir=$(pwd)
 if whiptail --yesno "Generate full graph?\n(if you have it in some other folder,\nsymlink it instead)" --defaultno 9 60 ; then
     ./c2g "$1/full.graphml"
 fi
 if whiptail --yesno "split to subgraphs?" --defaultno 7 40 ; then
     rm -rf "$1/graphs"
     mkdir -p "$1/graphs"
     ./splitgraph --source "$1/full.graphml" -d "$1/graphs" --multilevel --clusters
     # The glob must sit outside the quotes, otherwise it is never expanded.
     ls "$1"/graphs/*graphml > "$1/list-of-graphs"
 fi
 if whiptail --yesno "Generate html files?" --defaultno 7 40 ; then
     rm -rf "$1/html"
     mkdir -p "$1/html"
     cp *css *js "$1/html"
     ./g2svg -t graph-timeline.tmpl -i "$1/list-of-graphs" -d "$1/html"
     ./g2idx -i "$1/list-of-graphs" -d "$1/html"
 fi
 # No need for a prompt for this. It's pretty fast.
 echo Generating "$1/mrn2graphs.json"
 # Stop if the graphs folder is missing rather than grepping the wrong directory.
 cd "$1/graphs" || exit 1
 # grep prefixes each match with "file:"; sed turns that into "file|label".
 grep 'v_label">' *.graphml | sed -e 's/:.*">/|/' -e 's/<.*$//' | python "$tools_dir/make-mrn2graphs.py" > ../mrn2graphs.json
diff --git a/search-subgraphs.sh b/search-subgraphs.sh
 #!/bin/sh
 # Run a cablesearch query, store the matching MRNs in the output folder, and
 # build an HTML list of the graphs that contain them.
 # Output files are named after this shell's PID ($$).
 if [ $# -ne 2 ]; then
     echo "Usage: $0 /path/to/output/folder \"some search term\""
     exit 1
 fi
 # Paths are quoted everywhere so an output folder containing spaces works.
 list="$1/search-$$.list"
 html="$1/search-$$.html"
 echo searching for MRNs matching \""$2"\" ...
 python cablesearch2mrns.py "$2" > "$list"
 echo "Wordcount of MRN list at $list:"
 wc "$list"
 python count-mrn-graphs.py "$1/mrn2graphs.json" < "$list" > "$html"
 echo "result is at $html"
	#!/usr/bin/env python
	# Print the id of every item returned by a cablesearch.org query, one per
	# line, following the API's paging until it stops reporting more results.
	#
	# Usage: cablesearch2mrns.py 'some cablesearch query'
	#
	# stdout carries only ids (callers may redirect it to a file); the URL of
	# each request and any usage message go to stderr.
	import sys, urllib2, simplejson as json
	CABLESEARCH_API_URL_TEMPLATE = 'http://cablesearch.org/cable/api/search?q={0}&o={1}'
	# Raised when a reply's info.response field is anything other than 'OK'.
	class cablesearchError(Exception): pass

	if len(sys.argv) != 2:
	    # Misuse is reported on stderr with a non-zero status so a redirected
	    # stdout is not polluted and calling scripts can detect the failure.
	    sys.stderr.write("Usage: {0} 'some cablesearch query'\n".format(sys.argv[0]))
	    sys.exit(1)

	query = urllib2.quote(sys.argv[1])
	offset = 0

	while True:
	    url = CABLESEARCH_API_URL_TEMPLATE.format(query, offset)
	    sys.stderr.write('{0}\n'.format(url))
	    response = urllib2.urlopen(url)
	    try:
	        data = json.load(response)
	    finally:
	        # Release the connection even if the body is not valid JSON.
	        response.close()
	    info = data.get('info', {})
	    if info.get('response', 'meh') != 'OK':
	        raise cablesearchError(json.dumps(data.get('info', 'malformed API reply'), indent=4))
	    # 'items' may be missing or null; treat both as an empty page.
	    for item in data.get('items', None) or []:
	        print(item.get('id', 'meh'))
	    # The API signals a further page with the string 'true' in info.more.
	    if info.get('more', 'meh') != 'true':
	        break
	    offset = info['nextoffset']
	#!/usr/bin/env python
	# Read MRNs from stdin (one per line), look each one up in a JSON mapping
	# whose values are iterated as graph names, count how many times every graph
	# is hit, and print one HTML link per graph, highest count first.
	#
	# Usage: count-mrn-graphs.py /path/to/mrn2graph.json < /path/to/list-of-mrns
	import sys,simplejson as json

	if len(sys.argv) != 2:
	    sys.stderr.write("Usage: {0} /path/to/mrn2graph.json < /path/to/list-of-mrns\n".format(sys.argv[0]))
	    sys.exit(1)

	# 'with' closes the mapping file as soon as it has been parsed.
	with open(sys.argv[1]) as mapping_file:
	    m2g = json.load(mapping_file)

	histogram = {}

	for mrn in sys.stdin:
	    mrn = mrn.strip()
	    # MRNs missing from the mapping contribute nothing.
	    for g in m2g.get(mrn, []):
	        histogram[g] = histogram.get(g, 0) + 1

	# Stable descending sort on the count; graphs with equal counts keep the
	# dict's iteration order.
	for graph, count in sorted(histogram.items(), key=lambda pair: pair[1], reverse=True):
	    print("""<a href="html/{0}.html">{0} ({1})</a><br>""".format(graph, count))
	<a href="html/db4e669e8796e247aa2d9f17a736702b.graphml.html">db4e669e8796e247aa2d9f17a736702b.graphml (21)</a><br>
	<a href="html/778519516837147060135d87121dd3ca.graphml.html">778519516837147060135d87121dd3ca.graphml (4)</a><br>
	<a href="html/06c575f8c3e5c4f7e5b1fc1fe42a23c4.graphml.html">06c575f8c3e5c4f7e5b1fc1fe42a23c4.graphml (4)</a><br>
	<a href="html/e22d1992150e02231b9163c80d10b09d.graphml.html">e22d1992150e02231b9163c80d10b09d.graphml (2)</a><br>
	<a href="html/2682f04a0e249929ec10535164aa39cc.graphml.html">2682f04a0e249929ec10535164aa39cc.graphml (2)</a><br>
	<a href="html/6892fdd5fb0fb345c0c1629a0314f405.graphml.html">6892fdd5fb0fb345c0c1629a0314f405.graphml (2)</a><br>
	<a href="html/c6607264fc5280485b0e23337b455019.graphml.html">c6607264fc5280485b0e23337b455019.graphml (1)</a><br>
	<a href="html/fc3858f05a5b54e0009c22e74ddaf927.graphml.html">fc3858f05a5b54e0009c22e74ddaf927.graphml (1)</a><br>
	<a href="html/ce35ffd2f2188825f62060ea015618fc.graphml.html">ce35ffd2f2188825f62060ea015618fc.graphml (1)</a><br>
	<a href="html/fc9044e0be38cf184f7e89249bd06238.graphml.html">fc9044e0be38cf184f7e89249bd06238.graphml (1)</a><br>
	<a href="html/4207fdfe4dedc5c2d52b58e3f3ae6fe7.graphml.html">4207fdfe4dedc5c2d52b58e3f3ae6fe7.graphml (1)</a><br>
	#!/usr/bin/env python
	# Read "graph|mrn" lines from stdin and write to stdout a JSON object that
	# maps every mrn to the list of distinct graphs it appeared with.
	import sys,simplejson as json
	d={}
	for l in sys.stdin:
	    l = l.strip()
	    if not l:
	        # Blank lines carry no pair; skip them instead of failing the unpack.
	        continue
	    # The separator is a bare '|' (str.split takes a literal, not a regex).
	    # Split on the first one only, so a '|' later in the line stays in mrn.
	    # A line without any '|' still raises ValueError (malformed input).
	    graph, mrn = l.split('|', 1)
	    d.setdefault(mrn, set()).add(graph)
	for k in d:
	    # Sets are not JSON-serialisable; sorting also makes the output reproducible.
	    d[k] = sorted(d[k])
	json.dump(d, sys.stdout, indent=1)
	#!/bin/sh
	# Interactively (re)build the contents of an output folder: the full graph,
	# its subgraphs, the html files, and finally mrn2graphs.json.
	# Must be run from the directory that holds c2g, splitgraph, g2svg, g2idx
	# and make-mrn2graphs.py, since they are invoked relative to it.
	if [ ! -n "$1" ]; then
	    echo "Usage: $0 path/to/output/folder"
	    exit 1
	fi
	# Remember the starting directory so make-mrn2graphs.py can still be found
	# after the cd below, whatever the depth of the output folder.
	tools_dir=$(pwd)
	if whiptail --yesno "Generate full graph?\n(if you have it in some other folder,\nsymlink it instead)" --defaultno 9 60 ; then
	    ./c2g "$1/full.graphml"
	fi
	if whiptail --yesno "split to subgraphs?" --defaultno 7 40 ; then
	    rm -rf "$1/graphs"
	    mkdir -p "$1/graphs"
	    ./splitgraph --source "$1/full.graphml" -d "$1/graphs" --multilevel --clusters
	    # The glob must sit outside the quotes, otherwise it is never expanded.
	    ls "$1"/graphs/*graphml > "$1/list-of-graphs"
	fi
	if whiptail --yesno "Generate html files?" --defaultno 7 40 ; then
	    rm -rf "$1/html"
	    mkdir -p "$1/html"
	    # Copy the stylesheet and script files by glob (*css, *js).
	    cp *css *js "$1/html"
	    ./g2svg -t graph-timeline.tmpl -i "$1/list-of-graphs" -d "$1/html"
	    ./g2idx -i "$1/list-of-graphs" -d "$1/html"
	fi
	# No need for a prompt for this. It's pretty fast.
	echo Generating "$1/mrn2graphs.json"
	# Stop if the graphs folder is missing rather than grepping the wrong directory.
	cd "$1/graphs" || exit 1
	# grep prefixes each match with "file:"; sed turns that into "file|label".
	# The pipes are real shell pipes and must not be backslash-escaped.
	grep 'v_label">' *.graphml | sed -e 's/:.*">/|/' -e 's/<.*$//' | python "$tools_dir/make-mrn2graphs.py" > ../mrn2graphs.json
	#!/bin/sh
	# Run a cablesearch query, store the matching MRNs in the output folder, and
	# build an HTML list of the graphs that contain them.
	# Output files are named after this shell's PID ($$).
	if [ $# -ne 2 ]; then
	    echo "Usage: $0 /path/to/output/folder \"some search term\""
	    exit 1
	fi
	# Paths are quoted everywhere so an output folder containing spaces works.
	list="$1/search-$$.list"
	html="$1/search-$$.html"
	echo searching for MRNs matching \""$2"\" ...
	python cablesearch2mrns.py "$2" > "$list"
	echo "Wordcount of MRN list at $list:"
	wc "$list"
	python count-mrn-graphs.py "$1/mrn2graphs.json" < "$list" > "$html"
	echo "result is at $html"