Skip to content

Instantly share code, notes, and snippets.

@kantale
Last active January 16, 2017 10:34
Show Gist options
  • Select an option

  • Save kantale/01e5b42289c37786652ceadce57c07dd to your computer and use it in GitHub Desktop.

Select an option

Save kantale/01e5b42289c37786652ceadce57c07dd to your computer and use it in GitHub Desktop.
Number of new workflows submitted per year on the myExperiment platform
# Fetch data with:
# wget -O myExperiment.html "http://www.myexperiment.org/workflows?num=10000"
# Resulting plot: http://imgur.com/a/7Zzlu
import re
import datetime
import matplotlib.pyplot as plt
import seaborn
from bs4 import BeautifulSoup
# Local copy of the myExperiment workflow listing (see the wget command above).
fn = "myExperiment.html"

# Date stamps as they appear in the second <p> of every workflow entry.
CREATED_RE = re.compile(r"Created: (\d+)-(\d+)-(\d+)")
UPDATED_RE = re.compile(r"Last updated: (\d+)-(\d+)-(\d+)")


def parse_date(pattern, line):
    """Return the date matched by *pattern* in *line* as a midnight datetime.

    *pattern* is a compiled regex with three groups (year, month, day).
    Returns None when the pattern does not occur in *line*.
    Raises ValueError (from datetime) if the digits do not form a valid date.
    """
    found = pattern.search(line)
    if found is None:
        return None
    year, month, day = (int(part) for part in found.groups())
    return datetime.datetime(year, month, day, 0, 0, 0)


def entry_dates(entry):
    """Return ``(created, updated)`` for one ``main_panel`` entry.

    The dates are read from the second and third lines of the entry's second
    <p> element. ``updated`` is None when no "Last updated" stamp is present.

    Raises ValueError when no creation date can be found, because such an
    entry cannot be assigned to a year.
    """
    paragraphs = entry.find_all('p')
    lines = paragraphs[1].text.split('\n') if len(paragraphs) > 1 else []

    # Bounds-check the fixed line positions so a short entry is reported as
    # "unparseable" below rather than surfacing as a bare IndexError.
    created = parse_date(CREATED_RE, lines[1]) if len(lines) > 1 else None
    updated = parse_date(UPDATED_RE, lines[2]) if len(lines) > 2 else None

    if created is None:
        raise ValueError(
            "could not parse a creation date from entry:\n%s" % (entry,))
    return created, updated


def count_per_year(data):
    """Count entries per creation year.

    *data* is an iterable of ``(created, updated)`` tuples; only ``created``
    is used. Returns a list of ``(year, count)`` tuples sorted by year.
    """
    totals = {}
    for created, _updated in data:
        totals[created.year] = totals.get(created.year, 0) + 1
    return sorted(totals.items())


def plot_per_year(per_year):
    """Plot the ``(year, count)`` pairs as a black line and show the figure."""
    # Imported here so the file's top-level import block stays untouched.
    from matplotlib.ticker import FormatStrFormatter, MaxNLocator

    _fig, ax = plt.subplots()
    ax.plot([year for year, _ in per_year],
            [count for _, count in per_year],
            color="black")
    # Years are integers: place ticks on whole years only and print them
    # without decimals, so every label matches its actual tick position.
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.xaxis.set_major_formatter(FormatStrFormatter("%d"))
    ax.set_ylabel('Number of new Workflows')
    plt.show()


def main():
    """Parse the saved listing, print the per-year counts and plot them."""
    # Read raw bytes and let BeautifulSoup detect the page encoding instead
    # of relying on the platform's default text encoding.
    with open(fn, "rb") as f:
        content = f.read()
    soup = BeautifulSoup(content, "lxml")

    data = [entry_dates(entry) for entry in soup.find_all(class_="main_panel")]
    per_year = count_per_year(data)
    print(per_year)
    plot_per_year(per_year)


if __name__ == "__main__":
    main()
@kantale
Copy link
Copy Markdown
Author

kantale commented Nov 20, 2016

Result

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment