Last active
January 16, 2017 10:34
-
-
Save kantale/01e5b42289c37786652ceadce57c07dd to your computer and use it in GitHub Desktop.
Number of new workflows submitted per year in the myExperiment platform
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Fetch data with: | |
| # wget -O myExperiment.html "http://www.myexperiment.org/workflows?num=10000" | |
| # Resulting plot: http://imgur.com/a/7Zzlu | |
| import re | |
| import datetime | |
| import matplotlib.pyplot as plt | |
| import seaborn | |
| from bs4 import BeautifulSoup | |
fn = "myExperiment.html"

# Each workflow entry states its dates as "Created: YYYY-MM-DD" and
# "Last updated: YYYY-MM-DD"; the three groups are year, month and day.
CREATED_RE = re.compile(r"Created: (\d+)-(\d+)-(\d+)")
UPDATED_RE = re.compile(r"Last updated: (\d+)-(\d+)-(\d+)")


def parse_date(pattern, text):
    """Return the date matched by *pattern* in *text* as a midnight datetime.

    *pattern* is a compiled regex with three numeric groups (year, month,
    day).  Returns None when the pattern does not occur in *text*.
    """
    found = pattern.search(text)
    if not found:
        return None
    year, month, day = (int(part) for part in found.groups())
    return datetime.datetime(year, month, day)


def entry_dates(entry):
    """Return the ``(created, updated)`` datetimes of one workflow entry.

    The dates are read from the second and third line of the entry's second
    ``<p>`` element.  Either value is None when the paragraph, the line or
    the date itself is missing.
    """
    paragraphs = entry.find_all("p")
    lines = paragraphs[1].text.split("\n") if len(paragraphs) > 1 else []
    created = parse_date(CREATED_RE, lines[1]) if len(lines) > 1 else None
    updated = parse_date(UPDATED_RE, lines[2]) if len(lines) > 2 else None
    return created, updated


def load_records(path):
    """Parse the saved workflow listing at *path*.

    Returns a list of ``(created, updated)`` tuples, one per element with
    class ``main_panel``; ``updated`` is None when no update date was found.

    Raises ValueError (after printing the offending entry) when an entry has
    no creation date, because such an entry cannot be assigned to a year.
    """
    # Hand BeautifulSoup the raw bytes so it detects the page encoding
    # itself instead of depending on the locale's default text encoding.
    with open(path, "rb") as f:
        soup = BeautifulSoup(f, "lxml")

    records = []
    for entry in soup.find_all(class_="main_panel"):
        created, updated = entry_dates(entry)
        if created is None:
            print(entry)
            raise ValueError(
                "no 'Created: YYYY-MM-DD' date found in the entry printed above")
        records.append((created, updated))
    return records


def count_per_year(records):
    """Count records by creation year.

    *records* is an iterable of ``(created, updated)`` tuples.  Returns a
    list of ``(year, count)`` pairs sorted by year.
    """
    counts = {}
    for created, _updated in records:
        counts[created.year] = counts.get(created.year, 0) + 1
    return sorted(counts.items())


def plot_counts(per_year):
    """Draw the number of new workflows per year as a black line plot."""
    years = [year for year, _count in per_year]
    totals = [count for _year, count in per_year]

    fig, ax = plt.subplots()
    ax.plot(years, totals, color="black")
    # Pin the ticks to the actual years: the automatically chosen ticks can
    # be fractional, which would give repeated labels once cast to int.
    ax.set_xticks(years)
    ax.set_xticklabels([str(year) for year in years])
    ax.set_ylabel('Number of new Workflows')
    plt.show()


def main():
    """Load the listing, print the per-year counts and show the plot."""
    per_year = count_per_year(load_records(fn))
    print(per_year)
    plot_counts(per_year)


if __name__ == "__main__":
    main()
Author
kantale
commented
Nov 20, 2016
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
