Created
April 6, 2017 16:02
-
-
Save olgabot/67124af03b923d7ee7bb6483fc43f798 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"import io\n", | |
"import sys\n", | |
"\n", | |
"import altair\n", | |
"import matplotlib.pyplot as plt\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"I searched MEDLINE for the query `\"single-cell\" transcriptome` using the [MEDLINE trend tool](http://dan.corlan.net/medline-trend.html) and got the data shown in the next code cell. The tool describes its output columns as follows:\n", | |
"\n", | |
"> The first column will be the number of papers in each year, the second column--the year, and the third column the number of papers per 100,000 in the medline database in that year.\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'/Users/olga/anaconda3/envs/jupyter-py3/lib/python3.5/io.py'" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"io.__file__" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1013" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Report the C API version of the running interpreter (environment sanity check).\n", | |
"sys.api_version" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Number</th>\n", | |
" <th>Year</th>\n", | |
" <th>%000</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>165</td>\n", | |
" <td>2016</td>\n", | |
" <td>13.149</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>148</td>\n", | |
" <td>2015</td>\n", | |
" <td>11.854</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>86</td>\n", | |
" <td>2014</td>\n", | |
" <td>7.171</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>64</td>\n", | |
" <td>2013</td>\n", | |
" <td>5.590</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>42</td>\n", | |
" <td>2012</td>\n", | |
" <td>3.877</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Number Year %000\n", | |
"0 165 2016 13.149\n", | |
"1 148 2015 11.854\n", | |
"2 86 2014 7.171\n", | |
"3 64 2013 5.590\n", | |
"4 42 2012 3.877" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"s = ''' Number Year %000\n", | |
" 165 2016 13.149\n", | |
" 148 2015 11.854\n", | |
" 86 2014 7.171\n", | |
" 64 2013 5.590\n", | |
" 42 2012 3.877\n", | |
" 29 2011 2.850\n", | |
" 13 2010 1.383\n", | |
" 11 2009 1.256\n", | |
" 10 2008 1.198\n", | |
" 2 2007 0.255\n", | |
" 6 2006 0.804\n", | |
" 1 2005 0.143\n", | |
" 0 2004 0.000\n", | |
" 2 2003 0.337\n", | |
" 0 2002 0.000\n", | |
" 1 2001 0.183\n", | |
" 0 2000 0.000\n", | |
" 0 1999 0.000\n", | |
" 0 1998 0.000\n", | |
" 0 1997 0.000\n", | |
" 0 1996 0.000\n", | |
" 0 1995 0.000\n", | |
" 0 1994 0.000\n", | |
" 0 1993 0.000\n", | |
" 0 1992 0.000\n", | |
" 0 1991 0.000\n", | |
" 0 1990 0.000\n", | |
" 0 1989 0.000\n", | |
" 0 1988 0.000\n", | |
" 0 1987 0.000\n", | |
" 0 1986 0.000\n", | |
" 0 1985 0.000\n", | |
" 0 1984 0.000\n", | |
" 0 1983 0.000\n", | |
" 0 1982 0.000\n", | |
" 0 1981 0.000\n", | |
" 0 1980 0.000\n", | |
" 0 1979 0.000\n", | |
" 0 1978 0.000\n", | |
" 0 1977 0.000\n", | |
" 0 1976 0.000\n", | |
" 0 1975 0.000\n", | |
" 0 1974 0.000\n", | |
" 0 1973 0.000\n", | |
" 0 1972 0.000\n", | |
" 0 1971 0.000\n", | |
" 0 1970 0.000\n", | |
" 0 1969 0.000\n", | |
" 0 1968 0.000\n", | |
" 0 1967 0.000\n", | |
" 0 1966 0.000\n", | |
" 0 1965 0.000\n", | |
" 0 1964 0.000\n", | |
" 0 1963 0.000\n", | |
" 0 1962 0.000\n", | |
" 0 1961 0.000\n", | |
" 0 1960 0.000\n", | |
" 0 1959 0.000\n", | |
" 0 1958 0.000\n", | |
" 0 1957 0.000\n", | |
" 0 1956 0.000\n", | |
" 0 1955 0.000\n", | |
" 0 1954 0.000\n", | |
" 0 1953 0.000\n", | |
" 0 1952 0.000\n", | |
" 0 1951 0.000\n", | |
" 0 1950 0.000'''\n", | |
"\n", | |
"\n", | |
"# Parse the whitespace-delimited text pasted above. The separator is a raw string because\n", | |
"# '\\s' in a plain literal is an invalid escape sequence (DeprecationWarning since Python 3.6).\n", | |
"data = pd.read_table(io.StringIO(s), sep=r'\\s+')\n", | |
"data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"(17, 3)\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Number</th>\n", | |
" <th>Year</th>\n", | |
" <th>%000</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>12</th>\n", | |
" <td>0</td>\n", | |
" <td>2004</td>\n", | |
" <td>0.000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13</th>\n", | |
" <td>2</td>\n", | |
" <td>2003</td>\n", | |
" <td>0.337</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>14</th>\n", | |
" <td>0</td>\n", | |
" <td>2002</td>\n", | |
" <td>0.000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>15</th>\n", | |
" <td>1</td>\n", | |
" <td>2001</td>\n", | |
" <td>0.183</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>16</th>\n", | |
" <td>0</td>\n", | |
" <td>2000</td>\n", | |
" <td>0.000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Number Year %000\n", | |
"12 0 2004 0.000\n", | |
"13 2 2003 0.337\n", | |
"14 0 2002 0.000\n", | |
"15 1 2001 0.183\n", | |
"16 0 2000 0.000" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Keep only the years from 2000 onwards. The explicit .copy() makes subset an independent\n", | |
"# frame, so adding columns to it later does not raise pandas' SettingWithCopyWarning.\n", | |
"subset = data.query('Year >= 2000').copy()\n", | |
"print(subset.shape)\n", | |
"subset.tail()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div class=\"vega-embed\" id=\"a58d7332-1f12-445a-b532-9a37b4b13b9a\"></div>\n", | |
"\n", | |
"<style>\n", | |
".vega-embed svg, .vega-embed canvas {\n", | |
" border: 1px dotted gray;\n", | |
"}\n", | |
"\n", | |
".vega-embed .vega-actions a {\n", | |
" margin-right: 6px;\n", | |
"}\n", | |
"</style>\n" | |
] | |
}, | |
"metadata": { | |
"jupyter-vega": "#a58d7332-1f12-445a-b532-9a37b4b13b9a" | |
}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"application/javascript": [ | |
"var spec = {\"mark\": \"line\", \"encoding\": {\"y\": {\"field\": \"Number\", \"type\": \"quantitative\"}, \"x\": {\"field\": \"Year\", \"type\": \"quantitative\"}}, \"config\": {\"cell\": {\"width\": 500, \"height\": 350}}, \"data\": {\"values\": [{\"%000\": 13.149000000000001, \"Year\": 2016, \"Number\": 165}, {\"%000\": 11.854000000000001, \"Year\": 2015, \"Number\": 148}, {\"%000\": 7.171, \"Year\": 2014, \"Number\": 86}, {\"%000\": 5.59, \"Year\": 2013, \"Number\": 64}, {\"%000\": 3.877, \"Year\": 2012, \"Number\": 42}, {\"%000\": 2.85, \"Year\": 2011, \"Number\": 29}, {\"%000\": 1.383, \"Year\": 2010, \"Number\": 13}, {\"%000\": 1.256, \"Year\": 2009, \"Number\": 11}, {\"%000\": 1.198, \"Year\": 2008, \"Number\": 10}, {\"%000\": 0.255, \"Year\": 2007, \"Number\": 2}, {\"%000\": 0.804, \"Year\": 2006, \"Number\": 6}, {\"%000\": 0.14300000000000002, \"Year\": 2005, \"Number\": 1}, {\"%000\": 0.0, \"Year\": 2004, \"Number\": 0}, {\"%000\": 0.337, \"Year\": 2003, \"Number\": 2}, {\"%000\": 0.0, \"Year\": 2002, \"Number\": 0}, {\"%000\": 0.183, \"Year\": 2001, \"Number\": 1}, {\"%000\": 0.0, \"Year\": 2000, \"Number\": 0}]}};\n", | |
"var selector = \"#a58d7332-1f12-445a-b532-9a37b4b13b9a\";\n", | |
"var type = \"vega-lite\";\n", | |
"\n", | |
"var output_area = this;\n", | |
"require(['nbextensions/jupyter-vega/index'], function(vega) {\n", | |
" vega.render(selector, spec, type, output_area);\n", | |
"}, function (err) {\n", | |
" if (err.requireType !== 'scripterror') {\n", | |
" throw(err);\n", | |
" }\n", | |
"});\n" | |
] | |
}, | |
"metadata": { | |
"jupyter-vega": "#a58d7332-1f12-445a-b532-9a37b4b13b9a" | |
}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"image/png": "" | |
}, | |
"metadata": { | |
"jupyter-vega": "#a58d7332-1f12-445a-b532-9a37b4b13b9a" | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# Line chart of the number of papers per year for the 2000+ subset.\n", | |
"altair.Chart(subset).mark_line().encode(x='Year', y='Number')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/olga/anaconda3/envs/jupyter-py3/lib/python3.5/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: \n", | |
"A value is trying to be set on a copy of a slice from a DataFrame.\n", | |
"Try using .loc[row_indexer,col_indexer] = value instead\n", | |
"\n", | |
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", | |
" if __name__ == '__main__':\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Number</th>\n", | |
" <th>Year</th>\n", | |
" <th>%000</th>\n", | |
" <th>Year_date</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>165</td>\n", | |
" <td>2016</td>\n", | |
" <td>13.149</td>\n", | |
" <td>2016</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>148</td>\n", | |
" <td>2015</td>\n", | |
" <td>11.854</td>\n", | |
" <td>2015</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>86</td>\n", | |
" <td>2014</td>\n", | |
" <td>7.171</td>\n", | |
" <td>2014</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>64</td>\n", | |
" <td>2013</td>\n", | |
" <td>5.590</td>\n", | |
" <td>2013</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>42</td>\n", | |
" <td>2012</td>\n", | |
" <td>3.877</td>\n", | |
" <td>2012</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Number Year %000 Year_date\n", | |
"0 165 2016 13.149 2016\n", | |
"1 148 2015 11.854 2015\n", | |
"2 86 2014 7.171 2014\n", | |
"3 64 2013 5.590 2013\n", | |
"4 42 2012 3.877 2012" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Parse the integer years into real timestamps (January 1st of each year).\n", | |
"# astype(pd.datetime) only cast the column to object and left plain ints behind.\n", | |
"# .assign returns a new frame instead of writing into the slice produced by query,\n", | |
"# so no SettingWithCopyWarning is raised.\n", | |
"subset = subset.assign(Year_date=pd.to_datetime(subset['Year'].astype(str), format='%Y'))\n", | |
"subset.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"Number int64\n", | |
"Year int64\n", | |
"%000 float64\n", | |
"Year_date object\n", | |
"dtype: object" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"subset.dtypes" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div class=\"vega-embed\" id=\"8b8bcfe8-342e-4218-90e3-86043f51c70f\"></div>\n", | |
"\n", | |
"<style>\n", | |
".vega-embed svg, .vega-embed canvas {\n", | |
" border: 1px dotted gray;\n", | |
"}\n", | |
"\n", | |
".vega-embed .vega-actions a {\n", | |
" margin-right: 6px;\n", | |
"}\n", | |
"</style>\n" | |
] | |
}, | |
"metadata": { | |
"jupyter-vega": "#8b8bcfe8-342e-4218-90e3-86043f51c70f" | |
}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"application/javascript": [ | |
"var spec = {\"mark\": \"line\", \"encoding\": {\"y\": {\"field\": \"Number\", \"type\": \"quantitative\"}, \"x\": {\"field\": \"Year\", \"type\": \"quantitative\"}}, \"config\": {\"cell\": {\"width\": 500, \"height\": 350}}, \"data\": {\"values\": [{\"%000\": 13.149000000000001, \"Year\": 2016, \"Year_date\": 2016, \"Number\": 165}, {\"%000\": 11.854000000000001, \"Year\": 2015, \"Year_date\": 2015, \"Number\": 148}, {\"%000\": 7.171, \"Year\": 2014, \"Year_date\": 2014, \"Number\": 86}, {\"%000\": 5.59, \"Year\": 2013, \"Year_date\": 2013, \"Number\": 64}, {\"%000\": 3.877, \"Year\": 2012, \"Year_date\": 2012, \"Number\": 42}, {\"%000\": 2.85, \"Year\": 2011, \"Year_date\": 2011, \"Number\": 29}, {\"%000\": 1.383, \"Year\": 2010, \"Year_date\": 2010, \"Number\": 13}, {\"%000\": 1.256, \"Year\": 2009, \"Year_date\": 2009, \"Number\": 11}, {\"%000\": 1.198, \"Year\": 2008, \"Year_date\": 2008, \"Number\": 10}, {\"%000\": 0.255, \"Year\": 2007, \"Year_date\": 2007, \"Number\": 2}, {\"%000\": 0.804, \"Year\": 2006, \"Year_date\": 2006, \"Number\": 6}, {\"%000\": 0.14300000000000002, \"Year\": 2005, \"Year_date\": 2005, \"Number\": 1}, {\"%000\": 0.0, \"Year\": 2004, \"Year_date\": 2004, \"Number\": 0}, {\"%000\": 0.337, \"Year\": 2003, \"Year_date\": 2003, \"Number\": 2}, {\"%000\": 0.0, \"Year\": 2002, \"Year_date\": 2002, \"Number\": 0}, {\"%000\": 0.183, \"Year\": 2001, \"Year_date\": 2001, \"Number\": 1}, {\"%000\": 0.0, \"Year\": 2000, \"Year_date\": 2000, \"Number\": 0}]}};\n", | |
"var selector = \"#8b8bcfe8-342e-4218-90e3-86043f51c70f\";\n", | |
"var type = \"vega-lite\";\n", | |
"\n", | |
"var output_area = this;\n", | |
"require(['nbextensions/jupyter-vega/index'], function(vega) {\n", | |
" vega.render(selector, spec, type, output_area);\n", | |
"}, function (err) {\n", | |
" if (err.requireType !== 'scripterror') {\n", | |
" throw(err);\n", | |
" }\n", | |
"});\n" | |
] | |
}, | |
"metadata": { | |
"jupyter-vega": "#8b8bcfe8-342e-4218-90e3-86043f51c70f" | |
}, | |
"output_type": "display_data" | |
}, | |
{ | |
"data": { | |
"image/png": "" | |
}, | |
"metadata": { | |
"jupyter-vega": "#8b8bcfe8-342e-4218-90e3-86043f51c70f" | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# Same line chart as before, drawn after Year_date was added to subset.\n", | |
"# NOTE(review): the x axis still uses the integer Year column and Year_date is unused here;\n", | |
"# presumably a temporal axis on Year_date was intended -- confirm before changing.\n", | |
"altair.Chart(subset).mark_line().encode(x='Year', y='Number')" | |
] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"kernelspec": { | |
"display_name": "Python3 (Jupyter)", | |
"language": "python", | |
"name": "myenv" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.5.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment