Created
July 20, 2021 12:19
-
-
Save alonsosilvaallende/685f042b97ff6f267e268fdf99fdc0e0 to your computer and use it in GitHub Desktop.
Rdatasets-examples.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Rdatasets-examples.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyMB+pe3uVrRLayDQsgZflx2", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/alonsosilvaallende/685f042b97ff6f267e268fdf99fdc0e0/rdatasets-examples.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Ap73-QRsxiuY", | |
"outputId": "94845930-e205-454a-f05b-5cf557ac2aef" | |
}, | |
"source": [ | |
"import statsmodels.api as sm" | |
], | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"/usr/local/lib/python3.7/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n", | |
" import pandas.util.testing as tm\n" | |
], | |
"name": "stderr" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "AbrCySswxm2H" | |
}, | |
"source": [ | |
"pbc = sm.datasets.get_rdataset(\"pbc\", \"survival\")" | |
], | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"id": "NuBRGaMZx-R0", | |
"outputId": "74ad2b7b-0b63-48ee-a4eb-844e249be7e0" | |
}, | |
"source": [ | |
"pbc.data.head()" | |
], | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>id</th>\n", | |
" <th>time</th>\n", | |
" <th>status</th>\n", | |
" <th>trt</th>\n", | |
" <th>age</th>\n", | |
" <th>sex</th>\n", | |
" <th>ascites</th>\n", | |
" <th>hepato</th>\n", | |
" <th>spiders</th>\n", | |
" <th>edema</th>\n", | |
" <th>bili</th>\n", | |
" <th>chol</th>\n", | |
" <th>albumin</th>\n", | |
" <th>copper</th>\n", | |
" <th>alk.phos</th>\n", | |
" <th>ast</th>\n", | |
" <th>trig</th>\n", | |
" <th>platelet</th>\n", | |
" <th>protime</th>\n", | |
" <th>stage</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1</td>\n", | |
" <td>400</td>\n", | |
" <td>2</td>\n", | |
" <td>1.0</td>\n", | |
" <td>58.765229</td>\n", | |
" <td>f</td>\n", | |
" <td>1.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>14.5</td>\n", | |
" <td>261.0</td>\n", | |
" <td>2.60</td>\n", | |
" <td>156.0</td>\n", | |
" <td>1718.0</td>\n", | |
" <td>137.95</td>\n", | |
" <td>172.0</td>\n", | |
" <td>190.0</td>\n", | |
" <td>12.2</td>\n", | |
" <td>4.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>2</td>\n", | |
" <td>4500</td>\n", | |
" <td>0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>56.446270</td>\n", | |
" <td>f</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.1</td>\n", | |
" <td>302.0</td>\n", | |
" <td>4.14</td>\n", | |
" <td>54.0</td>\n", | |
" <td>7394.8</td>\n", | |
" <td>113.52</td>\n", | |
" <td>88.0</td>\n", | |
" <td>221.0</td>\n", | |
" <td>10.6</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3</td>\n", | |
" <td>1012</td>\n", | |
" <td>2</td>\n", | |
" <td>1.0</td>\n", | |
" <td>70.072553</td>\n", | |
" <td>m</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.5</td>\n", | |
" <td>1.4</td>\n", | |
" <td>176.0</td>\n", | |
" <td>3.48</td>\n", | |
" <td>210.0</td>\n", | |
" <td>516.0</td>\n", | |
" <td>96.10</td>\n", | |
" <td>55.0</td>\n", | |
" <td>151.0</td>\n", | |
" <td>12.0</td>\n", | |
" <td>4.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>4</td>\n", | |
" <td>1925</td>\n", | |
" <td>2</td>\n", | |
" <td>1.0</td>\n", | |
" <td>54.740589</td>\n", | |
" <td>f</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.5</td>\n", | |
" <td>1.8</td>\n", | |
" <td>244.0</td>\n", | |
" <td>2.54</td>\n", | |
" <td>64.0</td>\n", | |
" <td>6121.8</td>\n", | |
" <td>60.63</td>\n", | |
" <td>92.0</td>\n", | |
" <td>183.0</td>\n", | |
" <td>10.3</td>\n", | |
" <td>4.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>5</td>\n", | |
" <td>1504</td>\n", | |
" <td>1</td>\n", | |
" <td>2.0</td>\n", | |
" <td>38.105407</td>\n", | |
" <td>f</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>3.4</td>\n", | |
" <td>279.0</td>\n", | |
" <td>3.53</td>\n", | |
" <td>143.0</td>\n", | |
" <td>671.0</td>\n", | |
" <td>113.15</td>\n", | |
" <td>72.0</td>\n", | |
" <td>136.0</td>\n", | |
" <td>10.9</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" id time status trt age ... ast trig platelet protime stage\n", | |
"0 1 400 2 1.0 58.765229 ... 137.95 172.0 190.0 12.2 4.0\n", | |
"1 2 4500 0 1.0 56.446270 ... 113.52 88.0 221.0 10.6 3.0\n", | |
"2 3 1012 2 1.0 70.072553 ... 96.10 55.0 151.0 12.0 4.0\n", | |
"3 4 1925 2 1.0 54.740589 ... 60.63 92.0 183.0 10.3 4.0\n", | |
"4 5 1504 1 2.0 38.105407 ... 113.15 72.0 136.0 10.9 3.0\n", | |
"\n", | |
"[5 rows x 20 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 4 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "_zLrRmbex_bc" | |
}, | |
"source": [ | |
"cancer = sm.datasets.get_rdataset(\"cancer\", \"survival\")" | |
], | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"id": "Wstb_M9vyXV1", | |
"outputId": "fc99487d-1728-401a-9578-846abee9a222" | |
}, | |
"source": [ | |
"cancer.data.head()" | |
], | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>inst</th>\n", | |
" <th>time</th>\n", | |
" <th>status</th>\n", | |
" <th>age</th>\n", | |
" <th>sex</th>\n", | |
" <th>ph.ecog</th>\n", | |
" <th>ph.karno</th>\n", | |
" <th>pat.karno</th>\n", | |
" <th>meal.cal</th>\n", | |
" <th>wt.loss</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>3.0</td>\n", | |
" <td>306</td>\n", | |
" <td>2</td>\n", | |
" <td>74</td>\n", | |
" <td>1</td>\n", | |
" <td>1.0</td>\n", | |
" <td>90.0</td>\n", | |
" <td>100.0</td>\n", | |
" <td>1175.0</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>3.0</td>\n", | |
" <td>455</td>\n", | |
" <td>2</td>\n", | |
" <td>68</td>\n", | |
" <td>1</td>\n", | |
" <td>0.0</td>\n", | |
" <td>90.0</td>\n", | |
" <td>90.0</td>\n", | |
" <td>1225.0</td>\n", | |
" <td>15.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3.0</td>\n", | |
" <td>1010</td>\n", | |
" <td>1</td>\n", | |
" <td>56</td>\n", | |
" <td>1</td>\n", | |
" <td>0.0</td>\n", | |
" <td>90.0</td>\n", | |
" <td>90.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>15.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>5.0</td>\n", | |
" <td>210</td>\n", | |
" <td>2</td>\n", | |
" <td>57</td>\n", | |
" <td>1</td>\n", | |
" <td>1.0</td>\n", | |
" <td>90.0</td>\n", | |
" <td>60.0</td>\n", | |
" <td>1150.0</td>\n", | |
" <td>11.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1.0</td>\n", | |
" <td>883</td>\n", | |
" <td>2</td>\n", | |
" <td>60</td>\n", | |
" <td>1</td>\n", | |
" <td>0.0</td>\n", | |
" <td>100.0</td>\n", | |
" <td>90.0</td>\n", | |
" <td>NaN</td>\n", | |
" <td>0.0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" inst time status age ... ph.karno pat.karno meal.cal wt.loss\n", | |
"0 3.0 306 2 74 ... 90.0 100.0 1175.0 NaN\n", | |
"1 3.0 455 2 68 ... 90.0 90.0 1225.0 15.0\n", | |
"2 3.0 1010 1 56 ... 90.0 90.0 NaN 15.0\n", | |
"3 5.0 210 2 57 ... 90.0 60.0 1150.0 11.0\n", | |
"4 1.0 883 2 60 ... 100.0 90.0 NaN 0.0\n", | |
"\n", | |
"[5 rows x 10 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 6 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "-hNywb-KyZBc" | |
}, | |
"source": [ | |
"flchain = sm.datasets.get_rdataset(\"flchain\", \"survival\")" | |
], | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"id": "k68KmA3Yyjg1", | |
"outputId": "78710bd6-9496-4f56-f395-668c2e31805d" | |
}, | |
"source": [ | |
"flchain.data.head()" | |
], | |
"execution_count": 8, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>age</th>\n", | |
" <th>sex</th>\n", | |
" <th>sample.yr</th>\n", | |
" <th>kappa</th>\n", | |
" <th>lambda</th>\n", | |
" <th>flc.grp</th>\n", | |
" <th>creatinine</th>\n", | |
" <th>mgus</th>\n", | |
" <th>futime</th>\n", | |
" <th>death</th>\n", | |
" <th>chapter</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>97</td>\n", | |
" <td>F</td>\n", | |
" <td>1997</td>\n", | |
" <td>5.70</td>\n", | |
" <td>4.860</td>\n", | |
" <td>10</td>\n", | |
" <td>1.7</td>\n", | |
" <td>0</td>\n", | |
" <td>85</td>\n", | |
" <td>1</td>\n", | |
" <td>Circulatory</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>92</td>\n", | |
" <td>F</td>\n", | |
" <td>2000</td>\n", | |
" <td>0.87</td>\n", | |
" <td>0.683</td>\n", | |
" <td>1</td>\n", | |
" <td>0.9</td>\n", | |
" <td>0</td>\n", | |
" <td>1281</td>\n", | |
" <td>1</td>\n", | |
" <td>Neoplasms</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>94</td>\n", | |
" <td>F</td>\n", | |
" <td>1997</td>\n", | |
" <td>4.36</td>\n", | |
" <td>3.850</td>\n", | |
" <td>10</td>\n", | |
" <td>1.4</td>\n", | |
" <td>0</td>\n", | |
" <td>69</td>\n", | |
" <td>1</td>\n", | |
" <td>Circulatory</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>92</td>\n", | |
" <td>F</td>\n", | |
" <td>1996</td>\n", | |
" <td>2.42</td>\n", | |
" <td>2.220</td>\n", | |
" <td>9</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0</td>\n", | |
" <td>115</td>\n", | |
" <td>1</td>\n", | |
" <td>Circulatory</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>93</td>\n", | |
" <td>F</td>\n", | |
" <td>1996</td>\n", | |
" <td>1.32</td>\n", | |
" <td>1.690</td>\n", | |
" <td>6</td>\n", | |
" <td>1.1</td>\n", | |
" <td>0</td>\n", | |
" <td>1039</td>\n", | |
" <td>1</td>\n", | |
" <td>Circulatory</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" age sex sample.yr kappa ... mgus futime death chapter\n", | |
"0 97 F 1997 5.70 ... 0 85 1 Circulatory\n", | |
"1 92 F 2000 0.87 ... 0 1281 1 Neoplasms\n", | |
"2 94 F 1997 4.36 ... 0 69 1 Circulatory\n", | |
"3 92 F 1996 2.42 ... 0 115 1 Circulatory\n", | |
"4 93 F 1996 1.32 ... 0 1039 1 Circulatory\n", | |
"\n", | |
"[5 rows x 11 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 8 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ZZ79-uyTyk9U" | |
}, | |
"source": [ | |
"aids2 = sm.datasets.get_rdataset(\"Aids2\",\"MASS\")" | |
], | |
"execution_count": 9, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 204 | |
}, | |
"id": "kJrRMCUn1su2", | |
"outputId": "a6f251c2-153a-42af-d376-29b8e7fca840" | |
}, | |
"source": [ | |
"aids2.data.head()" | |
], | |
"execution_count": 10, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>state</th>\n", | |
" <th>sex</th>\n", | |
" <th>diag</th>\n", | |
" <th>death</th>\n", | |
" <th>status</th>\n", | |
" <th>T.categ</th>\n", | |
" <th>age</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>NSW</td>\n", | |
" <td>M</td>\n", | |
" <td>10905</td>\n", | |
" <td>11081</td>\n", | |
" <td>D</td>\n", | |
" <td>hs</td>\n", | |
" <td>35</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>NSW</td>\n", | |
" <td>M</td>\n", | |
" <td>11029</td>\n", | |
" <td>11096</td>\n", | |
" <td>D</td>\n", | |
" <td>hs</td>\n", | |
" <td>53</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>NSW</td>\n", | |
" <td>M</td>\n", | |
" <td>9551</td>\n", | |
" <td>9983</td>\n", | |
" <td>D</td>\n", | |
" <td>hs</td>\n", | |
" <td>42</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>NSW</td>\n", | |
" <td>M</td>\n", | |
" <td>9577</td>\n", | |
" <td>9654</td>\n", | |
" <td>D</td>\n", | |
" <td>haem</td>\n", | |
" <td>44</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>NSW</td>\n", | |
" <td>M</td>\n", | |
" <td>10015</td>\n", | |
" <td>10290</td>\n", | |
" <td>D</td>\n", | |
" <td>hs</td>\n", | |
" <td>39</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" state sex diag death status T.categ age\n", | |
"0 NSW M 10905 11081 D hs 35\n", | |
"1 NSW M 11029 11096 D hs 53\n", | |
"2 NSW M 9551 9983 D hs 42\n", | |
"3 NSW M 9577 9654 D haem 44\n", | |
"4 NSW M 10015 10290 D hs 39" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 10 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "0khJz4wT1uMk" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment