Skip to content

Instantly share code, notes, and snippets.

@alonsosilvaallende
Created July 20, 2021 12:01
Show Gist options
  • Save alonsosilvaallende/6efd698f11a3ce9df718ae68624b9c43 to your computer and use it in GitHub Desktop.
Save alonsosilvaallende/6efd698f11a3ce9df718ae68624b9c43 to your computer and use it in GitHub Desktop.
Rdatasets-examples.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Rdatasets-examples.ipynb",
"provenance": [],
"authorship_tag": "ABX9TyMbLdXFOOdBMsHaBkorP3ou",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/alonsosilvaallende/6efd698f11a3ce9df718ae68624b9c43/rdatasets-examples.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Ap73-QRsxiuY",
"outputId": "94845930-e205-454a-f05b-5cf557ac2aef"
},
"source": [
"import statsmodels.api as sm"
],
"execution_count": 1,
"outputs": [
{
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
" import pandas.util.testing as tm\n"
],
"name": "stderr"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "AbrCySswxm2H"
},
"source": [
"# Load the \"pbc\" dataset of the R \"survival\" package; the table itself is exposed as `pbc.data`.\n",
"pbc = sm.datasets.get_rdataset(dataname=\"pbc\", package=\"survival\")"
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "NuBRGaMZx-R0",
"outputId": "74ad2b7b-0b63-48ee-a4eb-844e249be7e0"
},
"source": [
"pbc.data.head()"
],
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>time</th>\n",
" <th>status</th>\n",
" <th>trt</th>\n",
" <th>age</th>\n",
" <th>sex</th>\n",
" <th>ascites</th>\n",
" <th>hepato</th>\n",
" <th>spiders</th>\n",
" <th>edema</th>\n",
" <th>bili</th>\n",
" <th>chol</th>\n",
" <th>albumin</th>\n",
" <th>copper</th>\n",
" <th>alk.phos</th>\n",
" <th>ast</th>\n",
" <th>trig</th>\n",
" <th>platelet</th>\n",
" <th>protime</th>\n",
" <th>stage</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>400</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>58.765229</td>\n",
" <td>f</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>14.5</td>\n",
" <td>261.0</td>\n",
" <td>2.60</td>\n",
" <td>156.0</td>\n",
" <td>1718.0</td>\n",
" <td>137.95</td>\n",
" <td>172.0</td>\n",
" <td>190.0</td>\n",
" <td>12.2</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>4500</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>56.446270</td>\n",
" <td>f</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.1</td>\n",
" <td>302.0</td>\n",
" <td>4.14</td>\n",
" <td>54.0</td>\n",
" <td>7394.8</td>\n",
" <td>113.52</td>\n",
" <td>88.0</td>\n",
" <td>221.0</td>\n",
" <td>10.6</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>1012</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>70.072553</td>\n",
" <td>m</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.5</td>\n",
" <td>1.4</td>\n",
" <td>176.0</td>\n",
" <td>3.48</td>\n",
" <td>210.0</td>\n",
" <td>516.0</td>\n",
" <td>96.10</td>\n",
" <td>55.0</td>\n",
" <td>151.0</td>\n",
" <td>12.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>1925</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>54.740589</td>\n",
" <td>f</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.5</td>\n",
" <td>1.8</td>\n",
" <td>244.0</td>\n",
" <td>2.54</td>\n",
" <td>64.0</td>\n",
" <td>6121.8</td>\n",
" <td>60.63</td>\n",
" <td>92.0</td>\n",
" <td>183.0</td>\n",
" <td>10.3</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>1504</td>\n",
" <td>1</td>\n",
" <td>2.0</td>\n",
" <td>38.105407</td>\n",
" <td>f</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>3.4</td>\n",
" <td>279.0</td>\n",
" <td>3.53</td>\n",
" <td>143.0</td>\n",
" <td>671.0</td>\n",
" <td>113.15</td>\n",
" <td>72.0</td>\n",
" <td>136.0</td>\n",
" <td>10.9</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id time status trt age ... ast trig platelet protime stage\n",
"0 1 400 2 1.0 58.765229 ... 137.95 172.0 190.0 12.2 4.0\n",
"1 2 4500 0 1.0 56.446270 ... 113.52 88.0 221.0 10.6 3.0\n",
"2 3 1012 2 1.0 70.072553 ... 96.10 55.0 151.0 12.0 4.0\n",
"3 4 1925 2 1.0 54.740589 ... 60.63 92.0 183.0 10.3 4.0\n",
"4 5 1504 1 2.0 38.105407 ... 113.15 72.0 136.0 10.9 3.0\n",
"\n",
"[5 rows x 20 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "_zLrRmbex_bc"
},
"source": [
"# Load the \"cancer\" dataset of the R \"survival\" package; the table itself is exposed as `cancer.data`.\n",
"cancer = sm.datasets.get_rdataset(dataname=\"cancer\", package=\"survival\")"
],
"execution_count": 5,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "Wstb_M9vyXV1",
"outputId": "fc99487d-1728-401a-9578-846abee9a222"
},
"source": [
"cancer.data.head()"
],
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>inst</th>\n",
" <th>time</th>\n",
" <th>status</th>\n",
" <th>age</th>\n",
" <th>sex</th>\n",
" <th>ph.ecog</th>\n",
" <th>ph.karno</th>\n",
" <th>pat.karno</th>\n",
" <th>meal.cal</th>\n",
" <th>wt.loss</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3.0</td>\n",
" <td>306</td>\n",
" <td>2</td>\n",
" <td>74</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>90.0</td>\n",
" <td>100.0</td>\n",
" <td>1175.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3.0</td>\n",
" <td>455</td>\n",
" <td>2</td>\n",
" <td>68</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>90.0</td>\n",
" <td>90.0</td>\n",
" <td>1225.0</td>\n",
" <td>15.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3.0</td>\n",
" <td>1010</td>\n",
" <td>1</td>\n",
" <td>56</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>90.0</td>\n",
" <td>90.0</td>\n",
" <td>NaN</td>\n",
" <td>15.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>5.0</td>\n",
" <td>210</td>\n",
" <td>2</td>\n",
" <td>57</td>\n",
" <td>1</td>\n",
" <td>1.0</td>\n",
" <td>90.0</td>\n",
" <td>60.0</td>\n",
" <td>1150.0</td>\n",
" <td>11.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.0</td>\n",
" <td>883</td>\n",
" <td>2</td>\n",
" <td>60</td>\n",
" <td>1</td>\n",
" <td>0.0</td>\n",
" <td>100.0</td>\n",
" <td>90.0</td>\n",
" <td>NaN</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" inst time status age ... ph.karno pat.karno meal.cal wt.loss\n",
"0 3.0 306 2 74 ... 90.0 100.0 1175.0 NaN\n",
"1 3.0 455 2 68 ... 90.0 90.0 1225.0 15.0\n",
"2 3.0 1010 1 56 ... 90.0 90.0 NaN 15.0\n",
"3 5.0 210 2 57 ... 90.0 60.0 1150.0 11.0\n",
"4 1.0 883 2 60 ... 100.0 90.0 NaN 0.0\n",
"\n",
"[5 rows x 10 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "-hNywb-KyZBc"
},
"source": [
"# Load the \"flchain\" dataset of the R \"survival\" package; the table itself is exposed as `flchain.data`.\n",
"flchain = sm.datasets.get_rdataset(dataname=\"flchain\", package=\"survival\")"
],
"execution_count": 7,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 204
},
"id": "k68KmA3Yyjg1",
"outputId": "78710bd6-9496-4f56-f395-668c2e31805d"
},
"source": [
"flchain.data.head()"
],
"execution_count": 8,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>sex</th>\n",
" <th>sample.yr</th>\n",
" <th>kappa</th>\n",
" <th>lambda</th>\n",
" <th>flc.grp</th>\n",
" <th>creatinine</th>\n",
" <th>mgus</th>\n",
" <th>futime</th>\n",
" <th>death</th>\n",
" <th>chapter</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>97</td>\n",
" <td>F</td>\n",
" <td>1997</td>\n",
" <td>5.70</td>\n",
" <td>4.860</td>\n",
" <td>10</td>\n",
" <td>1.7</td>\n",
" <td>0</td>\n",
" <td>85</td>\n",
" <td>1</td>\n",
" <td>Circulatory</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>92</td>\n",
" <td>F</td>\n",
" <td>2000</td>\n",
" <td>0.87</td>\n",
" <td>0.683</td>\n",
" <td>1</td>\n",
" <td>0.9</td>\n",
" <td>0</td>\n",
" <td>1281</td>\n",
" <td>1</td>\n",
" <td>Neoplasms</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>94</td>\n",
" <td>F</td>\n",
" <td>1997</td>\n",
" <td>4.36</td>\n",
" <td>3.850</td>\n",
" <td>10</td>\n",
" <td>1.4</td>\n",
" <td>0</td>\n",
" <td>69</td>\n",
" <td>1</td>\n",
" <td>Circulatory</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>92</td>\n",
" <td>F</td>\n",
" <td>1996</td>\n",
" <td>2.42</td>\n",
" <td>2.220</td>\n",
" <td>9</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>115</td>\n",
" <td>1</td>\n",
" <td>Circulatory</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>93</td>\n",
" <td>F</td>\n",
" <td>1996</td>\n",
" <td>1.32</td>\n",
" <td>1.690</td>\n",
" <td>6</td>\n",
" <td>1.1</td>\n",
" <td>0</td>\n",
" <td>1039</td>\n",
" <td>1</td>\n",
" <td>Circulatory</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age sex sample.yr kappa ... mgus futime death chapter\n",
"0 97 F 1997 5.70 ... 0 85 1 Circulatory\n",
"1 92 F 2000 0.87 ... 0 1281 1 Neoplasms\n",
"2 94 F 1997 4.36 ... 0 69 1 Circulatory\n",
"3 92 F 1996 2.42 ... 0 115 1 Circulatory\n",
"4 93 F 1996 1.32 ... 0 1039 1 Circulatory\n",
"\n",
"[5 rows x 11 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 8
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "ZZ79-uyTyk9U"
},
"source": [
""
],
"execution_count": null,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment