lukauskas · March 13, 2021 09:09 · lukauskas · Mar 13, 2021
diff --git a/a-scipy-1.6.1-slow-spermanr-with-nans.ipynb b/a-scipy-1.6.1-slow-spermanr-with-nans.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from scipy.stats import spearmanr\n",
    "from sinfo import sinfo"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Version numbers are printed at the end.\n",
    "\n",
    "I did some digging and, while I can reproduce the slowdown of `spearmanr` with `nan_policy='omit'` for my dataset (`data.tsv.gz`), I cannot reproduce it for random data."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('data.tsv.gz', sep='\\t', index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.052921</td>\n",
       "      <td>0.203387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.181125</td>\n",
       "      <td>-0.107936</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.562084</td>\n",
       "      <td>-0.050814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.460667</td>\n",
       "      <td>-0.066390</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.081773</td>\n",
       "      <td>0.101136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>552351</th>\n",
       "      <td>0.343175</td>\n",
       "      <td>0.028522</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>552352</th>\n",
       "      <td>0.257745</td>\n",
       "      <td>0.230120</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>552353</th>\n",
       "      <td>0.127060</td>\n",
       "      <td>0.063844</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>552354</th>\n",
       "      <td>-0.167225</td>\n",
       "      <td>0.027455</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>552355</th>\n",
       "      <td>-0.028771</td>\n",
       "      <td>-0.163098</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>552356 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "               x         y\n",
       "0      -0.052921  0.203387\n",
       "1       0.181125 -0.107936\n",
       "2      -0.562084 -0.050814\n",
       "3      -0.460667 -0.066390\n",
       "4       0.081773  0.101136\n",
       "...          ...       ...\n",
       "552351  0.343175  0.028522\n",
       "552352  0.257745  0.230120\n",
       "552353  0.127060  0.063844\n",
       "552354 -0.167225  0.027455\n",
       "552355 -0.028771 -0.163098\n",
       "\n",
       "[552356 rows x 2 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
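  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This does not seem to be a NaN-vs-None issue, as both columns have dtype float64 (the `dtype: object` line in the output describes the `dtypes` Series itself, not the data):"
   ]
  },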
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x    float64\n",
      "y    float64\n",
      "dtype: object\n"
     ]
    }
   ],
   "source": [
    "print(df.dtypes)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "But there are NaNs:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "x    2488\n",
       "y    1620\n",
       "dtype: int64"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.isnull().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Without further ado, on my laptop this takes around 30s:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 32.1 s, sys: 209 ms, total: 32.3 s\n",
      "Wall time: 32.5 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "SpearmanrResult(correlation=-0.0022916182928414985, pvalue=0.08973233486988992)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time spearmanr(df['x'], df['y'], nan_policy=\"omit\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This does not seem to be a pandas issue:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 32 s, sys: 178 ms, total: 32.2 s\n",
      "Wall time: 32.3 s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "SpearmanrResult(correlation=-0.0022916182928414985, pvalue=0.08973233486988992)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time spearmanr(np.asarray(df['x'].values), np.asarray(df['y'].values), nan_policy=\"omit\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "It also didn't seem to be an issue of the order of entries in the files (the dataset is already a shuffled version of my actual dataset). \n",
    "\n",
    "Dropping NaNs removes only a few rows and solves the issue."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>x</th>\n",
       "      <th>y</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.052921</td>\n",
       "      <td>0.203387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.181125</td>\n",
       "      <td>-0.107936</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-0.562084</td>\n",
       "      <td>-0.050814</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>-0.460667</td>\n",
       "      <td>-0.066390</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0.081773</td>\n",
       "      <td>0.101136</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>552351</th>\n",
       "      <td>0.343175</td>\n",
       "      <td>0.028522</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>552352</th>\n",
       "      <td>0.257745</td>\n",
       "      <td>0.230120</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>552353</th>\n",
       "      <td>0.127060</td>\n",
       "      <td>0.063844</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>552354</th>\n",
       "      <td>-0.167225</td>\n",
       "      <td>0.027455</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>552355</th>\n",
       "      <td>-0.028771</td>\n",
       "      <td>-0.163098</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>548256 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "               x         y\n",
       "0      -0.052921  0.203387\n",
       "1       0.181125 -0.107936\n",
       "2      -0.562084 -0.050814\n",
       "3      -0.460667 -0.066390\n",
       "4       0.081773  0.101136\n",
       "...          ...       ...\n",
       "552351  0.343175  0.028522\n",
       "552352  0.257745  0.230120\n",
       "552353  0.127060  0.063844\n",
       "552354 -0.167225  0.027455\n",
       "552355 -0.028771 -0.163098\n",
       "\n",
       "[548256 rows x 2 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_no_na = df.dropna()\n",
    "df_no_na"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Super fast now:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 146 ms, sys: 22.2 ms, total: 168 ms\n",
      "Wall time: 167 ms\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "SpearmanrResult(correlation=-0.002291618292841498, pvalue=0.08973233486989)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%time spearmanr(df_no_na['x'], df_no_na['y'], nan_policy=\"raise\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "🤷🏼‍♂️"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Version numbers etc:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-----\n",
      "numpy       1.20.1\n",
      "pandas      1.2.3\n",
      "scipy       1.6.1\n",
      "sinfo       0.3.1\n",
      "-----\n",
      "IPython             7.21.0\n",
      "jupyter_client      6.1.11\n",
      "jupyter_core        4.7.1\n",
      "notebook            6.2.0\n",
      "-----\n",
      "Python 3.9.2 (default, Feb 24 2021, 13:30:36) [Clang 12.0.0 (clang-1200.0.32.29)]\n",
      "macOS-10.15.7-x86_64-i386-64bit\n",
      "8 logical CPU cores, i386\n",
      "-----\n",
      "Session information updated at 2021-03-12 19:32\n"
     ]
    }
   ],
   "source": [
    "sinfo()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
diff --git a/requirements.txt b/requirements.txt
diff --git a/spearmanr slow with NaNs on scipy 1.6.1-requirements.txt b/spearmanr slow with NaNs on scipy 1.6.1-requirements.txt
diff --git a/data.tsv.gz b/data.tsv.gz
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import numpy as np\n",
	"from scipy.stats import spearmanr\n",
	"from sinfo import sinfo"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Version numbers are printed at the end.\n",
	"\n",
	"I did some digging and, while I can reproduce the slowdown of `spearmanr` with `nan_policy='omit'` for my dataset (`data.tsv.gz`), I cannot reproduce it for random data."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [],
	"source": [
	"df = pd.read_csv('data.tsv.gz', sep='\\t', index_col=0)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>x</th>\n",
	" <th>y</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>-0.052921</td>\n",
	" <td>0.203387</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>0.181125</td>\n",
	" <td>-0.107936</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>-0.562084</td>\n",
	" <td>-0.050814</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>-0.460667</td>\n",
	" <td>-0.066390</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>0.081773</td>\n",
	" <td>0.101136</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>...</th>\n",
	" <td>...</td>\n",
	" <td>...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>552351</th>\n",
	" <td>0.343175</td>\n",
	" <td>0.028522</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>552352</th>\n",
	" <td>0.257745</td>\n",
	" <td>0.230120</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>552353</th>\n",
	" <td>0.127060</td>\n",
	" <td>0.063844</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>552354</th>\n",
	" <td>-0.167225</td>\n",
	" <td>0.027455</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>552355</th>\n",
	" <td>-0.028771</td>\n",
	" <td>-0.163098</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"<p>552356 rows × 2 columns</p>\n",
	"</div>"
	],
	"text/plain": [
	" x y\n",
	"0 -0.052921 0.203387\n",
	"1 0.181125 -0.107936\n",
	"2 -0.562084 -0.050814\n",
	"3 -0.460667 -0.066390\n",
	"4 0.081773 0.101136\n",
	"... ... ...\n",
	"552351 0.343175 0.028522\n",
	"552352 0.257745 0.230120\n",
	"552353 0.127060 0.063844\n",
	"552354 -0.167225 0.027455\n",
	"552355 -0.028771 -0.163098\n",
	"\n",
	"[552356 rows x 2 columns]"
	]
	},
	"execution_count": 3,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df"
	]
	},
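	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"This does not seem to be a NaN-vs-None issue, as both columns have dtype float64 (the `dtype: object` line in the output describes the `dtypes` Series itself, not the data):"
	]
	},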
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"x float64\n",
	"y float64\n",
	"dtype: object\n"
	]
	}
	],
	"source": [
	"print(df.dtypes)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"But there are NaNs:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"x 2488\n",
	"y 1620\n",
	"dtype: int64"
	]
	},
	"execution_count": 5,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df.isnull().sum()"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Without further ado, on my laptop this takes around 30s:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"CPU times: user 32.1 s, sys: 209 ms, total: 32.3 s\n",
	"Wall time: 32.5 s\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"SpearmanrResult(correlation=-0.0022916182928414985, pvalue=0.08973233486988992)"
	]
	},
	"execution_count": 6,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"%time spearmanr(df['x'], df['y'], nan_policy=\"omit\")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"This does not seem to be a pandas issue:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"CPU times: user 32 s, sys: 178 ms, total: 32.2 s\n",
	"Wall time: 32.3 s\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"SpearmanrResult(correlation=-0.0022916182928414985, pvalue=0.08973233486988992)"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"%time spearmanr(np.asarray(df['x'].values), np.asarray(df['y'].values), nan_policy=\"omit\")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"It also didn't seem to be an issue of the order of entries in the files (the dataset is already a shuffled version of my actual dataset). \n",
	"\n",
	"Dropping NaNs removes only a few rows and solves the issue."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>x</th>\n",
	" <th>y</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>-0.052921</td>\n",
	" <td>0.203387</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>0.181125</td>\n",
	" <td>-0.107936</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>-0.562084</td>\n",
	" <td>-0.050814</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>-0.460667</td>\n",
	" <td>-0.066390</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>0.081773</td>\n",
	" <td>0.101136</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>...</th>\n",
	" <td>...</td>\n",
	" <td>...</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>552351</th>\n",
	" <td>0.343175</td>\n",
	" <td>0.028522</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>552352</th>\n",
	" <td>0.257745</td>\n",
	" <td>0.230120</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>552353</th>\n",
	" <td>0.127060</td>\n",
	" <td>0.063844</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>552354</th>\n",
	" <td>-0.167225</td>\n",
	" <td>0.027455</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>552355</th>\n",
	" <td>-0.028771</td>\n",
	" <td>-0.163098</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"<p>548256 rows × 2 columns</p>\n",
	"</div>"
	],
	"text/plain": [
	" x y\n",
	"0 -0.052921 0.203387\n",
	"1 0.181125 -0.107936\n",
	"2 -0.562084 -0.050814\n",
	"3 -0.460667 -0.066390\n",
	"4 0.081773 0.101136\n",
	"... ... ...\n",
	"552351 0.343175 0.028522\n",
	"552352 0.257745 0.230120\n",
	"552353 0.127060 0.063844\n",
	"552354 -0.167225 0.027455\n",
	"552355 -0.028771 -0.163098\n",
	"\n",
	"[548256 rows x 2 columns]"
	]
	},
	"execution_count": 8,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"df_no_na = df.dropna()\n",
	"df_no_na"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Super fast now:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"CPU times: user 146 ms, sys: 22.2 ms, total: 168 ms\n",
	"Wall time: 167 ms\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"SpearmanrResult(correlation=-0.002291618292841498, pvalue=0.08973233486989)"
	]
	},
	"execution_count": 9,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"%time spearmanr(df_no_na['x'], df_no_na['y'], nan_policy=\"raise\")"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"🤷🏼‍♂️"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Version numbers etc:"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"-----\n",
	"numpy 1.20.1\n",
	"pandas 1.2.3\n",
	"scipy 1.6.1\n",
	"sinfo 0.3.1\n",
	"-----\n",
	"IPython 7.21.0\n",
	"jupyter_client 6.1.11\n",
	"jupyter_core 4.7.1\n",
	"notebook 6.2.0\n",
	"-----\n",
	"Python 3.9.2 (default, Feb 24 2021, 13:30:36) [Clang 12.0.0 (clang-1200.0.32.29)]\n",
	"macOS-10.15.7-x86_64-i386-64bit\n",
	"8 logical CPU cores, i386\n",
	"-----\n",
	"Session information updated at 2021-03-12 19:32\n"
	]
	}
	],
	"source": [
	"sinfo()"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.9.2"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}