Last active
February 12, 2021 19:12
-
-
Save rejsmont/ae0269b541ca768c9b869e7fda325d39 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"scrolled": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"DC Comics superheroines: 166\n", | |
"Marvel Comics superheroines: 170\n" | |
] | |
} | |
], | |
"source": [ | |
"import requests\n", | |
"from bs4 import BeautifulSoup\n", | |
"\n", | |
"url = 'https://en.wikipedia.org/wiki/List_of_superheroines'\n", | |
"with requests.get(url) as r:\n", | |
" soup = BeautifulSoup(r.content, 'lxml')\n", | |
" ulist = soup.select('h2 + ul > li')\n", | |
" print('DC Comics superheroines:',\n", | |
" len([ x for x in ulist if 'DC' in str(x) ]))\n", | |
" print('Marvel Comics superheroines:',\n", | |
" len([ x for x in ulist if 'Marvel' in str(x) ]))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Marvel superheroes: 684 including 180 female (26.32%)\n", | |
"DC superheroes: 622 including 144 female (23.15%)\n" | |
] | |
} | |
], | |
"source": [ | |
"import requests\n", | |
"\n", | |
"template = {'action': \"query\", 'list': 'categorymembers', 'cmlimit': '200', 'format': \"json\"}\n", | |
"categories = ['Marvel Comics superheroes', 'DC Comics superheroes',\n", | |
" 'Marvel Comics female superheroes', 'DC Comics female superheroes']\n", | |
"tags = ['Marvel', 'DC', 'Marvel female', 'DC female']\n", | |
"\n", | |
"params = [dict({'cmtitle': 'Category: ' + c}, **template) for c in categories]\n", | |
"results = {}\n", | |
"\n", | |
"for c, p in zip(tags, params):\n", | |
" superheroes = []\n", | |
" do_continue = True\n", | |
" while do_continue:\n", | |
" r = requests.get('https://en.wikipedia.org/w/api.php', params=p)\n", | |
" if r:\n", | |
" data = r.json()\n", | |
" do_continue = 'continue' in data\n", | |
" if do_continue:\n", | |
" p['cmcontinue'] = data['continue']['cmcontinue']\n", | |
" candidates = (i['title'].split(' (')[0] for i in data['query']['categorymembers'])\n", | |
" superheroes += [c for c in candidates if not c.startswith('Category:')]\n", | |
" results[c] = list(set(superheroes))\n", | |
"\n", | |
"results['Marvel female'] = [superheroin for superheroin in results['Marvel female']\n", | |
" if superheroin in results['Marvel']]\n", | |
"results['DC female'] = [superheroin for superheroin in results['DC female']\n", | |
" if superheroin in results['DC']]\n", | |
" \n", | |
"ml_total = len(results['Marvel'])\n", | |
"ml_female = len(results['Marvel female'])\n", | |
"ml_percent = 100 * ml_female / ml_total\n", | |
"dc_total = len(results['DC'])\n", | |
"dc_female = len(results['DC female'])\n", | |
"dc_percent = 100 * dc_female / dc_total\n", | |
"\n", | |
"print(f'Marvel superheroes: {ml_total} including {ml_female} female ({ml_percent:.2f}%)')\n", | |
"print(f'DC superheroes: {dc_total} including {dc_female} female ({dc_percent:.2f}%)')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Significant: False, p-value:0.09457\n" | |
] | |
}, | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAEGCAYAAACgt3iRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/d3fzzAAAACXBIWXMAAAsTAAALEwEAmpwYAAAaIklEQVR4nO3de7iVdZ338fcXBPGUJWpjkoBJoCiDcjAPGWGKpZk5eqXpRNFB53E81GhXmTjlZIdHxsfRSHPMB/OAFXhg7OQReTRS2EIIeaASlclLCUdmIjHA7/PHuvdusd3svfHea++9WO/Xde1rrfu4vr977bU/+z6s3x2ZiSRJZfTp6QIkSfXPMJEklWaYSJJKM0wkSaUZJpKk0rbp6QJ6yq677ppDhgzp6TIkqa40NTX9MTN3az2+YcNkyJAhLFy4sKfLkKS6EhHPtjXew1ySpNIME0lSaYaJJKm0hj1n0pb169ezcuVK1q1b19Ol9DoDBgxg0KBB9OvXr6dLkdQLGSZVVq5cyU477cSQIUOIiJ4up9fITFavXs3KlSsZOnRoT5cjqRfyMFeVdevWMXDgQIOklYhg4MCB7rFJ2izDpBWDpG1uF0ntMUwkSaUZJjXy4osv8vGPf5y9996bMWPGcMghh3D77beXXu/cuXM57rjjuqBCSeo6noCvgczkhBNOYPLkydxyyy0APPvss8yZM6fba9mwYQPbbNPYb/OYC37Q0yVskdt3uqynS1Cd2+vix7v9Nd0zqYH777+f/v37c+aZZ7aMGzx4MGeffTYbN27kggsuYNy4cYwaNYrvfe97QGWPY8KECZx00kmMGDGC0047jea7YP785z9nxIgRHH744dx2220t61y7di1Tpkxh3LhxHHjggdx5550AzJgxg5NPPpkPf/jDHH300d3YckmNqrH/Za2RZcuWcdBBB7U57fvf/z4777wzCxYs4LXXXuOwww5r+YO/aNEili1bxjve8Q4OO+wwHn74YcaOHctnP/tZ7r//fvbZZx8+9rGPtazr0ksvZeLEiVx//fW88sorjB8/ng984AMAzJ8/nyVLlrDLLrvUvsGSGp5h0g3OOussHnroIfr378/gwYNZsmQJs2bNAmDNmjUsX76c/v37M378eAYNGgTA6NGjWbFiBTvuuCNDhw5l2LBhAJx++ulce+21ANx9993MmTOHadOmAZVLm5977jkAjjrqKINEUrcxTGpg5MiRzJ49u2V4+vTp/PGPf2Ts2LHstddeXHXVVUyaNGmTZebOncu2227bMty3b182bNgAbP6y3Mxk9uzZDB8+fJPxjzzyCDvssENXNUeSOuQ5kxqYOHEi69at4+qrr24Z9+c//xmASZMmcfXVV7N+/XoAnn76adauXbvZdY0YMYJnnnmG3/3udwDMnDmzZdqkSZO46qqrWs6tLFq0qMvbIkmdYZjUQERwxx138OCDDzJ06FDGjx/P5MmT+fa3v81nPvMZ9ttvPw466CD2339/zjjjjJY9kLYMGDCAa6+9lmOPPZbDDz+cwYMHt0ybOnUq69evZ9SoUey///5MnTq1O5onSW8Qzf/VNpqxY8dm65tjPfHEE+y77749VFHvV6/bx0uD1WhqeWlwRDRl5tjW490zkSSVZphIkkozTCRJpRkmkqTSDBNJUmmGiSSpNL8B346uvqS06bJPdDhP3759OeCAA1qG77jjDoYMGdKldTQbMmQICxcuZNddd63J+iU1DsOkl9luu+1YvHhxT5chSVvEw1x1oKmpife9732MGTOGSZMm8cILLwAwYcIEPv/5z3PEEUew7777smDBAk488USGDRvGRRdd1LL8CSecwJgxYxg5cmRLJ5Gt3XTTTYwfP57Ro0dzxhlnsHHjxm5pm6Stg2HSy7z66quMHj2a0aNH89GPfpT169dz9tlnM2vWLJqampgyZQpf+cpXWubv378/8+bN48wzz+QjH/kI06dPZ+nSpcyYMYPVq1cDcP3119PU1MTChQu58sorW8Y3e+KJJ/jhD3/Iww8/zOLFi+nbty8333xzt7ZbUn3zMFcv0/ow19KlS1m6dClHHXUUABs3bmSPPfZomX788ccDcMABBzBy5MiWaXvvvTfPP/88AwcO5Morr2y5ZfDzzz/P8uXLGThwYMs67rvvPpqamhg3bhxQCbTdd9+9pu2UtHUxTHq5zGTkyJHMnz+/zenN3db36dNnky7s+/Tpw4YNG5g7dy733nsv8+fPZ/vtt2fChAmsW7fuDa8xefJkvvnNb9auIZK2ah7m6uWGDx/OqlWrWsJk/fr1LFu2rNPLr1mzhre97W1sv/32PPnkk/zqV796wzxHHnkks2bN4qWXXgLg5Zdf5tlnn+2aBkhqCO6ZtKMzl/LWWv/+/Zk1axbnnHMOa9asYcOGDZx33nmMHDmyU8sfc8wxXHPNNYwaNYrhw4fznve85w3z7Lfffnz961/n6KOP5vXXX6dfv35Mnz59k+7uJak9dkFfpV67WO8u9bp97IJejcYu6CVJdckwkSSVZphIkkozTCRJpRkmkqTSDBNJUml+z6Qdz11yQMczbYHOXK4XEZx++unceOONAGzYsIE99tiDgw8+mLvuumuzy82dO5dp06a1O48k1Yp7Jr3MDjvswNKlS3n11VcBuOeee9hzzz17uCpJap9h0gt98IMf5Cc/+QkAM2fO5NRTT22Z9uijj3LooYdy4IEHcuihh/LUU0+9Yfm1a9cyZcoUxo0bx4EHHsidd97ZbbVLakyGSS90yimncOutt7Ju3TqWLFnCwQcf3DJtxIgRzJs3j0WLFnHJJZdw4YUXvmH5Sy+9lIkTJ7JgwQIeeOABLrjgAtauXdudTZDUYDxn0guNGjWKFStWMHPmTD70oQ9tMm3NmjVMnjyZ5cuXExGsX7/+DcvffffdzJkzh2nTpgGwbt06nnvuubrsCkVSfTBMeqnjjz+e888/n7lz525yM6upU6fy/ve/n9tvv50VK1YwYcKENyybmcyePZvhw4d3Y8WSGpmHuXqpKVOmcPHFF3PAAZteUbZmzZqWE/IzZsxoc9lJkyZx1VVX0dyJ56JFi2paqyS5Z9KOWva82ZFBgwZx7rnnvmH8F7/4RSZPnszll1/OxIkT21x26tSpnHfeeYwaNYrMZMiQIV4yLKmm7IK+Sr12sd5d6nX72AW9Go1d0EuS6pJhIkkqzTBppVEP+3XE7SKpPXURJhGxMSIWR8SyiPh1RHwhIvpUTR8fEfMi4qmIeDIirouI7bf0dQYMGMDq1av9w9lKZrJ69WoGDBjQ06VI6qXq5WquVzNzNEBE7A7cAuwM/HNEvB34MXBKZs6PiAD+DtgJ+POWvMigQYNYuXIlq1at6tLitwYDBgxg0KBBPV2GpF6qXsKkRWa+FBGfAxZExFeBs4AbMnN+MT2BWW9m3f369WPo0KFdVqskNYq6OMzVWmb+nkrtuwP7A02dWS4iPhcRCyNioXsfktR16jJMCrGlC2TmtZk5NjPH7rbbbrWoSZIaUl2GSUTsDWwEXgKWAWN6tiJJamx1FyYRsRtwDfCd4vzId4DJEXFw1TynR8Tf9FSNktRo6uUE/HYRsRjoB2wAbgQuB8jMFyPiFGBacaXX68A84LYeqlWSGk5dhElm9u1g+nzgvd1UjiSplbo7zCVJ6n0ME0lSaYaJJKk0w0SSVJphIkkqzTCRJJVmmEiSSjNMJEmlGSaSpNIME0lSaYaJJKk0w0SSVJphIkkqzTCRJJVmmEiSSjNMJEmlGSaSpNIME0lSaYaJJKk0w0SSVJphIkkqzTCRJJVmmEiSSjNMJEmlGSaSpNIME0lSaYaJJKk0w0SSVJphIkkqrVNhEhWnR8TFxfBeETG+tqVJkupFZ/dMvgscApxaDP8PML0mFUmS6s42nZzv4Mw8KCIWAWTmf0VE/xrWJUmqI53dM1kfEX2BBIiI3YDXa1aVJKmudDZMrgRuB3aPiEuBh4Bv1KwqSVJd6dRhrsy8OSKagCOBAE7IzCdqWpkkqW509mqudwHPZOZ0YClwVES8tZaFSZLqR2cPc80GNkbEPsB1wFDglppVJUmqK529muv1zNwQEScC/5aZVzVf2SX1dk2XfaKnS9hC9VavtGVXc51K5bf8rmJcv9qUJEmqN50Nk09R+dLipZn5TEQMBW6qXVmSpHrS4WGu4vslF2bm6c3jMvMZ4Fu1LEySVD863DPJzI3Abn7jXZK0OZ09Ab8CeDgi5gBrm0dm5uW1KEqSVF86GyZ/KH76ADvVrhxJUj3q7DfgvwYQETtk5tqO5pckNZbOfgP+kIj4DfBEMfy3EfHdmlYmSaobnb00+ApgErAaIDN/DRxRo5okSXWm07ftzcznW43a2MW1SJLqVGdPwD8fEYcCWVwifA7FIS9Jkjq7Z3ImcBawJ7ASGF0MS5LU6T2TyMzTalqJJKludXbP5JcRcXdEfNr7mEiSWutUmGTmMOAiYCTwWETcFRGnd7CYJKlBbMnVXI9m5heA8cDLwA01q0qSVFc6+6XFt0TE5Ij4GfBL4AUqoSJJUqdPwP8auAO4JDPn164cSVI96myY7J2ZWdNKJEl1q90wiYgrMvM8YE5EvCFMMvP4WhUmSaofHe2Z3Fg8Tqt1IZKk+tVumGRmU/H4YPeUI3XemAt+0NMl1MTtO13W0yVoK7fXxY93+To7dc4kIg4DvgoMLpYJIDNz7y6vSJJUdzp7Av77wOeBJuwtWJLUSmfDZE1m/qymlUiS6lZnw+SBiLgMuA14rXlkZj5Wk6okSXWls2FycPE4pngMIIGJXV6RJKnudPQ9ky8UT+8qHhNYBTyUmc/UsjBJUv3oqG+unYqfHYufnYCxwM8i4pQa1yZJqhMdfc/ka22Nj4hdgHuBW2tRlCSpvnS6C/pqmfkylfMmkiS9uTCJiInAf3VxLZKkOtXRCfjHqZx0r7YL8AfgE7UqSpJUXzq6NPi4VsMJrM7MtTWqR5JUhzo6Af9sdxUiSapfb+qciSRJ1QwTSVJphokkqTTDRJJUmmEiSSrNMJEklWaYSJJKM0wkSaUZJpKk0gwTSVJphokkqTTDRJJUmmEiSSrNMJEklWaYSJJKM0wkSaUZJpKk0gwTSVJphokkqTTDRJJUmmEiSSrNMJEklWaYSJJKM0wkSaUZJpKk0gwTSVJpNQuTiMiIuLFqeJuIWBURd9XqNYvXGRIRS2v5GpKkTdVyz2QtsH9EbFcMHwX855asICK26fKqJEldrtaHuX4GHFs8PxWY2TwhIsZHxC8jYlHxOLwY/8mI+HFE/Adwd0T8MCI+VLXcjIj4u4joGxGXRcSCiFgSEWfUuC2SpM2odZjcCpwSEQOAUcAjVdOeBI7IzAOBi4FvVE07BJicmROLdXwMICL6A0cCPwU+DazJzHHAOOCzETG0vWIi4nMRsTAiFq5atapLGihJgpoeRsrMJRExhMpeyU9bTd4ZuCEihgEJ9Kuadk9mvlw8/xlwZURsCxwDzMvMVyPiaGBURJxUtb5hwNPt1HMtcC3A2LFjs1TjJEktuuOcxBxgGjABGFg1/l+ABzLzo0XgzK2atrb5SWaui4i5wCQqeyjNh8oCODszf1H9YsW6JEndqDsuDb4euCQzH281fmf+ekL+kx2s41bgU8B7gebw+AXwDxHRDyAi3h0RO3RJxZKkLVLzMMnMlZn5b21M+t/ANyPiYaBvB6u5GzgCuDcz/1KMuw74DfBYcSnw9+iePS1JUis1++ObmTu2MW4uxeGszJwPvLtq8tRi/AxgRqvl1rPpITIy83XgwuKn2hpg/xKlS5K2kN+AlySVZphIkkozTCRJpRkmkqTSDBNJUmmGiSSpNMNEklSaYSJJKs0wkSSVZphIkkozTCRJpRkmkqTSDBNJUmmGiSSpNMNEklSaYSJJKs0wkSSVZphIkkozTCRJpRkmkqTSDBNJUmmGiSSpNMNEklSaYSJJKs0wkSSVZphIkkozTCRJpRkmkqTSDBNJUmmGiSSpNMNEklSaYSJJKs0wkSSVZphIkkozTCRJpRkmkqTSDBNJUmmGiSSptG16ugDpzWq67BM9XUKNbK3t0tbMPRNJUmmGiSSpNMNEklSaYSJJKs0wkSSVZphIkkozTCRJpRkmkqTSDBNJUmmGiSSptMjMnq6hR0TEKuDZnq7jTdgV+GNPF9HNGq3NjdZeaLw213N7B2fmbq1HNmyY1KuIWJiZY3u6ju7UaG1utPZC47V5a2yvh7kkSaUZJpKk0gyT+nNtTxfQAxqtzY3WXmi8Nm917fWciSSpNPdMJEmlGSaSpNIMk14oIvpGxKKIuKsY3iUi7omI5cXj26rm/XJE/DYinoqIST1X9ZsTEe+MiAci4omIWBYR5xbjt9o2tycijina9duI+FJP19MVGvU9bqTPMRgmvdW5wBNVw18C7svMYcB9xTARsR9wCjASOAb4bkT07eZay9oA/FNm7gu8BziraNfW3OY2Fe2YDnwQ2A84tWhvvWvU97iRPseGSW8TEYOAY4HrqkZ/BLiheH4DcELV+Fsz87XMfAb4LTC+m0rtEpn5QmY+Vjz/Hyofvj3ZitvcjvHAbzPz95n5F+BWKu2ta434Hjfa5xgMk97oCuCLwOtV496emS9A5YMJ7F6M3xN4vmq+lcW4uhQRQ4ADgUdokDa3sjW3DWio9/gKGuxzbJj0IhFxHPBSZjZ1dpE2xtXltd4RsSMwGzgvM/+7vVnbGFeXbW7D1ty2hnmPG/VzvE1PF6BNHAYcHxEfAgYAb4mIm4AXI2KPzHwhIvYAXirmXwm8s2r5QcAfurXiLhAR/aj8kbk5M28rRm/Vbd6MrbZtDfYeN+Tn2D2TXiQzv5yZgzJzCJUTcvdn5unAHGByMdtk4M7i+RzglIjYNiKGAsOAR7u57FIiIoDvA09k5uVVk7baNrdjATAsIoZGRH8qvwNzerim0hrtPW7EzzG4Z1IvvgX8KCI+DTwHnAyQmcsi4kfAb6hcMXNWZm7suTLflMOAvwcej4jFxbgL2brb3KbM3BAR/wj8AugLXJ+Zy3q4rK7ge1yxVbfX7lQkSaV5mEuSVJphIkkqzTCRJJVmmEiSSjNMJEmlGSYqLSIyIv61avj8iPhqF617RkSc1BXr6uB1Ti56tX2g1q/VQR0TmnuZ7S0i4pMR8Z2erqO17vrdUOcYJuoKrwEnRsSuPV1ItS3sefXTwP/KzPfXqp6OdEVPsRHhd8c2w21TW4aJusIGKve0/nzrCa3/e4yIPxWPEyLiwYj4UUQ8HRHfiojTIuLRiHg8It5VtZoPRMT/K+Y7rli+b0RcFhELImJJRJxRtd4HIuIW4PE26jm1WP/SiPh2Me5i4HDgmoi4rNX8e0TEvIhYXCzz3up2FM9PiogZVe29pmS9O0bErIh4MiJuLr5BTkSMKbZZU0T8ouiSg4iYGxHfiIgHgXMj4sio3Efj8Yi4PiK27WD5cyLiN0Vdt27mPX5nRPw8Kvfb+OdiuX+J4t4kxfClEXFOG9t8atGWeyJiZkScX4x/V7HOpmJ7jajahldGxC8j4vfNvz9R8Z2i1p/w144SO71tNtM2dYXM9MefUj/An4C3ACuAnYHzga8W02YAJ1XPWzxOAF4B9gC2Bf4T+Fox7Vzgiqrlf07lH59hVPoxGgB8DriomGdbYCEwtFjvWmBoG3W+g8o3j3ej0vvD/cAJxbS5wNg2lvkn4CvF877ATtXtKJ6fBMzoinqL4TVU+mfqA8ynEnT9gF8CuxXzfYzKN+Sba/9u8XwAlR5o310M/wA4r4Pl/wBsWzx/axvb4JPAC8BAYDtgKTAWGAI8VszTB/gdMLDVsmOBxcVyOwHLgfOLafcBw4rnB1PpdqR5G/64WOd+VLrlBzgRuKd4H95B5ffnpM5uG39q++Nun7pEZv53RPwAOAd4tZOLLciiS+6I+B1wdzH+caD6cNOPMvN1YHlE/B4YARwNjKra69mZyh/vvwCPZuW+EK2NA+Zm5qriNW8GjgDuaK9G4PqodFR4R2Yu7kS7ytb7aGauLGpcTOWP9ivA/sA9xY5KXyp/4Jv9sHgcDjyTmU8XwzcAZwH3trP8EuDmiLiDzW+LezJzdVHTbcDhmXlFRKyOiAOBtwOLmuepcjhwZ2a+Wiz7H8XjjsChwI+LeqASss3uKLbhbyLi7cW4I4CZWelq5A8RcX9VmzuzbVRDhom60hXAY8D/rRq3geJwanG4pn/VtNeqnr9eNfw6m/5utu7zJ6l02312Zv6iekJETKDyn35b2urqu12ZOS8ijqByo6MbI+KyzPxBq5oGtFFfmXqrt8tGKtsigGWZechmSm1ex+ba2N7yx1L5Q308MDUiRmbmhjba0NbwdVT2XP4GuH4zr9uWPsArmTl6M9Ort0H1Otrq/6mz20Y15DkTdZnMfBn4EZWT2c1WAGOK5x+hckhiS50cEX2ich5lb+ApKp0h/kOxx0BEvDsiduhgPY8A74uIXaNysvtU4MH2FoiIwVTuTfHvVHq+PaiY9GJE7BsRfYCP1qjeak8Bu0XEIcXy/SJiZBvzPQkMiYh9iuG/L9rY5vJF/e/MzAeo3MzprcCObaz3qKjcw3w7KncIfLgYfzuVW82OK9rY2kPAhyNiQLE3cixU9mSBZyLi5KKeiIi/7WAbzKPSu27f4pxI895rZ7eNasg9E3W1fwX+sWr434E7I+JRKsfI38x/iU9R+YP4duDMzFwXEddRHLMv9nhW8dfboLYpK/eR+DLwAJX/Zn+amXe2twyVcxgXRMR6KueGPlGM/xJwF5XzE0vZ9A9wl9Tbqva/FIfIroyInal8dq8AlrWab11EfIrK4aNtqBymu6ad5Z8GbirGBfB/MvOVNkp4CLgR2Ae4JTMXVtX1AJW9jDf0dJuZCyJiDvBr4Fkq54rWFJNPA66OiIuo/JNxazHf5twOTKRyGPRpin8EOrttVFv2Gix1oahc1XVXZs7q6Vq6Q7Fn8xhwcmYu38w8O2bmnyJieyp7F5/L4p7w2np4mEvSmxIR+wG/Be7bXJAUri0uJHgMmG2QbJ3cM5EkleaeiSSpNMNEklSaYSJJKs0wkSSVZphIkkr7/40Ia/ObfjDjAAAAAElFTkSuQmCC\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"needs_background": "light" | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"import seaborn as sns\n", | |
"import matplotlib.ticker as mticker\n", | |
"import pandas as pd\n", | |
"from scipy.stats import mannwhitneyu\n", | |
"\n", | |
"superheroin_names = results['Marvel female'] + results['DC female']\n", | |
"superhero_names = results['Marvel'] + results['DC']\n", | |
"superhero_universe = ['Marvel' for _ in results['Marvel']] + ['DC' for _ in results['DC']]\n", | |
"superhero_gender = ['Female' if superhero in superheroin_names else 'Male' for superhero in superhero_names]\n", | |
"\n", | |
"superheroes = pd.DataFrame({'Name': superhero_names, \n", | |
" 'Universe': superhero_universe, \n", | |
" 'Gender': superhero_gender}).sort_values('Name')\n", | |
"\n", | |
"data = superheroes.groupby(['Universe', 'Gender'], as_index=False).size()\n", | |
"data.loc[data['Gender'] == 'Female', 'size'] = -data.loc[data['Gender'] == 'Female', 'size']\n", | |
"ax = sns.barplot(x='size', y='Universe', hue='Gender', data=data, dodge=False, orient='h')\n", | |
"ax.set_xlim(-data['size'].max() * 1.1, data['size'].max() * 1.1)\n", | |
"ticks_loc = ax.get_xticks().tolist()\n", | |
"labels = [str(abs(int(t))) for t in ticks_loc]\n", | |
"ax.xaxis.set_major_locator(mticker.FixedLocator(ticks_loc))\n", | |
"ax.set_xticklabels(labels)\n", | |
"ax.set_ylabel('Universe')\n", | |
"ax.set_xlabel('Number of superheroes by gender')\n", | |
"\n", | |
"stats = mannwhitneyu(superheroes.loc[superheroes['Universe'] == 'Marvel', 'Gender'],\n", | |
" superheroes.loc[superheroes['Universe'] == 'DC', 'Gender'])\n", | |
"print(f'Significant: {stats.pvalue < 0.05}, p-value:{stats.pvalue:0.5f}')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment