Created
November 8, 2018 16:36
-
-
Save ischurov/fa28afb920c5d0297b0016f6960dcc42 to your computer and use it in GitHub Desktop.
scrap FIFA rating.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "import pandas as pd", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# Ranking page for 2013/12: take the first HTML table, rename the\n# 'Unnamed: 3' and 'Очки' columns to 'team' and 'score', keep only those two.\ndf = (\n    pd.read_html(\n        \"http://www.championat.com/football/fifa/2013/12\",\n        header=0,\n        thousands=\" \",\n    )[0]\n    .rename(columns={'Unnamed: 3': 'team', 'Очки': 'score'})\n    [['team', 'score']]\n)", | |
"execution_count": 33, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "df", | |
"execution_count": 34, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 34, | |
"data": { | |
"text/plain": " team score\n0 Испания 1507\n1 Германия 1318\n2 Аргентина 1251\n3 Колумбия 1200\n4 Португалия 1172\n5 Уругвай 1132\n6 Италия 1120\n7 Швейцария 1113\n8 Нидерланды 1106\n9 Бразилия 1102\n10 Бельгия 1098\n11 Греция 1055\n12 Англия 1041\n13 США 1019\n14 Чили 1014\n15 Хорватия 971\n16 Кот-д'Ивуар 918\n17 Украина 907\n18 Босния и Герцеговина 899\n19 Франция 893\n20 Мексика 892\n21 Россия 870\n22 Эквадор 852\n23 Гана 849\n24 Дания 831\n25 Алжир 800\n26 Швеция 793\n27 Чехия 766\n28 Словения 762\n29 Сербия 752\n.. ... ...\n179 Вануату 53\n180 Монголия 49\n181 Фиджи 47\n182 Самоа 45\n183 Гвинея-Биссау 42\n184 Багамские острова 40\n185 Свазиленд 37\n186 Монтсеррат 33\n187 Мадагаскар 33\n188 Камбоджа 28\n189 Бруней 26\n190 Тонга 26\n191 Восточный Тимор 26\n192 Американские Виргинские острова 23\n193 Папуа - Новая Гвинея 21\n194 Каймановы острова 21\n195 Американское Самоа 18\n196 Британские Виргинские острова 18\n197 Андорра 17\n198 Коморские острова 17\n199 Эритрея 11\n200 Южный Судан 10\n201 Макао 10\n202 Сомали 8\n203 Джибути 6\n204 Острова Кука 5\n205 Ангилья 3\n206 Бутан 0\n207 Тёркс и Кайкос 0\n208 Сан-Марино 0\n\n[209 rows x 2 columns]", | |
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>team</th>\n <th>score</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>Испания</td>\n <td>1507</td>\n </tr>\n <tr>\n <th>1</th>\n <td>Германия</td>\n <td>1318</td>\n </tr>\n <tr>\n <th>2</th>\n <td>Аргентина</td>\n <td>1251</td>\n </tr>\n <tr>\n <th>3</th>\n <td>Колумбия</td>\n <td>1200</td>\n </tr>\n <tr>\n <th>4</th>\n <td>Португалия</td>\n <td>1172</td>\n </tr>\n <tr>\n <th>5</th>\n <td>Уругвай</td>\n <td>1132</td>\n </tr>\n <tr>\n <th>6</th>\n <td>Италия</td>\n <td>1120</td>\n </tr>\n <tr>\n <th>7</th>\n <td>Швейцария</td>\n <td>1113</td>\n </tr>\n <tr>\n <th>8</th>\n <td>Нидерланды</td>\n <td>1106</td>\n </tr>\n <tr>\n <th>9</th>\n <td>Бразилия</td>\n <td>1102</td>\n </tr>\n <tr>\n <th>10</th>\n <td>Бельгия</td>\n <td>1098</td>\n </tr>\n <tr>\n <th>11</th>\n <td>Греция</td>\n <td>1055</td>\n </tr>\n <tr>\n <th>12</th>\n <td>Англия</td>\n <td>1041</td>\n </tr>\n <tr>\n <th>13</th>\n <td>США</td>\n <td>1019</td>\n </tr>\n <tr>\n <th>14</th>\n <td>Чили</td>\n <td>1014</td>\n </tr>\n <tr>\n <th>15</th>\n <td>Хорватия</td>\n <td>971</td>\n </tr>\n <tr>\n <th>16</th>\n <td>Кот-д'Ивуар</td>\n <td>918</td>\n </tr>\n <tr>\n <th>17</th>\n <td>Украина</td>\n <td>907</td>\n </tr>\n <tr>\n <th>18</th>\n <td>Босния и Герцеговина</td>\n <td>899</td>\n </tr>\n <tr>\n <th>19</th>\n <td>Франция</td>\n <td>893</td>\n </tr>\n <tr>\n <th>20</th>\n <td>Мексика</td>\n <td>892</td>\n </tr>\n <tr>\n <th>21</th>\n <td>Россия</td>\n <td>870</td>\n </tr>\n <tr>\n <th>22</th>\n <td>Эквадор</td>\n <td>852</td>\n </tr>\n <tr>\n <th>23</th>\n <td>Гана</td>\n <td>849</td>\n </tr>\n <tr>\n <th>24</th>\n <td>Дания</td>\n <td>831</td>\n </tr>\n <tr>\n 
<th>25</th>\n <td>Алжир</td>\n <td>800</td>\n </tr>\n <tr>\n <th>26</th>\n <td>Швеция</td>\n <td>793</td>\n </tr>\n <tr>\n <th>27</th>\n <td>Чехия</td>\n <td>766</td>\n </tr>\n <tr>\n <th>28</th>\n <td>Словения</td>\n <td>762</td>\n </tr>\n <tr>\n <th>29</th>\n <td>Сербия</td>\n <td>752</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>179</th>\n <td>Вануату</td>\n <td>53</td>\n </tr>\n <tr>\n <th>180</th>\n <td>Монголия</td>\n <td>49</td>\n </tr>\n <tr>\n <th>181</th>\n <td>Фиджи</td>\n <td>47</td>\n </tr>\n <tr>\n <th>182</th>\n <td>Самоа</td>\n <td>45</td>\n </tr>\n <tr>\n <th>183</th>\n <td>Гвинея-Биссау</td>\n <td>42</td>\n </tr>\n <tr>\n <th>184</th>\n <td>Багамские острова</td>\n <td>40</td>\n </tr>\n <tr>\n <th>185</th>\n <td>Свазиленд</td>\n <td>37</td>\n </tr>\n <tr>\n <th>186</th>\n <td>Монтсеррат</td>\n <td>33</td>\n </tr>\n <tr>\n <th>187</th>\n <td>Мадагаскар</td>\n <td>33</td>\n </tr>\n <tr>\n <th>188</th>\n <td>Камбоджа</td>\n <td>28</td>\n </tr>\n <tr>\n <th>189</th>\n <td>Бруней</td>\n <td>26</td>\n </tr>\n <tr>\n <th>190</th>\n <td>Тонга</td>\n <td>26</td>\n </tr>\n <tr>\n <th>191</th>\n <td>Восточный Тимор</td>\n <td>26</td>\n </tr>\n <tr>\n <th>192</th>\n <td>Американские Виргинские острова</td>\n <td>23</td>\n </tr>\n <tr>\n <th>193</th>\n <td>Папуа - Новая Гвинея</td>\n <td>21</td>\n </tr>\n <tr>\n <th>194</th>\n <td>Каймановы острова</td>\n <td>21</td>\n </tr>\n <tr>\n <th>195</th>\n <td>Американское Самоа</td>\n <td>18</td>\n </tr>\n <tr>\n <th>196</th>\n <td>Британские Виргинские острова</td>\n <td>18</td>\n </tr>\n <tr>\n <th>197</th>\n <td>Андорра</td>\n <td>17</td>\n </tr>\n <tr>\n <th>198</th>\n <td>Коморские острова</td>\n <td>17</td>\n </tr>\n <tr>\n <th>199</th>\n <td>Эритрея</td>\n <td>11</td>\n </tr>\n <tr>\n <th>200</th>\n <td>Южный Судан</td>\n <td>10</td>\n </tr>\n <tr>\n <th>201</th>\n <td>Макао</td>\n <td>10</td>\n </tr>\n <tr>\n <th>202</th>\n <td>Сомали</td>\n <td>8</td>\n </tr>\n <tr>\n 
<th>203</th>\n <td>Джибути</td>\n <td>6</td>\n </tr>\n <tr>\n <th>204</th>\n <td>Острова Кука</td>\n <td>5</td>\n </tr>\n <tr>\n <th>205</th>\n <td>Ангилья</td>\n <td>3</td>\n </tr>\n <tr>\n <th>206</th>\n <td>Бутан</td>\n <td>0</td>\n </tr>\n <tr>\n <th>207</th>\n <td>Тёркс и Кайкос</td>\n <td>0</td>\n </tr>\n <tr>\n <th>208</th>\n <td>Сан-Марино</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n<p>209 rows × 2 columns</p>\n</div>" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "def get_score(year, month, team):\n    \"\"\"Return the FIFA ranking score of `team` for the given year and month.\n\n    Reads the first HTML table at\n    http://www.championat.com/football/fifa/{year}/{month} and looks up the row\n    whose 'team' column equals `team` exactly.\n\n    Parameters:\n        year, month: interpolated into the page URL.\n        team: team name as spelled in the table's 'team' column.\n\n    Returns:\n        The team's score as an int.\n\n    Raises:\n        ValueError: if the table does not contain exactly one row for `team`.\n    \"\"\"\n    df = (pd.read_html(f\"http://www.championat.com/football/fifa/{year}/{month}\",\n                       header=0, thousands=\" \")[0]\n          .rename(columns={'Unnamed: 3': 'team', 'Очки': 'score'})\n          [['team', 'score']])\n    scores = df.loc[df['team'] == team, 'score']\n    # int() on a whole Series is deprecated in recent pandas and raises an\n    # opaque TypeError when the team is missing, so check the number of\n    # matches and extract the scalar explicitly.\n    if len(scores) != 1:\n        raise ValueError(f\"expected exactly one row for team {team!r} \"\n                         f\"in {year}/{month}, found {len(scores)}\")\n    return int(scores.iloc[0])", | |
"execution_count": 31, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "# (year, month, team) triples to look up; team names are written as they\n# appear in the ranking table's 'team' column.\nqueries = [(2013, 9, 'Нидерланды'), (2014, 10, 'Германия')]\n# One score per query, in the same order as `queries`. A comprehension keeps\n# the loop variables out of the notebook's global namespace.\nresults = [get_score(year, month, team) for year, month, team in queries]", | |
"execution_count": 38, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "results", | |
"execution_count": 39, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 39, | |
"data": { | |
"text/plain": "[1058, 1669]" | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.7.0", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "scrap FIFA rating.ipynb", | |
"public": true | |
} | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment