Created
December 25, 2021 21:03
-
-
Save vol1ura/0e2f8aa924753c763a2c0bb868705a8e to your computer and use it in GitHub Desktop.
clubmates_engirunners.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "clubmates_engirunners.ipynb", | |
"provenance": [], | |
"authorship_tag": "ABX9TyOWSRcN76BxAOsrr77rG3Hz", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
}, | |
"widgets": { | |
"application/vnd.jupyter.widget-state+json": { | |
"4a3d8cc726d04888a03aa610ceaced1e": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HBoxModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HBoxView", | |
"_dom_classes": [], | |
"_model_name": "HBoxModel", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"box_style": "", | |
"layout": "IPY_MODEL_820063756eb54fc8a39a84db865d7ea4", | |
"_model_module": "@jupyter-widgets/controls", | |
"children": [ | |
"IPY_MODEL_8c8627c52d344f8cac5b209f2eaa56f9", | |
"IPY_MODEL_294f52e6617b4f3b922bd647419e6cd2", | |
"IPY_MODEL_4d869a25e2f54da2a28c926279e511db" | |
] | |
} | |
}, | |
"820063756eb54fc8a39a84db865d7ea4": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"8c8627c52d344f8cac5b209f2eaa56f9": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_770911812c774074a283d41798d1ff5b", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": "100%", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_be94329f0da54421a3f16b25bbc07548" | |
} | |
}, | |
"294f52e6617b4f3b922bd647419e6cd2": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "FloatProgressModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "ProgressView", | |
"style": "IPY_MODEL_cc345771c1374fc5a18e67aad15358fb", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "FloatProgressModel", | |
"bar_style": "success", | |
"max": 108, | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": 108, | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"orientation": "horizontal", | |
"min": 0, | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_a2238e026f2748748920d7dcadf8f93a" | |
} | |
}, | |
"4d869a25e2f54da2a28c926279e511db": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "HTMLModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "HTMLView", | |
"style": "IPY_MODEL_139c00373a514ceb9edbc7710431483b", | |
"_dom_classes": [], | |
"description": "", | |
"_model_name": "HTMLModel", | |
"placeholder": "", | |
"_view_module": "@jupyter-widgets/controls", | |
"_model_module_version": "1.5.0", | |
"value": " 108/108 [17:16<00:00, 8.81s/it]", | |
"_view_count": null, | |
"_view_module_version": "1.5.0", | |
"description_tooltip": null, | |
"_model_module": "@jupyter-widgets/controls", | |
"layout": "IPY_MODEL_2c502534ff18428187f87629a87bfc8d" | |
} | |
}, | |
"770911812c774074a283d41798d1ff5b": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"be94329f0da54421a3f16b25bbc07548": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"cc345771c1374fc5a18e67aad15358fb": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "ProgressStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "ProgressStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"bar_color": null, | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"a2238e026f2748748920d7dcadf8f93a": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
}, | |
"139c00373a514ceb9edbc7710431483b": { | |
"model_module": "@jupyter-widgets/controls", | |
"model_name": "DescriptionStyleModel", | |
"model_module_version": "1.5.0", | |
"state": { | |
"_view_name": "StyleView", | |
"_model_name": "DescriptionStyleModel", | |
"description_width": "", | |
"_view_module": "@jupyter-widgets/base", | |
"_model_module_version": "1.5.0", | |
"_view_count": null, | |
"_view_module_version": "1.2.0", | |
"_model_module": "@jupyter-widgets/controls" | |
} | |
}, | |
"2c502534ff18428187f87629a87bfc8d": { | |
"model_module": "@jupyter-widgets/base", | |
"model_name": "LayoutModel", | |
"model_module_version": "1.2.0", | |
"state": { | |
"_view_name": "LayoutView", | |
"grid_template_rows": null, | |
"right": null, | |
"justify_content": null, | |
"_view_module": "@jupyter-widgets/base", | |
"overflow": null, | |
"_model_module_version": "1.2.0", | |
"_view_count": null, | |
"flex_flow": null, | |
"width": null, | |
"min_width": null, | |
"border": null, | |
"align_items": null, | |
"bottom": null, | |
"_model_module": "@jupyter-widgets/base", | |
"top": null, | |
"grid_column": null, | |
"overflow_y": null, | |
"overflow_x": null, | |
"grid_auto_flow": null, | |
"grid_area": null, | |
"grid_template_columns": null, | |
"flex": null, | |
"_model_name": "LayoutModel", | |
"justify_items": null, | |
"grid_row": null, | |
"max_height": null, | |
"align_content": null, | |
"visibility": null, | |
"align_self": null, | |
"height": null, | |
"min_height": null, | |
"padding": null, | |
"grid_auto_rows": null, | |
"grid_gap": null, | |
"max_width": null, | |
"order": null, | |
"_view_module_version": "1.2.0", | |
"grid_template_areas": null, | |
"object_position": null, | |
"object_fit": null, | |
"grid_auto_columns": null, | |
"margin": null, | |
"display": null, | |
"left": null | |
} | |
} | |
} | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/vol1ura/0e2f8aa924753c763a2c0bb868705a8e/clubmates_engirunners.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"id": "zRg0J0r6ztxV" | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import random\n", | |
"import re\n", | |
"import requests\n", | |
"import time\n", | |
"from tqdm.notebook import tqdm\n", | |
"\n", | |
"pd.set_option('display.max_rows', None)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"club_id = 24630 # ENGIRUNNERS" | |
], | |
"metadata": { | |
"id": "clo0v2xzzyLU" | |
}, | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"headers = {\n", | |
" 'Host': 'www.parkrun.ru',\n", | |
" 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:88.0) Gecko/20100101 Firefox/88.0',\n", | |
" 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',\n", | |
" 'Accept-Language': 'ru-RU,ru;q=0.8,en-US;q=0.5,en;q=0.3',\n", | |
" 'Accept-Encoding': 'gzip, deflate, br',\n", | |
" 'Connection': 'keep-alive',\n", | |
" 'Upgrade-Insecure-Requests': '1',\n", | |
" 'Sec-GPC': '1',\n", | |
" 'TE': 'Trailers'\n", | |
" }" | |
], | |
"metadata": { | |
"id": "JMntqd19z4bw" | |
}, | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"page_all_results = requests.get('https://www.parkrun.ru/results/courserecords/', headers=headers)\n", | |
"data = pd.read_html(page_all_results.text)[0]\n", | |
"russian_parkruns = data[data.columns[0]]" | |
], | |
"metadata": { | |
"id": "ctIMuJuVz7T9" | |
}, | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"dfs = []\n", | |
"for parkrun in tqdm(russian_parkruns):\n", | |
" time.sleep(3 + 5*random.random())\n", | |
" parkrun_trim = re.sub(r'[\\s-]', '', parkrun)\n", | |
" url = f'https://www.parkrun.ru/{parkrun_trim}/results/clubhistory/?clubNum={club_id}'\n", | |
" club_results = requests.get(url, headers=headers)\n", | |
" try:\n", | |
" df = pd.read_html(club_results.text)[0]\n", | |
" dfs.append(df[df.columns[0]])\n", | |
" except:\n", | |
" print('ОШИБКА - операция завершилась досрочно. Паркран временно заблокировал IP.')\n", | |
" break" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 49, | |
"referenced_widgets": [ | |
"4a3d8cc726d04888a03aa610ceaced1e", | |
"820063756eb54fc8a39a84db865d7ea4", | |
"8c8627c52d344f8cac5b209f2eaa56f9", | |
"294f52e6617b4f3b922bd647419e6cd2", | |
"4d869a25e2f54da2a28c926279e511db", | |
"770911812c774074a283d41798d1ff5b", | |
"be94329f0da54421a3f16b25bbc07548", | |
"cc345771c1374fc5a18e67aad15358fb", | |
"a2238e026f2748748920d7dcadf8f93a", | |
"139c00373a514ceb9edbc7710431483b", | |
"2c502534ff18428187f87629a87bfc8d" | |
] | |
}, | |
"id": "pB86PSu40JiI", | |
"outputId": "c8369b97-53c8-4b8b-b0aa-3a195e802fc3" | |
}, | |
"execution_count": 5, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"application/vnd.jupyter.widget-view+json": { | |
"model_id": "4a3d8cc726d04888a03aa610ceaced1e", | |
"version_minor": 0, | |
"version_major": 2 | |
}, | |
"text/plain": [ | |
" 0%| | 0/108 [00:00<?, ?it/s]" | |
] | |
}, | |
"metadata": {} | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"def last_name_first(full_name: str):\n", | |
" names = full_name.split()\n", | |
" last_name = names.pop(1).capitalize()\n", | |
" names.insert(0, last_name)\n", | |
" return ' '.join(names)" | |
], | |
"metadata": { | |
"id": "JGv4TY_5LOho" | |
}, | |
"execution_count": 6, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"pd.concat(dfs).drop_duplicates(keep='last').apply(last_name_first).sort_values().reset_index(drop=True).shift(1, fill_value='________Фамилия_Имя___')" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "u-ij29bnAOaq", | |
"outputId": "1462dc7f-bf69-4293-9944-84c7dee29dd2" | |
}, | |
"execution_count": 7, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"0 ________Фамилия_Имя___\n", | |
"1 Kondratenko Gleb\n", | |
"2 Martynov Dmitry\n", | |
"3 Smirnov Kirill\n", | |
"4 Акинин Алексей\n", | |
"5 Бакланов Михаил\n", | |
"6 Ванаг Константин\n", | |
"7 Дмитренко Наталья\n", | |
"8 Зенов Роман\n", | |
"9 Иванов Сергей\n", | |
"10 Колокольников Алексей\n", | |
"11 Комаров Алексей\n", | |
"12 Кравцов Даниил\n", | |
"13 Лавлинская Дарья\n", | |
"14 Мартынова Диана\n", | |
"15 Маяков Денис\n", | |
"16 Нестеров Алексей\n", | |
"17 Осипов Федор\n", | |
"18 Петров Дмитрий\n", | |
"19 Пьянов Артем\n", | |
"20 Самошин Илья\n", | |
"21 Симонов Юрий\n", | |
"22 Сосновский Виктор\n", | |
"23 Телепень Николай\n", | |
"24 Титаренко Наталья\n", | |
"25 Ходакова Людмила\n", | |
"26 Шемякина Надежда\n", | |
"27 Щукина Ольга\n", | |
"Name: (Unnamed: 0_level_0, Участник), dtype: object" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 7 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"" | |
], | |
"metadata": { | |
"id": "526myT8CFL74" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment