-
-
Save CJ-Wright/278f0c4a050555d796e8b1e0c065aa5b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from asyncio import get_event_loop, Semaphore\n", | |
"import aiohttp\n", | |
"loop = get_event_loop()\n", | |
"import asyncio" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import tqdm" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"\n", | |
"# Read the token from the environment so a real credential never has to be\n", | |
"# saved in the notebook; the literal placeholder remains the fallback value.\n", | |
"TRAVIS_AUTH_TOKEN = os.environ.get('TRAVIS_AUTH_TOKEN', 'TRAVIS_AUTH_TOKEN')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Request headers sent with the API calls: API version plus token authorization.\n", | |
"headers = {\n", | |
"    'Travis-API-Version': '3',\n", | |
"    'Authorization': f'token {TRAVIS_AUTH_TOKEN}',\n", | |
"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"all_repos = []\n", | |
"\n", | |
"async def list_repos(session, url):\n", | |
"    \"\"\"Fetch every page of the repository listing, then query builds per repository.\n", | |
"\n", | |
"    The first page is requested from ``url``; the remaining pages are requested\n", | |
"    concurrently through ``list_repo_chunk`` in steps of 100 rows. Every page is\n", | |
"    appended to the module-level ``all_repos`` list, after which ``list_builds``\n", | |
"    is awaited for each entry found under a page's ``'repositories'`` key.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    session : client session used for every request.\n", | |
"    url : str\n", | |
"        URL of the first page of the repository listing.\n", | |
"\n", | |
"    Raises\n", | |
"    ------\n", | |
"    Exception\n", | |
"        Whatever decoding the first response raises; the raw body is printed\n", | |
"        before the exception is re-raised.\n", | |
"    \"\"\"\n", | |
"    # ``async with`` releases the connection even when decoding fails.\n", | |
"    async with session.get(url, headers=headers) as response:\n", | |
"        print(url)\n", | |
"        try:\n", | |
"            first_page = await response.json()\n", | |
"        except Exception:\n", | |
"            # Show the undecodable body so the failure can be diagnosed.\n", | |
"            print(await response.text())\n", | |
"            raise\n", | |
"    all_repos.append(first_page)\n", | |
"    pagination = first_page['@pagination']\n", | |
"    num_rows = pagination['count']\n", | |
"\n", | |
"    # Offsets of the pages after the first one (100 rows per page).\n", | |
"    offsets = range(100, num_rows, 100) if 'next' in pagination else range(0)\n", | |
"    # Size the bar by the number of requests actually issued so it can complete.\n", | |
"    progress = tqdm.tqdm(total=len(offsets))\n", | |
"    try:\n", | |
"        chunk_requests = [list_repo_chunk(session, offset, progress) for offset in offsets]\n", | |
"        all_repos.extend(await asyncio.gather(*chunk_requests))\n", | |
"    finally:\n", | |
"        progress.close()\n", | |
"\n", | |
"    # One build query per repository across all collected pages.\n", | |
"    progress = tqdm.tqdm(total=num_rows)\n", | |
"    try:\n", | |
"        build_requests = [\n", | |
"            list_builds(session, repo, progress)\n", | |
"            for page in all_repos\n", | |
"            for repo in page['repositories']\n", | |
"        ]\n", | |
"        await asyncio.gather(*build_requests)\n", | |
"    finally:\n", | |
"        progress.close()\n", | |
" \n", | |
"async def list_repo_chunk(session, offset, progress):\n", | |
"    \"\"\"Fetch one 100-row page of the conda-forge repository listing.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    session : client session used for the request.\n", | |
"    offset : int\n", | |
"        Row offset of the page, interpolated into the query string.\n", | |
"    progress : progress bar; ``update()`` is called once the page is decoded.\n", | |
"\n", | |
"    Returns\n", | |
"    -------\n", | |
"    The decoded JSON body of the response.\n", | |
"    \"\"\"\n", | |
"    url = f'https://api.travis-ci.org/owner/conda-forge/repos?limit=100&offset={offset}&sort_by=active%2Cname'\n", | |
"    # The context manager releases the connection even if decoding fails.\n", | |
"    async with session.get(url) as response:\n", | |
"        page = await response.json()\n", | |
"    progress.update()\n", | |
"    return page\n", | |
" \n", | |
"async def list_builds(session, repo, progress):\n", | |
"    \"\"\"Record the builds in state ``created`` for one repository.\n", | |
"\n", | |
"    Queries the builds endpoint for ``repo['id']`` and stores the returned list\n", | |
"    in the module-level ``all_builds`` dict under ``repo['name']``. When the\n", | |
"    list is empty, any existing entry for that name is removed instead.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    session : client session used for the request.\n", | |
"    repo : mapping providing the ``'id'`` and ``'name'`` keys.\n", | |
"    progress : progress bar; ``update()`` is called once the response is decoded.\n", | |
"    \"\"\"\n", | |
"    url_repo = f'https://api.travis-ci.org/repo/{repo[\"id\"]}/builds?state=created'\n", | |
"    # The context manager releases the connection even if decoding fails.\n", | |
"    async with session.get(url_repo) as data_build:\n", | |
"        builds = await data_build.json()\n", | |
"    progress.update()\n", | |
"    repo_name = repo['name']\n", | |
"    found = builds['builds']\n", | |
"    if len(found) > 0:\n", | |
"        all_builds[repo_name] = found\n", | |
"    else:\n", | |
"        # Drop a stale entry left over from an earlier run, if there is one.\n", | |
"        all_builds.pop(repo_name, None)\n", | |
"\n", | |
"async def outer(firsturl):\n", | |
"    \"\"\"Open a client session and run ``list_repos`` starting from ``firsturl``.\n", | |
"\n", | |
"    The session is built with a connector limited to 20 connections, the\n", | |
"    module-level ``headers``, and a ``ClientTimeout`` with ``total=600``; it is\n", | |
"    closed when the ``async with`` block exits.\n", | |
"\n", | |
"    Parameters\n", | |
"    ----------\n", | |
"    firsturl : str\n", | |
"        URL of the first page of the repository listing.\n", | |
"    \"\"\"\n", | |
"    conn = aiohttp.TCPConnector(limit=20)\n", | |
"    timeout = aiohttp.ClientTimeout(total=60 * 10)\n", | |
"    async with aiohttp.ClientSession(connector=conn, headers=headers, timeout=timeout) as session:\n", | |
"        # Use the argument, not the module-level ``url``, so the caller picks the start page.\n", | |
"        await list_repos(session, firsturl)\n", | |
" \n", | |
" \n", | |
"# Accumulator filled by list_builds: repository name -> list of builds.\n", | |
"all_builds = {}\n", | |
"# First page of the repository listing; later pages are requested by offset.\n", | |
"url = 'https://api.travis-ci.org/owner/conda-forge/repos?sort_by=active,name&limit=100'\n", | |
"# Drive the whole crawl to completion on the event loop.\n", | |
"out = loop.run_until_complete(outer(url))\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"len(all_builds)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def flatten():\n", | |
"    \"\"\"Yield one flat record per build stored in ``all_builds``.\n", | |
"\n", | |
"    Each record is the build's own mapping extended with ``REPO`` (the\n", | |
"    ``all_builds`` key) and ``INDEX`` (the build's position in that key's list).\n", | |
"    \"\"\"\n", | |
"    for repo_name, repo_builds in all_builds.items():\n", | |
"        yield from (\n", | |
"            dict(REPO=repo_name, INDEX=position, **build)\n", | |
"            for position, build in enumerate(repo_builds)\n", | |
"        )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# One row per record produced by flatten().\n", | |
"df = pd.DataFrame(data=[*flatten()])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Index the rows by (REPO, INDEX) so each build is addressed by repository and position.\n", | |
"df2 = df.set_index(keys=['REPO', 'INDEX'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Number of rows per calendar date of ``updated_at``, in chronological order.\n", | |
"(\n", | |
"    pd.to_datetime(df2['updated_at'])\n", | |
"    .dt.date\n", | |
"    .value_counts()\n", | |
"    .sort_index()\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# NOTE(review): same computation as the preceding cell; kept so the output can be re-displayed.\n", | |
"(\n", | |
"    pd.to_datetime(df2['updated_at'])\n", | |
"    .dt.date\n", | |
"    .value_counts()\n", | |
"    .sort_index()\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Sort descending by INDEX first, then by REPO (index levels 1 and 0 respectively).\n", | |
"df2.sort_index(level=['INDEX', 'REPO'], ascending=False)" | |
] | |
}, | |
{ | |
"cell_type": "raw", | |
"metadata": {}, | |
"source": [ | |
"df2[df2.updated_at > '2017-04-23'].sort_values('updated_at')" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "py36", | |
"language": "python", | |
"name": "py36" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment