Created
May 17, 2023 16:47
-
-
Save ericdill/80a189b09d35f9493ee18f3816204e2e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"id": "5b56c628-06c7-4eed-9fcf-39fb07dae5b4", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"from conda_forge_metadata.artifact_info import get_artifact_info_as_json\n", | |
"import networkx as nx\n", | |
"import graphviz as gv\n", | |
"import pandas as pd" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "675170b1-e213-4710-87c8-628904a751d5", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
" " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "b560e622-5547-4f58-89be-08285bfb7e0a", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"# Download the conda-forge linux-64 repodata index and parse it as JSON.\n", | |
"# requests applies no timeout unless one is passed, and calling .json() directly\n", | |
"# would try to parse an HTTP error body, so bound the wait and check the status.\n", | |
"response = requests.get(\n", | |
"    'https://conda.anaconda.org/conda-forge/linux-64/repodata.json',\n", | |
"    timeout=60,\n", | |
")\n", | |
"response.raise_for_status()\n", | |
"rd = response.json()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "dc5fb344-2cde-4f6c-93c4-e2c4453918ac", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"dict_keys(['info', 'packages', 'packages.conda', 'removed', 'repodata_version'])" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"rd.keys()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"id": "b185420b-e7c4-4c4d-9c61-8b4f3d559f63", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"pkgs = rd['packages']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "bd272d78-3507-46cd-a814-8fc2f88493ce", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"id": "d13af4cb-d0d2-4637-beb6-898071e16f3c", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"# Build a directed dependency graph from the repodata records. Both the artifact\n", | |
"# filename (pkg_name) and the package name get a node, and each gets an edge to\n", | |
"# every package named in the record's `depends` list.\n", | |
"# `info` collects one row per artifact for the DataFrame built later.\n", | |
"dg = nx.DiGraph()\n", | |
"info = {}\n", | |
"for pkg_name, pkg_info in pkgs.items():\n", | |
"    name = pkg_info['name']\n", | |
"    dg.add_node(name)\n", | |
"    dg.add_node(pkg_name)\n", | |
"    md = {'pkg_name': pkg_name, 'python_listed': False}\n", | |
"    for depend in pkg_info['depends']:\n", | |
"        # A spec looks like 'zlib >=1.2.11,<1.3.0a0'; keep only the package name.\n", | |
"        node = depend.split(' ', maxsplit=1)[0]\n", | |
"        # Exact match only: a substring test would also flag other packages whose\n", | |
"        # name merely contains 'python' (e.g. python_abi).\n", | |
"        if node == 'python':\n", | |
"            md['python_listed'] = True\n", | |
"        dg.add_edge(name, node)\n", | |
"        dg.add_edge(pkg_name, node)\n", | |
"    info[pkg_name] = md\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "8ab88063-03e3-4049-a637-b61b491c3876", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"('zziplib-0.13.69-hed695b0_1.tar.bz2',\n", | |
" {'build': 'hed695b0_1',\n", | |
" 'build_number': 1,\n", | |
" 'depends': ['libgcc-ng >=7.3.0', 'zlib >=1.2.11,<1.3.0a0'],\n", | |
" 'license': 'GPL-2.0',\n", | |
" 'license_family': 'GPL',\n", | |
" 'md5': 'a41a0433db5b33992226d2f9c3076e87',\n", | |
" 'name': 'zziplib',\n", | |
" 'sha256': '217ce1b813b1273fa2ff379ef1c1fb8a1d281133174d6f834efff054bc714746',\n", | |
" 'size': 101482,\n", | |
" 'subdir': 'linux-64',\n", | |
" 'timestamp': 1586464758560,\n", | |
" 'version': '0.13.69'})" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"pkg_name, pkg_info" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"id": "3442e5e1-81a3-41cf-a4e1-72006135c620", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0%: 0 / 318667\n", | |
"3%: 10000 / 318667\n", | |
"6%: 20000 / 318667\n", | |
"9%: 30000 / 318667\n", | |
"12%: 40000 / 318667\n", | |
"15%: 50000 / 318667\n", | |
"18%: 60000 / 318667\n", | |
"21%: 70000 / 318667\n", | |
"25%: 80000 / 318667\n", | |
"28%: 90000 / 318667\n", | |
"31%: 100000 / 318667\n", | |
"34%: 110000 / 318667\n", | |
"37%: 120000 / 318667\n", | |
"40%: 130000 / 318667\n", | |
"43%: 140000 / 318667\n", | |
"47%: 150000 / 318667\n", | |
"50%: 160000 / 318667\n", | |
"53%: 170000 / 318667\n", | |
"56%: 180000 / 318667\n", | |
"59%: 190000 / 318667\n", | |
"62%: 200000 / 318667\n", | |
"65%: 210000 / 318667\n", | |
"69%: 220000 / 318667\n", | |
"72%: 230000 / 318667\n", | |
"75%: 240000 / 318667\n", | |
"78%: 250000 / 318667\n", | |
"81%: 260000 / 318667\n", | |
"84%: 270000 / 318667\n", | |
"87%: 280000 / 318667\n", | |
"91%: 290000 / 318667\n", | |
"94%: 300000 / 318667\n", | |
"97%: 310000 / 318667\n" | |
] | |
} | |
], | |
"source": [ | |
"# Flag every artifact that can reach the 'python' node through the graph.\n", | |
"total_pkgs = len(pkgs)\n", | |
"# One reverse traversal from 'python' yields every node with a path to it; this\n", | |
"# replaces a separate nx.descendants() traversal per artifact. The membership\n", | |
"# answer is the same: X is an ancestor of 'python' iff 'python' is a descendant of X.\n", | |
"python_dependents = nx.ancestors(dg, 'python') if 'python' in dg else set()\n", | |
"for i, pkg_name in enumerate(pkgs):\n", | |
"    info[pkg_name]['python_transitive'] = pkg_name in python_dependents\n", | |
"    # Progress line every 10000 artifacts.\n", | |
"    if i % 10000 == 0:\n", | |
"        print(f'{int((i / total_pkgs) * 100)}%: {i} / {total_pkgs}')\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "7a3ea99a-2086-4ffe-9a37-69ebdae3c69c", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame(list(info.values()))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "5932242c-dbb7-4ff5-92e1-64ea05c15337", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>pkg_name</th>\n", | |
" <th>python_listed</th>\n", | |
" <th>python_transitive</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>21cmfast-3.0.2-py36h1af98f8_1.tar.bz2</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>21cmfast-3.0.2-py36h2e3f83d_0.tar.bz2</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>21cmfast-3.0.2-py37h48b2cff_0.tar.bz2</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>21cmfast-3.0.2-py37hd45b216_1.tar.bz2</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>21cmfast-3.0.2-py38h9a4a7a8_1.tar.bz2</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>...</th>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>318662</th>\n", | |
" <td>zxpy-1.6.2-py39hf3d152e_1.tar.bz2</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>318663</th>\n", | |
" <td>zxpy-1.6.2-py39hf3d152e_2.tar.bz2</td>\n", | |
" <td>True</td>\n", | |
" <td>True</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>318664</th>\n", | |
" <td>zziplib-0.13.69-h27826a3_1.tar.bz2</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>318665</th>\n", | |
" <td>zziplib-0.13.69-hed695b0_0.tar.bz2</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>318666</th>\n", | |
" <td>zziplib-0.13.69-hed695b0_1.tar.bz2</td>\n", | |
" <td>False</td>\n", | |
" <td>False</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>318667 rows × 3 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" pkg_name python_listed \n", | |
"0 21cmfast-3.0.2-py36h1af98f8_1.tar.bz2 True \\\n", | |
"1 21cmfast-3.0.2-py36h2e3f83d_0.tar.bz2 True \n", | |
"2 21cmfast-3.0.2-py37h48b2cff_0.tar.bz2 True \n", | |
"3 21cmfast-3.0.2-py37hd45b216_1.tar.bz2 True \n", | |
"4 21cmfast-3.0.2-py38h9a4a7a8_1.tar.bz2 True \n", | |
"... ... ... \n", | |
"318662 zxpy-1.6.2-py39hf3d152e_1.tar.bz2 True \n", | |
"318663 zxpy-1.6.2-py39hf3d152e_2.tar.bz2 True \n", | |
"318664 zziplib-0.13.69-h27826a3_1.tar.bz2 False \n", | |
"318665 zziplib-0.13.69-hed695b0_0.tar.bz2 False \n", | |
"318666 zziplib-0.13.69-hed695b0_1.tar.bz2 False \n", | |
"\n", | |
" python_transitive \n", | |
"0 True \n", | |
"1 True \n", | |
"2 True \n", | |
"3 True \n", | |
"4 True \n", | |
"... ... \n", | |
"318662 True \n", | |
"318663 True \n", | |
"318664 False \n", | |
"318665 False \n", | |
"318666 False \n", | |
"\n", | |
"[318667 rows x 3 columns]" | |
] | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"id": "ddff246f-4ef1-417d-a998-74fd676c0896", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"246540" | |
] | |
}, | |
"execution_count": 23, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df['python_listed'].sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"id": "15b16328-5b3c-4173-9be7-1d76b414ea54", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"286367" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df['python_transitive'].sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"id": "7cce1a63-3b9c-491f-b9ee-af810b0fad1f", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"318667" | |
] | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(rd['packages'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"id": "1ef6908f-c52c-41a6-82e7-4df024ee4383", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.13907677909815028" | |
] | |
}, | |
"execution_count": 21, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(df['python_transitive'].sum() - df['python_listed'].sum()) / df['python_transitive'].sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"id": "917901f4-ffc6-4c0b-a005-54928c8239a5", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"0.12497999479080042" | |
] | |
}, | |
"execution_count": 20, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"(df['python_transitive'].sum() - df['python_listed'].sum()) / len(rd['packages'])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "489fbb9a-73f9-4fcf-a0ed-7abe965b5f53", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Count of artifacts flagged python_transitive. The previous key 'tr' is not a\n", | |
"# column of df (pkg_name, python_listed, python_transitive) and raised KeyError.\n", | |
"df['python_transitive'].sum()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 75, | |
"id": "6f4645df-7d0c-4dfa-8bfd-73419d8a6239", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'_openmp_mutex',\n", | |
" 'astropy',\n", | |
" 'cached-property',\n", | |
" 'cffi',\n", | |
" 'click',\n", | |
" 'fftw',\n", | |
" 'gsl',\n", | |
" 'h5py',\n", | |
" 'libblas',\n", | |
" 'libgcc-ng',\n", | |
" 'matplotlib-base',\n", | |
" 'numpy',\n", | |
" 'python',\n", | |
" 'python_abi',\n", | |
" 'pyyaml',\n", | |
" 'scipy'}" | |
] | |
}, | |
"execution_count": 75, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nx.descendants(dg, name)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 77, | |
"id": "24a775c2-0925-42ab-bd4a-d576a8674fe9", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"set()" | |
] | |
}, | |
"execution_count": 77, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"nx.descendants(dg, 'scipy')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 62, | |
"id": "84ae9f70-e797-475f-95b0-4e008179a85c", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['_openmp_mutex',\n", | |
" 'astropy',\n", | |
" 'cached-property',\n", | |
" 'cffi',\n", | |
" 'click',\n", | |
" 'fftw',\n", | |
" 'gsl',\n", | |
" 'h5py',\n", | |
" 'libblas',\n", | |
" 'libgcc-ng',\n", | |
" 'matplotlib-base',\n", | |
" 'numpy',\n", | |
" 'python',\n", | |
" 'python_abi',\n", | |
" 'pyyaml',\n", | |
" 'scipy',\n", | |
" 'nomkl',\n", | |
" 'bidict',\n", | |
" 'psutil',\n", | |
" 'setuptools_scm']" | |
] | |
}, | |
"execution_count": 62, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"list(dg.successors(name))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 60, | |
"id": "08750ee2-cc46-4827-9775-14b88064b3b3", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['_openmp_mutex',\n", | |
" 'astropy',\n", | |
" 'cached-property',\n", | |
" 'cffi',\n", | |
" 'click',\n", | |
" 'fftw',\n", | |
" 'gsl',\n", | |
" 'h5py',\n", | |
" 'libblas',\n", | |
" 'libgcc-ng',\n", | |
" 'matplotlib-base',\n", | |
" 'numpy',\n", | |
" 'python',\n", | |
" 'python_abi',\n", | |
" 'pyyaml',\n", | |
" 'scipy',\n", | |
" 'nomkl',\n", | |
" 'bidict',\n", | |
" 'psutil',\n", | |
" 'setuptools_scm']" | |
] | |
}, | |
"execution_count": 60, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"dg.neighbors(name)\n", | |
"subgraph = dg.subgraph(name)\n", | |
"render(subgraph, name)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 57, | |
"id": "214c0fc7-779d-4709-9878-2a0b71bf6dd7", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"for pkg_name, pkg_info in pkgs.items():\n", | |
" name = pkg_info['name']\n", | |
" subgraph = dg.subgraph(name)\n", | |
" render(subgraph, name)\n", | |
" break" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "da1a01eb-3462-4796-a847-9e133acc25c6", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"from conda_forge_metadata import libcfgraph" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"id": "b9565f52-750a-4a82-bd02-bbdda7abedb3", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [ | |
"index = libcfgraph.get_libcfgraph_index()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "6ec20909-789f-408c-a542-fd7959d187d9", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"'artifacts/21cmfast/conda-forge/linux-64/21cmfast-3.1.2-py38ha5b31ff_2.json'" | |
] | |
}, | |
"execution_count": 13, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"index[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "a91023d9-1f1d-48d6-8d2b-3486268b93d9", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"id": "eeb6eb77-8a86-4992-a71e-f690aedcb0c2", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [], | |
"source": [] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "6f8e4485-f8d6-4019-9159-a9a59e9f1cf7", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [conda env:cf-metadata]", | |
"language": "python", | |
"name": "conda-env-cf-metadata-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.11.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment