Created
October 3, 2020 13:44
-
-
Save immuntasir/9e8f1eb5023a1d4aee84eb869f94e497 to your computer and use it in GitHub Desktop.
Exploring import statements that use numpy
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import requests\n", | |
"from tqdm import tqdm\n", | |
"import pandas as pd\n", | |
"import time" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with open('../../api_keys/github.txt', \"r\") as f:\n", | |
" API_KEY = f.read()\n", | |
" \n", | |
"headers = {'Authorization': 'token %s' % API_KEY}\n", | |
"\n", | |
"LIBRARY = 'numpy'\n", | |
"LANGUAGE = 'python'\n", | |
"\n", | |
"URL = 'https://api.github.com/search/repositories?q=%s+language:%s&sort=stars&order=desc&page=' % (LIBRARY, LANGUAGE)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"dict_keys(['total_count', 'incomplete_results', 'items'])\n", | |
"Total Repositories: 10853\n", | |
"Total number of items in a page: 30\n", | |
"Keys in a item: dict_keys(['id', 'node_id', 'name', 'full_name', 'private', 'owner', 'html_url', 'description', 'fork', 'url', 'forks_url', 'keys_url', 'collaborators_url', 'teams_url', 'hooks_url', 'issue_events_url', 'events_url', 'assignees_url', 'branches_url', 'tags_url', 'blobs_url', 'git_tags_url', 'git_refs_url', 'trees_url', 'statuses_url', 'languages_url', 'stargazers_url', 'contributors_url', 'subscribers_url', 'subscription_url', 'commits_url', 'git_commits_url', 'comments_url', 'issue_comment_url', 'contents_url', 'compare_url', 'merges_url', 'archive_url', 'downloads_url', 'issues_url', 'pulls_url', 'milestones_url', 'notifications_url', 'labels_url', 'releases_url', 'deployments_url', 'created_at', 'updated_at', 'pushed_at', 'git_url', 'ssh_url', 'clone_url', 'svn_url', 'homepage', 'size', 'stargazers_count', 'watchers_count', 'language', 'has_issues', 'has_projects', 'has_downloads', 'has_wiki', 'has_pages', 'forks_count', 'mirror_url', 'archived', 'disabled', 'open_issues_count', 'license', 'forks', 'open_issues', 'watchers', 'default_branch', 'permissions', 'score'])\n" | |
] | |
} | |
], | |
"source": [ | |
"r = requests.get(URL + '1', headers=headers)\n", | |
"json_response = r.json()\n", | |
"\n", | |
"print(json_response.keys())\n", | |
"print('Total Repositories:', json_response['total_count'])\n", | |
"print('Total number of items in a page:', len(json_response['items']))\n", | |
"\n", | |
"print('Keys in a item:', json_response['items'][0].keys())" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"100%|██████████| 35/35 [08:17<00:00, 14.21s/it]\n" | |
] | |
} | |
], | |
"source": [ | |
"keys = ['name', 'full_name', 'html_url', 'clone_url', 'size', 'stargazers_count']\n", | |
"NUMBER_OF_PAGES_TO_ITERATE = 35\n", | |
"\n", | |
"repo_dict = dict([(key, []) for key in keys])\n", | |
"\n", | |
"for page_num in tqdm(range(0, 35)):\n", | |
" r = requests.get(URL + str(page_num))\n", | |
" contents = r.json()\n", | |
" \n", | |
" for item in contents['items']:\n", | |
" for key in keys:\n", | |
" repo_dict[key].append(item[key])\n", | |
" \n", | |
" if page_num % 5 == 0:\n", | |
" time.sleep(60)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>name</th>\n", | |
" <th>full_name</th>\n", | |
" <th>html_url</th>\n", | |
" <th>clone_url</th>\n", | |
" <th>size</th>\n", | |
" <th>stargazers_count</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>data-science-ipython-notebooks</td>\n", | |
" <td>donnemartin/data-science-ipython-notebooks</td>\n", | |
" <td>https://github.com/donnemartin/data-science-ip...</td>\n", | |
" <td>https://github.com/donnemartin/data-science-ip...</td>\n", | |
" <td>49025</td>\n", | |
" <td>19568</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>ML-From-Scratch</td>\n", | |
" <td>eriklindernoren/ML-From-Scratch</td>\n", | |
" <td>https://github.com/eriklindernoren/ML-From-Scr...</td>\n", | |
" <td>https://github.com/eriklindernoren/ML-From-Scr...</td>\n", | |
" <td>553</td>\n", | |
" <td>16849</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>numpy</td>\n", | |
" <td>numpy/numpy</td>\n", | |
" <td>https://github.com/numpy/numpy</td>\n", | |
" <td>https://github.com/numpy/numpy.git</td>\n", | |
" <td>84293</td>\n", | |
" <td>15014</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>jax</td>\n", | |
" <td>google/jax</td>\n", | |
" <td>https://github.com/google/jax</td>\n", | |
" <td>https://github.com/google/jax.git</td>\n", | |
" <td>28075</td>\n", | |
" <td>9795</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>numpy-ml</td>\n", | |
" <td>ddbourgin/numpy-ml</td>\n", | |
" <td>https://github.com/ddbourgin/numpy-ml</td>\n", | |
" <td>https://github.com/ddbourgin/numpy-ml.git</td>\n", | |
" <td>10416</td>\n", | |
" <td>8963</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" name full_name \\\n", | |
"0 data-science-ipython-notebooks donnemartin/data-science-ipython-notebooks \n", | |
"1 ML-From-Scratch eriklindernoren/ML-From-Scratch \n", | |
"2 numpy numpy/numpy \n", | |
"3 jax google/jax \n", | |
"4 numpy-ml ddbourgin/numpy-ml \n", | |
"\n", | |
" html_url \\\n", | |
"0 https://github.com/donnemartin/data-science-ip... \n", | |
"1 https://github.com/eriklindernoren/ML-From-Scr... \n", | |
"2 https://github.com/numpy/numpy \n", | |
"3 https://github.com/google/jax \n", | |
"4 https://github.com/ddbourgin/numpy-ml \n", | |
"\n", | |
" clone_url size stargazers_count \n", | |
"0 https://github.com/donnemartin/data-science-ip... 49025 19568 \n", | |
"1 https://github.com/eriklindernoren/ML-From-Scr... 553 16849 \n", | |
"2 https://github.com/numpy/numpy.git 84293 15014 \n", | |
"3 https://github.com/google/jax.git 28075 9795 \n", | |
"4 https://github.com/ddbourgin/numpy-ml.git 10416 8963 " | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"repo_df = pd.DataFrame(repo_dict)\n", | |
"repo_df.to_csv('../../data/package_popularity/numpy/repo_info.csv', index=None)\n", | |
"\n", | |
"repo_df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Number of repositories: 1050\n" | |
] | |
} | |
], | |
"source": [ | |
"print('Number of repositories:', len(repo_df))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment