Last active
February 7, 2021 13:04
-
-
Save verhovsky/52e6c344a952d467b265c28acc905f56 to your computer and use it in GitHub Desktop.
Which modules are imported in the Python standard library most often?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# This script scans all .py files in cpython/Lib, finds all import statements | |
# in them and counts out what is imported most often in each top directory | |
# or file in Lib/ (meaning if some library is imported many times in various | |
# files in e.g. Lib/http/ , it will only be counted once). | |
# | |
# Clone the CPython repo then put this file in the top level cpython/ | |
# directory and run it: | |
# | |
# cd /tmp | |
# git clone https://github.com/python/cpython/ | |
# cd cpython | |
# # <download find_imports.py to this directory> | |
# python3 find_imports.py | |
from pathlib import Path | |
import ast | |
import itertools | |
from operator import itemgetter | |
import json | |
from collections import Counter | |
CPYTHON_LIB = Path("Lib")


def is_test_file(rel_path):
    """Return True if *rel_path* belongs to a test suite.

    *rel_path* is a path relative to the library root.  A file is treated
    as a test if any of its path components is exactly ``test`` or
    ``tests``, or if its file name starts with ``test_``.
    """
    parts = rel_path.parts
    return (
        "test" in parts
        or "tests" in parts
        or rel_path.name.startswith("test_")
    )


def top_level_imports(source, filename="<unknown>"):
    """Return the set of top-level module names imported by *source*.

    *source* may be ``str`` or ``bytes``.  Passing ``bytes`` lets the
    parser honour a PEP 263 coding declaration instead of relying on the
    locale's default encoding.

    Dotted names are reduced to their first component (``collections.abc``
    counts as ``collections``).  Relative imports (``from . import x``) are
    ignored.  Imports nested inside functions or classes are included
    because the whole tree is walked.

    Raises ``SyntaxError`` if *source* is not valid Python.
    """
    names = set()
    for node in ast.walk(ast.parse(source, filename=filename)):
        if isinstance(node, ast.Import):
            names.update(alias.name.split(".")[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.level == 0:
            # level == 0 means an absolute import, so node.module is set.
            names.add(node.module.split(".")[0])
    return names


def collect_imports(lib_dir):
    """Map each top-level library in *lib_dir* to the modules it imports.

    A "library" is a top-level ``.py`` file (suffix stripped) or a
    top-level directory; every ``.py`` file below a directory contributes
    to that directory's entry, so a module imported by several files of
    one package is recorded once.  Test files are skipped, and a library's
    imports of itself and of ``__main__`` are not recorded.

    Returns a dict of library name -> sorted list of imported module names.
    """
    result = {}
    for p in lib_dir.rglob("*.py"):
        rel = p.relative_to(lib_dir)
        # Check only the part of the path below lib_dir so that an
        # unrelated ancestor directory named "test" cannot hide everything.
        if is_test_file(rel):
            continue
        lib = rel.parts[0].removesuffix(".py")
        # Read bytes: ast.parse then decodes using the file's own coding
        # cookie (UTF-8 by default) rather than the locale encoding.
        found = top_level_imports(p.read_bytes(), filename=str(p))
        # If the library is a directory with multiple files, result might
        # already contain something.
        merged = set(result.get(lib, [])) | found
        # Don't count self imports
        result[lib] = sorted(merged - {lib, "__main__"})
    return result


imported = collect_imports(CPYTHON_LIB)
# print(json.dumps(imported, indent=4))

# Each library contributes at most one count per imported module.
most_imported = Counter(itertools.chain.from_iterable(imported.values()))
# Least imported first; ties are broken alphabetically by module name.
for lib, count in sorted(most_imported.items(), key=itemgetter(1, 0)):
    print(lib, count)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Mailman 1 | |
_abc 1 | |
_ast 1 | |
_asyncio 1 | |
_bisect 1 | |
_blake2 1 | |
_bootsubprocess 1 | |
_bz2 1 | |
_codecs 1 | |
_codecs_cn 1 | |
_codecs_hk 1 | |
_codecs_iso2022 1 | |
_codecs_jp 1 | |
_codecs_kr 1 | |
_codecs_tw 1 | |
_compat_pickle 1 | |
_contextvars 1 | |
_crypt 1 | |
_csv 1 | |
_ctypes 1 | |
_curses 1 | |
_curses_panel 1 | |
_datetime 1 | |
_dbm 1 | |
_decimal 1 | |
_elementtree 1 | |
_functools 1 | |
_gdbm 1 | |
_heapq 1 | |
_json 1 | |
_lsprof 1 | |
_lzma 1 | |
_markupbase 1 | |
_md5 1 | |
_msi 1 | |
_multibytecodec 1 | |
_multiprocessing 1 | |
_opcode 1 | |
_overlapped 1 | |
_pickle 1 | |
_posixshmem 1 | |
_py_abc 1 | |
_pydecimal 1 | |
_queue 1 | |
_random 1 | |
_scproxy 1 | |
_sha1 1 | |
_sha256 1 | |
_sha3 1 | |
_signal 1 | |
_sitebuiltins 1 | |
_socket 1 | |
_sqlite3 1 | |
_ssl 1 | |
_stat 1 | |
_statistics 1 | |
_string 1 | |
_strptime 1 | |
_struct 1 | |
_symtable 1 | |
_threading_local 1 | |
_tracemalloc 1 | |
_uuid 1 | |
_winreg 1 | |
_zoneinfo 1 | |
aifc 1 | |
asynchat 1 | |
asyncio 1 | |
cgi 1 | |
codeop 1 | |
csv 1 | |
dataclasses 1 | |
dbm 1 | |
distutils 1 | |
docutils 1 | |
filecmp 1 | |
fractions 1 | |
ftplib 1 | |
idlelib 1 | |
imghdr 1 | |
json 1 | |
lib2to3 1 | |
mmap 1 | |
msilib 1 | |
nturl2path 1 | |
opcode 1 | |
pdb 1 | |
pep517 1 | |
pgen2 1 | |
pip 1 | |
profile 1 | |
pyclbr 1 | |
pydoc_data 1 | |
pyexpat 1 | |
rlcompleter 1 | |
secrets 1 | |
site 1 | |
sitecustomize 1 | |
sndhdr 1 | |
sre_compile 1 | |
statistics 1 | |
stringprep 1 | |
tabnanny 1 | |
tracemalloc 1 | |
turtle 1 | |
usercustomize 1 | |
uu 1 | |
vms_lib 1 | |
wave 1 | |
win32api 1 | |
win32con 1 | |
win32evtlog 1 | |
win32evtlogutil 1 | |
xmlrpc 1 | |
zipimport 1 | |
_aix_support 2 | |
_collections 2 | |
_frozen_importlib 2 | |
_frozen_importlib_external 2 | |
_hashlib 2 | |
_locale 2 | |
_operator 2 | |
_osx_support 2 | |
_posixsubprocess 2 | |
_sha512 2 | |
_sre 2 | |
_tkinter 2 | |
array 2 | |
asyncore 2 | |
bdb 2 | |
chunk 2 | |
cmd 2 | |
contextvars 2 | |
ctypes 2 | |
encodings 2 | |
genericpath 2 | |
java 2 | |
mimetypes 2 | |
multiprocessing 2 | |
netrc 2 | |
ntpath 2 | |
plistlib 2 | |
pstats 2 | |
quopri 2 | |
smtplib 2 | |
sre_parse 2 | |
tarfile 2 | |
termios 2 | |
test 2 | |
tty 2 | |
unittest 2 | |
xml 2 | |
__future__ 3 | |
_compression 3 | |
_warnings 3 | |
audioop 3 | |
code 3 | |
concurrent 3 | |
decimal 3 | |
difflib 3 | |
gc 3 | |
getpass 3 | |
gettext 3 | |
gzip 3 | |
lzma 3 | |
numbers 3 | |
org 3 | |
pkgutil 3 | |
posix 3 | |
py_compile 3 | |
sre_constants 3 | |
tkinter 3 | |
token 3 | |
_weakref 4 | |
_weakrefset 4 | |
_winapi 4 | |
configparser 4 | |
dis 4 | |
doctest 4 | |
fcntl 4 | |
hmac 4 | |
html 4 | |
optparse 4 | |
pathlib 4 | |
platform 4 | |
queue 4 | |
socketserver 4 | |
unicodedata 4 | |
webbrowser 4 | |
zipfile 4 | |
_imp 5 | |
_io 5 | |
ast 5 | |
bisect 5 | |
bz2 5 | |
copyreg 5 | |
grp 5 | |
heapq 5 | |
http 5 | |
keyword 5 | |
msvcrt 5 | |
pydoc 5 | |
runpy 5 | |
shlex 5 | |
signal 5 | |
textwrap 5 | |
typing 5 | |
winreg 5 | |
atexit 6 | |
glob 6 | |
nt 6 | |
reprlib 6 | |
selectors 6 | |
sysconfig 6 | |
calendar 7 | |
hashlib 7 | |
pprint 7 | |
readline 7 | |
zlib 7 | |
_collections_abc 8 | |
locale 8 | |
logging 8 | |
marshal 8 | |
random 8 | |
select 8 | |
ssl 8 | |
codecs 9 | |
pickle 9 | |
_thread 10 | |
base64 10 | |
math 10 | |
posixpath 10 | |
binascii 11 | |
email 11 | |
fnmatch 11 | |
pwd 11 | |
tokenize 11 | |
datetime 12 | |
linecache 12 | |
operator 12 | |
shutil 12 | |
string 12 | |
urllib 12 | |
weakref 12 | |
enum 13 | |
abc 14 | |
getopt 15 | |
inspect 15 | |
tempfile 15 | |
importlib 16 | |
stat 16 | |
subprocess 16 | |
threading 16 | |
contextlib 17 | |
copy 17 | |
errno 20 | |
socket 21 | |
argparse 22 | |
struct 22 | |
traceback 22 | |
builtins 24 | |
itertools 30 | |
functools 35 | |
types 35 | |
time 40 | |
warnings 50 | |
collections 51 | |
io 55 | |
re 67 | |
os 101 | |
sys 128 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment