Skip to content

Instantly share code, notes, and snippets.

@kadrach
Last active October 28, 2019 13:17
Show Gist options
  • Save kadrach/6b8911234d94a928fc999710b7ff0e4c to your computer and use it in GitHub Desktop.
Save kadrach/6b8911234d94a928fc999710b7ff0e4c to your computer and use it in GitHub Desktop.
Python quote counting
package single double
pip 48429 39517
simplejson 2128 1282
six 128 1808
botocore 43373 12059
python-dateutil 2916 2879
setuptools 14801 16502
s3transfer 4797 1568
pyyaml 5883 1492
futures 288 346
pyasn1 4489 752
docutils 51470 39916
requests 4892 2093
jmespath 1796 383
awscli 15377 7490
rsa 1331 1634
colorama 336 53
wheel 1206 898
idna 26098 351
certifi 73 25
urllib3 8286 4287
chardet 931 361
awscli-cwlogs 1233 326
pytz 3407 166
cffi 11426 15678
pbr 3003 1984
boto3 1932 1336
cryptography 4917 19037
pycparser 10903 2543
jinja2 11493 4859
markupsafe 920 157
setuptools-scm 1318 406
enum34 None None
virtualenv 1655 966
selenium 2244 5080
ipaddress 1940 1204
numpy 72647 30038
asn1crypto 8858 3336
boto 62723 35452
pytest-runner 273 114
pyparsing 4712 31998
werkzeug 20478 6914
click 4884 1634
lxml 18081 13090
psutil 6203 7482
decorator 488 576
flask 8357 2624
pyopenssl 1518 6845
future 34282 28570
mock 2578 774
pytest 12547 26236
argparse 6182 1064
sqlalchemy 98057 74386
py 4683 5335
funcsigs 1028 415
protobuf 7525 3949
pandas 202286 50244
psycopg2 None None
coverage 4580 18095
pyasn1-modules 4654 404
jsonschema 95 2536
paramiko 6128 5190
httplib2 3612 4218
pexpect 6869 1197
ptyprocess 532 232
pygments 190475 20759
tornado 7980 14941
nose 7573 6719
itsdangerous 618 454
pillow 8025 18767
oauth2client 7373 3288
vcversioner 145 34
mccabe 157 226
django 178772 108050
singledispatch 130 56
docopt 390 204
grpcio 10993 8888
cython 21164 23028
functools32 None None
babel 15184 5356
scipy 48118 36558
wrapt 282 60
redis 6951 1482
bcrypt 106 820
html5lib 3470 20602
pymysql 3291 2462
pyflakes 4118 2602
backports-abc 130 68
packaging 466 4731
amqp 2663 951
pynacl 514 2997
google-cloud-core 1168 790
greenlet 327 228
kombu 11688 4214
zope-interface None None
websocket-client 254 1960
flake8 4959 4345
attrs 386 4068
scikit-learn 33263 37607
pycodestyle 2664 2063
gevent 55387 43726
import json
import subprocess
import tarfile
import tempfile
from pathlib import Path
from zipfile import BadZipFile, ZipFile
def _read_python_sources(archive):
    """Return the decoded text of every ``.py`` member of *archive*.

    The archive is first tried as a zip file; if it is not one, it is
    opened as a tarball with transparent compression detection, so gzip,
    bzip2 and xz tarballs are all accepted.

    Raises:
        tarfile.ReadError: if the file is neither a zip file nor a
            readable tar archive.
    """
    try:
        with ZipFile(str(archive), 'r') as zf:
            # ZipFile.read() opens and closes the member itself, so no
            # per-member file handles are left dangling.
            return [
                zf.read(name).decode('utf-8', 'ignore')
                for name in zf.namelist() if name.endswith('.py')
            ]
    except BadZipFile:
        # probably a tarball then
        pass

    sources = []
    with tarfile.open(str(archive), 'r:*') as tf:
        for member in tf.getmembers():
            if not member.name.endswith('.py'):
                continue
            try:
                handle = tf.extractfile(member)
            except KeyError:
                # Link whose target is not in the archive: nothing to read.
                continue
            if handle is None:
                # Directories and other non-file members have no content.
                continue
            with handle:
                sources.append(handle.read().decode('utf-8', 'ignore'))
    return sources


def count_package(package):
    """Count quote characters in the Python files of a package's sdist.

    The package is fetched with ``pip download --no-deps --no-binary :all:``
    into a temporary directory, and every ``.py`` file in the downloaded
    archive is scanned. The counts are raw character counts: every ``'``
    and ``"`` is counted, wherever it appears in the file.

    Args:
        package: requirement string handed to ``pip download``.

    Returns:
        A ``(single, double)`` tuple of integer counts, or ``(None, None)``
        when the package could not be downloaded or its archive could not
        be read.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            subprocess.run(
                [
                    'pip', 'download', '--no-deps', '--dest', tmpdir,
                    '--no-binary', ':all:', package
                ],
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
        except (subprocess.CalledProcessError, OSError):
            # pip exited non-zero or could not be started; the individual
            # causes are not distinguished. Ctrl-C is deliberately NOT
            # swallowed here, unlike with a bare ``except``.
            return (None, None)

        archive = next(Path(tmpdir).iterdir(), None)
        if archive is None:
            # pip reported success but left nothing behind.
            return (None, None)

        try:
            sources = _read_python_sources(archive)
        except tarfile.ReadError:
            # Neither a zip nor a tar archive: treat like a failed download
            # so one odd package does not abort a whole batch run.
            return (None, None)

    single = sum(text.count('\'') for text in sources)
    double = sum(text.count('"') for text in sources)
    return single, double
if __name__ == '__main__':
    # The ranking file is taken from:
    # https://github.com/hugovk/top-pypi-packages
    with open('top-pypi-packages-365-days.json') as ranking_file:
        ranking = json.load(ranking_file)

    # Only the first 100 rows of the ranking are analysed.
    names = [row['project'] for row in ranking['rows'][:100]]

    # Emit one "name,single,double" line per package.
    for name in names:
        single, double = count_package(name)
        print('{p},{single},{double}'.format(
            p=name, single=single, double=double))
@kadrach
Copy link
Author

kadrach commented May 29, 2018

Density plot of single / (single + double).

    ++----------+----------+----------+-----------+----------+-+
    |                                 **********               |
    |                               **         **              |
    |                             ***            **            |
1.5 +                            **               **           +
    |                           **                 **          |
    |                          **                   *          |
    |                         **                     *         |
  1 +                        **                      **        +
    |                       **                        **       |
    |                     ***                          **      |
    |     ******         **                             **     |
0.5 +  ****    ***** *****                               ***   +
    |  *           ***                                     **  |
    ++----------+----------+----------+-----------+----------+-+
     0         0.2        0.4        0.6         0.8         1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment