Last active
October 28, 2019 13:17
-
-
Save kadrach/6b8911234d94a928fc999710b7ff0e4c to your computer and use it in GitHub Desktop.
Python quote counting
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package | single | double | |
---|---|---|---|
pip | 48429 | 39517 | |
simplejson | 2128 | 1282 | |
six | 128 | 1808 | |
botocore | 43373 | 12059 | |
python-dateutil | 2916 | 2879 | |
setuptools | 14801 | 16502 | |
s3transfer | 4797 | 1568 | |
pyyaml | 5883 | 1492 | |
futures | 288 | 346 | |
pyasn1 | 4489 | 752 | |
docutils | 51470 | 39916 | |
requests | 4892 | 2093 | |
jmespath | 1796 | 383 | |
awscli | 15377 | 7490 | |
rsa | 1331 | 1634 | |
colorama | 336 | 53 | |
wheel | 1206 | 898 | |
idna | 26098 | 351 | |
certifi | 73 | 25 | |
urllib3 | 8286 | 4287 | |
chardet | 931 | 361 | |
awscli-cwlogs | 1233 | 326 | |
pytz | 3407 | 166 | |
cffi | 11426 | 15678 | |
pbr | 3003 | 1984 | |
boto3 | 1932 | 1336 | |
cryptography | 4917 | 19037 | |
pycparser | 10903 | 2543 | |
jinja2 | 11493 | 4859 | |
markupsafe | 920 | 157 | |
setuptools-scm | 1318 | 406 | |
enum34 | None | None | |
virtualenv | 1655 | 966 | |
selenium | 2244 | 5080 | |
ipaddress | 1940 | 1204 | |
numpy | 72647 | 30038 | |
asn1crypto | 8858 | 3336 | |
boto | 62723 | 35452 | |
pytest-runner | 273 | 114 | |
pyparsing | 4712 | 31998 | |
werkzeug | 20478 | 6914 | |
click | 4884 | 1634 | |
lxml | 18081 | 13090 | |
psutil | 6203 | 7482 | |
decorator | 488 | 576 | |
flask | 8357 | 2624 | |
pyopenssl | 1518 | 6845 | |
future | 34282 | 28570 | |
mock | 2578 | 774 | |
pytest | 12547 | 26236 | |
argparse | 6182 | 1064 | |
sqlalchemy | 98057 | 74386 | |
py | 4683 | 5335 | |
funcsigs | 1028 | 415 | |
protobuf | 7525 | 3949 | |
pandas | 202286 | 50244 | |
psycopg2 | None | None | |
coverage | 4580 | 18095 | |
pyasn1-modules | 4654 | 404 | |
jsonschema | 95 | 2536 | |
paramiko | 6128 | 5190 | |
httplib2 | 3612 | 4218 | |
pexpect | 6869 | 1197 | |
ptyprocess | 532 | 232 | |
pygments | 190475 | 20759 | |
tornado | 7980 | 14941 | |
nose | 7573 | 6719 | |
itsdangerous | 618 | 454 | |
pillow | 8025 | 18767 | |
oauth2client | 7373 | 3288 | |
vcversioner | 145 | 34 | |
mccabe | 157 | 226 | |
django | 178772 | 108050 | |
singledispatch | 130 | 56 | |
docopt | 390 | 204 | |
grpcio | 10993 | 8888 | |
cython | 21164 | 23028 | |
functools32 | None | None | |
babel | 15184 | 5356 | |
scipy | 48118 | 36558 | |
wrapt | 282 | 60 | |
redis | 6951 | 1482 | |
bcrypt | 106 | 820 | |
html5lib | 3470 | 20602 | |
pymysql | 3291 | 2462 | |
pyflakes | 4118 | 2602 | |
backports-abc | 130 | 68 | |
packaging | 466 | 4731 | |
amqp | 2663 | 951 | |
pynacl | 514 | 2997 | |
google-cloud-core | 1168 | 790 | |
greenlet | 327 | 228 | |
kombu | 11688 | 4214 | |
zope-interface | None | None | |
websocket-client | 254 | 1960 | |
flake8 | 4959 | 4345 | |
attrs | 386 | 4068 | |
scikit-learn | 33263 | 37607 | |
pycodestyle | 2664 | 2063 | |
gevent | 55387 | 43726 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import subprocess | |
import tarfile | |
import tempfile | |
from pathlib import Path | |
from zipfile import BadZipFile, ZipFile | |
def _read_python_sources(archive):
    """Return the decoded text of every ``.py`` member of *archive*.

    The archive is first tried as a zip file; if it is not one, it is opened
    as a tarball with transparent compression detection, so gzip, bzip2 and
    xz tarballs are all accepted.

    Args:
        archive: Path to the downloaded archive.

    Returns:
        A list of strings, one per ``.py`` member, decoded as UTF-8 with
        undecodable bytes dropped.

    Raises:
        tarfile.ReadError: If the archive is neither a zip file nor a
            tarball that ``tarfile`` can read.
    """
    try:
        with ZipFile(str(archive), 'r') as zf:
            # ZipFile.read() opens and closes the member itself, so no
            # member handle is left dangling.
            return [
                zf.read(name).decode('utf-8', 'ignore')
                for name in zf.namelist()
                if name.endswith('.py')
            ]
    except BadZipFile:
        # probably a tarball then
        pass

    sources = []
    # 'r:*' lets tarfile detect the compression instead of assuming gzip.
    with tarfile.open(str(archive), 'r:*') as tf:
        for member in tf.getmembers():
            if not member.name.endswith('.py'):
                continue
            handle = tf.extractfile(member)
            if handle is None:
                # Directories and other non-regular members have no data.
                continue
            with handle:
                sources.append(handle.read().decode('utf-8', 'ignore'))
    return sources


def count_package(package):
    """Count single and double quote characters in a package's Python files.

    Runs ``pip download --no-deps --no-binary :all:`` for *package* into a
    temporary directory, reads every ``.py`` file from the first archive
    found there, and counts the ``'`` and ``"`` characters in them.

    Args:
        package: Requirement string handed to ``pip download``.

    Returns:
        A ``(single, double)`` tuple of integer counts, or ``(None, None)``
        when pip fails or cannot be started, nothing was downloaded, or the
        downloaded archive cannot be read.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            subprocess.run(
                [
                    'pip', 'download', '--no-deps', '--dest', tmpdir,
                    '--no-binary', ':all:', package
                ],
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
        except (subprocess.CalledProcessError, OSError):
            # not checking all of these individually; unlike a bare
            # ``except`` this still lets Ctrl-C abort the whole run.
            return (None, None)

        archive = next(Path(tmpdir).iterdir(), None)
        if archive is None:
            # pip exited successfully but left nothing to inspect.
            return (None, None)

        try:
            sources = _read_python_sources(archive)
        except tarfile.ReadError:
            # Neither a zip nor a readable tarball.
            return (None, None)

    single = sum(text.count('\'') for text in sources)
    double = sum(text.count('"') for text in sources)
    return single, double
if __name__ == '__main__':
    # The ranking file is sourced from:
    # https://github.com/hugovk/top-pypi-packages
    with open('top-pypi-packages-365-days.json') as ranking_file:
        ranking = json.load(ranking_file)

    # Only the first 100 rows of the ranking are processed.
    project_names = [row['project'] for row in ranking['rows'][:100]]

    # Emit one "name,single,double" line per package.
    for name in project_names:
        single, double = count_package(name)
        print('{p},{single},{double}'.format(
            p=name, single=single, double=double))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Density plot of `single / (single + double)`.