From June 26, 2016 (python 3.5.2 release) to Aug. 31, 2016.
Python versions from 2.6 to 3.5
Without 2.7
From June 26, 2016 (python 3.5.2 release) to Aug. 31, 2016.
Python versions from 2.6 to 3.5
Without 2.7
-- https://bigquery.cloud.google.com/dataset/the-psf:pypi | |
SELECT concat( | |
date(timestamp), '_', REGEXP_EXTRACT(details.python, r'^([2-3]).[0-9].') | |
) as date_python, count(details.python) as downloads | |
FROM (TABLE_DATE_RANGE([the-psf:pypi.downloads], | |
TIMESTAMP('2016-06-26'), | |
TIMESTAMP('2016-08-31'))) | |
group by date_python |
-- https://bigquery.cloud.google.com/dataset/the-psf:pypi | |
-- https://bigquery.cloud.google.com/table/the-psf:pypi.downloads20160903 | |
SELECT concat( | |
date(timestamp), '_', REGEXP_EXTRACT(details.python, r'^([2-3].[0-9]).') | |
) as date_python, count(details.python) as downloads | |
FROM (TABLE_DATE_RANGE([the-psf:pypi.downloads], | |
TIMESTAMP('2016-06-26'), | |
TIMESTAMP('2016-08-31'))) | |
group by date_python |
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
# To plot chart from csv generated by bigquery | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
plt.figure() | |
ts = pd.read_csv('download_python_version_by_day.csv') | |
ts['date'] = pd.to_datetime(ts['date']) | |
df = ts.pivot(index='date', columns='python', values='downloads') | |
#df.plot() | |
#df[[2.6, 2.7, 3.1, 3.2, 3.3, 3.4, 3.5]].plot() | |
df[[2.6, 3.1, 3.2, 3.3, 3.4, 3.5]].plot() | |
plt.show() |
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
ts = pd.read_csv( | |
'download_python_major_version_by_day.csv', parse_dates=True, | |
) | |
ts['date'] = pd.to_datetime(ts['date']) | |
df = ts.pivot(index='date', columns='python', values='downloads') | |
ax = df[[2, 3]].plot(logy=True, figsize=(12, 9)) | |
ax.set_ylabel('log(downloads)') | |
ax.set_title('Python packages downloads') | |
plt.show() |
Also, if you just want a relative comparison (rather than absolute), it might also be better to add WHERE ... AND details.cpu IS NOT NULL
; more chance to get "actual" people installs rather than bots that are just mirroring (which might have more chance to be 2.7)
I also think it's important for this analysis to filter down to
pip
installs:although it doesn't have a huge impact on the results.