Skip to content

Instantly share code, notes, and snippets.

@hlecuanda
Forked from alex/pypi.md
Created August 18, 2018 12:29
Show Gist options
  • Save hlecuanda/96f61acb6bada0318d3e47469deaefbf to your computer and use it in GitHub Desktop.
Save hlecuanda/96f61acb6bada0318d3e47469deaefbf to your computer and use it in GitHub Desktop.

Most downloaded projects

SELECT
  file.project,
  COUNT(*) as total_downloads,
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
GROUP BY
  file.project
ORDER BY
  total_downloads DESC
LIMIT 100

OpenSSL versions

SELECT
  details.system.name,
  REGEXP_EXTRACT(details.openssl_version, r"^OpenSSL ([^ ]+) ") as openssl_version,
  COUNT(*) as download_count,
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
WHERE
  details.openssl_version IS NOT NULL
GROUP BY
  details.system.name,
  openssl_version,
HAVING
  download_count >= 100
ORDER BY
  download_count DESC
LIMIT 100

Which python versions install cryptography

SELECT
  REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") as python_version,
  COUNT(*) as download_count,
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
WHERE
  file.project = 'cryptography'
GROUP BY
  python_version,
ORDER BY
  download_count DESC
LIMIT 100

Which platforms are generating 2.6 downloads

SELECT
  details.distro.name,
  details.distro.version,
  details.system.release,
  COUNT(*) as download_count,
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
WHERE
  REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") = "2.6"
GROUP BY
  details.distro.name,
  details.distro.version,
  details.system.release
ORDER BY
  download_count DESC
LIMIT 100

Which packages are downloaded by Python 2.6 most often (percent)

SELECT
  file.project,
  ROUND(100 * SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") = "2.6" THEN 1 ELSE 0 END) / COUNT(*), 1) AS percent_26,
  SUM(CASE WHEN REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") = "2.6" THEN 1 ELSE 0 END) as total_26_downloads,
  COUNT(*) as total_downloads,
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
WHERE
  details.python IS NOT NULL AND
  # Exclude things which are stdlib backports *for* Python 2.6
  file.project NOT IN ("argparse", "ordereddict")
GROUP BY
  file.project,
HAVING
  total_downloads > 5000
ORDER BY
  percent_26 DESC
LIMIT 250

Which packages are downloaded by Python 2.6 most often (absolute)

SELECT
  file.project,
  COUNT(*) as total_downloads,
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
WHERE
  REGEXP_EXTRACT(details.python, r"^([^\.]+\.[^\.]+)") == "2.6" AND
  # Exclude things which are stdlib backports *for* Python 2.6
  file.project NOT IN ("argparse", "ordereddict")
GROUP BY
  file.project,
ORDER BY
  total_downloads DESC
LIMIT 100

Most used installers

SELECT
  details.installer.name,
  details.installer.version,
  COUNT(*) as total_downloads
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
GROUP BY
  details.installer.name,
  details.installer.version
ORDER BY
  total_downloads DESC
LIMIT 100

Packages installed with OpenSSL 0.9.8 (excluding Windows and OS X)

SELECT
  file.project,
  ROUND(100 * SUM(CASE WHEN SUBSTR(REGEXP_EXTRACT(details.openssl_version, r"^OpenSSL ([^ ]+) "), 0, 5) == "0.9.8" THEN 1 ELSE 0 END) / COUNT(*), 1) as percent_098,
  ROUND(100 * SUM(CASE WHEN SUBSTR(REGEXP_EXTRACT(details.openssl_version, r"^OpenSSL ([^ ]+) "), 0, 5) == "1.0.0" THEN 1 ELSE 0 END) / COUNT(*), 1) as percent_100,
  ROUND(100 * SUM(CASE WHEN SUBSTR(REGEXP_EXTRACT(details.openssl_version, r"^OpenSSL ([^ ]+) "), 0, 5) == "1.0.1" THEN 1 ELSE 0 END) / COUNT(*), 1) as percent_101,
  ROUND(100 * SUM(CASE WHEN SUBSTR(REGEXP_EXTRACT(details.openssl_version, r"^OpenSSL ([^ ]+) "), 0, 5) == "1.0.2" THEN 1 ELSE 0 END) / COUNT(*), 1) as percent_102,
  COUNT(*) as total_downloads,
FROM
  TABLE_DATE_RANGE(
    [the-psf:pypi.downloads],
    TIMESTAMP("20160114"),
    CURRENT_TIMESTAMP()
  )
WHERE
  details.openssl_version IS NOT NULL AND
  # Exclude Windows and OS X where Cryptography ships a static wheel
  details.system.name NOT IN ("Windows", "Darwin")
GROUP BY
  file.project,
HAVING
  total_downloads > 5000
ORDER BY
  percent_098 DESC
LIMIT 250
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment