Created
October 2, 2021 07:16
-
-
Save baggepinnen/b98625dbfd2a7194a744f90926ef6f66 to your computer and use it in GitHub Desktop.
Analyze julia package user numbers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#=
Source: https://discourse.julialang.org/t/announcing-package-download-stats/
request_addrs: the approximate number of unique requesting IP addresses. Details below on why this is approximate and not exact.
request_count: the number of requests.
successes: the number of requests which resulted in a 2xx HTTP response code. Only included if status is not one of the key fields of the rollup. To get the success rate, divide by request_count.
cache_misses: the number of requests which resulted in the package server attempting to fetch a resource from an upstream storage server. To get the cache miss rate, divide by request_count.
body_bytes_sent: total number of bytes served in the bodies of HTTP responses for all requests (i.e. not including HTTP headers or TLS/IP data). To get average request body size, divide by request_count.
request_time: total time spent serving these requests. To get average request time, divide by request_count.
date_count: the number of distinct UTC dates when requests occurred. Only included if date is not one of the key fields of the rollup.
date_min: the earliest date of any request in this group. Only included if date is not one of the key fields of the rollup.
date_max: the latest date of any request in this group. Only included if date is not one of the key fields of the rollup.
=#
using CSV, DataFrames
using PkgDeps, UUIDs

# Load the package-request rollup into a DataFrame. The column meanings are
# described in the block comment at the top of the file.
data = CSV.read(
    "/tmp/package_requests.csv",
    DataFrame,
)
"""
    uuid2names()

Build a `Dict{UUID, String}` mapping each package UUID to its package name,
collected from every registry returned by `reachable_registries()`.

When the same UUID appears in more than one registry, the entry from the
registry visited last wins.
"""
function uuid2names()
    names = Dict{UUID, String}()
    for registry in reachable_registries(), (name, entry) in registry.pkgs
        names[entry.uuid] = name
    end
    return names
end
"""
    uuid2user()

Build a `Dict{UUID, String}` mapping each package UUID to the first path
segment that follows `.com/` in the package's repository URL (typically the
owning user or organisation).

Packages whose repository URL does not match that pattern are mapped to
`"Unknown"`. When the same UUID appears in more than one registry, the entry
from the registry visited last wins.
"""
function uuid2user()
    owners = Dict{UUID, String}()
    for registry in reachable_registries()
        for (_, entry) in registry.pkgs
            # Only URLs on a `.com` host match this pattern; everything else
            # is reported as "Unknown".
            hit = match(r"\.com/(.+?)/", entry.repo)
            owners[entry.uuid] = hit === nothing ? "Unknown" : first(hit.captures)
        end
    end
    return owners
end
# Lookup tables, built once when the script is loaded.
const namemap = uuid2names()
const usermap = uuid2user()

##

"""
    uuid2name(uuid)

Return the package name recorded in `namemap` for `uuid` (anything accepted by
the `UUID` constructor, e.g. a string).

If the UUID is not present in `namemap` — for instance because the local
registry copy is older than the download statistics — the UUID's string form
is returned instead of throwing a `KeyError`, so one unknown package does not
abort the whole analysis.
"""
function uuid2name(uuid)
    id = UUID(uuid)
    return get(() -> string(id), namemap, id)
end

"""
    user(uuid)

Return the repository owner recorded in `usermap` for `uuid`, or `"Unknown"`
when the UUID is not present. This is the same placeholder `uuid2user` uses
for repositories whose owner cannot be determined.
"""
user(uuid) = get(usermap, UUID(uuid), "Unknown")

"""
    isuser(uuid)

Return `true` when the owner of the package identified by `uuid` equals the
user name hard-coded below. Replace the placeholder with your own user name
before using this predicate.
"""
isuser(uuid) = user(uuid) == "enter_your_username_here"
# Build the per-package user table:
#   1. keep rows with status < 300 whose client_type is "user"
#      (`isequal` yields `false`, not `missing`, should client_type be missing);
#   2. translate the package UUID into a name and an owner, and expose
#      request_addrs as `users`;
#   3. place two orderings side by side: by descending user count, and by
#      owner then ascending user count (`makeunique` renames the duplicated
#      column names of the second copy).
# To restrict the table to your own packages, additionally require
# `isuser(row.package_uuid)` in the filter predicate.
data2 = let
    kept = filter(row -> row.status < 300 && isequal(row.client_type, "user"), data)
    labelled = select(
        kept,
        :package_uuid => ByRow(uuid2name) => :name,
        :package_uuid => ByRow(user) => :org,
        :request_addrs => :users,
    )
    by_users = sort(labelled, :users; rev=true)
    by_org = sort(labelled, [:org, :users]; rev=false)
    hcat(by_users, by_org; makeunique=true)
end
## | |
using Plots

# Histogram of the base-10 logarithm of the per-package user counts.
histogram(map(log10, data2.users); xlabel="log₁₀(users)")

##

# Save the combined table next to the input file.
data2 |> CSV.write("/tmp/users.csv")
## |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment