Last active
February 28, 2025 16:34
-
-
Save lesteve/6099307249f3235e16063ad847ff0348 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# %% | |
from pathlib import Path | |
import re | |
from packaging.version import Version | |
def find_python_version():
    """Return the lowest Python version listed in the project's classifiers.

    ``Python :: X.Y`` entries are searched for in ``pyproject.toml`` first and
    then in ``setup.py``; both paths are relative to the current working
    directory. The first file that contains at least one match is used.

    Returns
    -------
    str
        The smallest ``X.Y`` version found.

    Raises
    ------
    ValueError
        If neither file exists or neither contains a matching classifier.
    """
    classifier_pattern = r"Python :: (\d+\.\d+)"
    for filename in ("pyproject.toml", "setup.py"):
        path = Path(filename)
        # A checkout may lack one of the two files; try the next candidate.
        if not path.is_file():
            continue
        python_versions = re.findall(classifier_pattern, path.read_text())
        if python_versions:
            # Compare as Version objects, not strings, so "3.10" > "3.9".
            return str(min(Version(v) for v in python_versions))
    raise ValueError(
        "No 'Python :: X.Y' classifier found in pyproject.toml or setup.py"
    )
def find_dependencies_version(dep):
    """Return what ``sklearn/_min_dependencies.py`` prints for ``dep``.

    The script (path relative to the current working directory) is run with
    the current interpreter and ``dep`` as its only argument. Its standard
    output is decoded and stripped of surrounding whitespace.

    Raises ``subprocess.CalledProcessError`` if the script exits non-zero.
    """
    command = [sys.executable, "sklearn/_min_dependencies.py", dep]
    raw_output = subprocess.check_output(command)
    return raw_output.decode().strip()
# %% | |
import sys | |
import subprocess | |
import pandas as pd | |
# Minimum versions for scikit-learn 0.21 to 0.23, entered by hand.
# One tuple per release, in the column order given by _legacy_columns.
_legacy_columns = ("scikit-learn", "python", "numpy", "scipy", "joblib")
_legacy_rows = [
    ("0.21", "3.5", "1.11.0", "0.17.0", "0.11"),
    ("0.22", "3.5", "1.11.0", "0.17.0", "0.11"),
    ("0.23", "3.6", "1.13.3", "0.19.1", "0.11"),
]
min_version_data = [dict(zip(_legacy_columns, row)) for row in _legacy_rows]
# Releases 1.0 to 1.6: minimum versions are read from the checked-out sources.
version_list = ["1." + str(minor) for minor in range(7)]
dependency_list = ["numpy", "scipy", "pandas", "joblib", "threadpoolctl"]
for version in version_list:
    # Switch the working tree to the "<version>.X" branch before reading it.
    subprocess.check_call(["git", "checkout", f"{version}.X"])
    row = {"scikit-learn": version, "python": find_python_version()}
    row.update({dep: find_dependencies_version(dep) for dep in dependency_list})
    min_version_data.append(row)
# One row per scikit-learn release, one column per package.
min_version_df = pd.DataFrame(min_version_data)
# %% | |
# Collect release dates. Python's come from the tables at
# https://devguide.python.org/versions/
# For scikit-learn, see e.g.
# https://pypi.org/project/scikit-learn/1.1.0/#history
import pandas as pd

df_list = pd.read_html("https://devguide.python.org/versions/")
# Stack every table found on the page; "Branch" is forced to str.
df = pd.concat(df_list).astype({"Branch": str})
first_release_by_branch = dict(zip(df["Branch"], df["First release"]))
python_version_info = {
    branch: pd.to_datetime(first_release)
    for branch, first_release in first_release_by_branch.items()
}
# Maps package name -> {version: release timestamp}.
release_dates = {"python": python_version_info}
# %% | |
import requests | |
def get_release_time(package, version):
    """Return the PyPI upload time of ``package`` at ``version``.

    Queries ``https://pypi.org/pypi/<package>/<version>/json`` and uses the
    ``upload_time`` of the first file whose ``packagetype`` is ``"sdist"``.
    If the release lists no sdist, the first listed file is used instead.

    Parameters
    ----------
    package : str
        Project name on PyPI.
    version : str
        Release version to look up.

    Returns
    -------
    The result of ``pd.to_datetime`` applied to the ``upload_time`` string.

    Raises
    ------
    requests.HTTPError
        If PyPI answers with an error status.
    ValueError
        If the release lists no files at all.
    """
    response = requests.get(
        f"https://pypi.org/pypi/{package}/{version}/json", timeout=30
    )
    # Fail here with the HTTP status rather than later while reading a
    # response body that has no "urls" entry.
    response.raise_for_status()
    files = response.json()["urls"]
    if not files:
        raise ValueError(f"No files listed on PyPI for {package}=={version}")
    # for some reason 0.21 doesn't have a sdist ... so fall back to the
    # first listed file when no sdist is found.
    chosen = next((f for f in files if f["packagetype"] == "sdist"), files[0])
    return pd.to_datetime(chosen["upload_time"])
# Look up the PyPI upload time of every version that appears in the table.
for dep in ["scikit-learn", *dependency_list]:
    # Only str entries are looked up (missing cells are not strings).
    # dict.fromkeys drops repeated versions while keeping their order, so each
    # distinct version is requested only once.
    versions = dict.fromkeys(v for v in min_version_df[dep] if isinstance(v, str))
    release_dates[dep] = {ver: get_release_time(dep, ver) for ver in versions}
# Bare expression: displays the mapping when run as a notebook cell.
release_dates
# %% | |
def previous_minor_release(ver):
    """Return ``"<major>.<minor - 1>"`` for a dotted version string.

    Only the first two dot-separated components are used; anything after
    them is ignored. No validation is done: a minor of ``0`` yields
    ``"<major>.-1"``. A string without a dot, or with a non-integer minor
    component, raises ``ValueError``.
    """
    major, minor = ver.split(".")[:2]
    return ".".join([major, str(int(minor) - 1)])
# %% | |
# "<name>-date": release date of the version listed in column "<name>".
for name in ["scikit-learn", "python", *dependency_list]:
    min_version_df[f"{name}-date"] = min_version_df[name].map(release_dates[name])

# "<name>-date-diff": days between that date and the scikit-learn release
# date, divided by 365.
sklearn_release_dates = min_version_df["scikit-learn-date"]
for name in ["python", *dependency_list]:
    elapsed = sklearn_release_dates - min_version_df[f"{name}-date"]
    min_version_df[f"{name}-date-diff"] = elapsed.dt.days / 365
# %% | |
# Print one table for Python, then one per dependency, each showing the
# minimum version and its "-date-diff" value per scikit-learn release.
print(min_version_df[["scikit-learn", "python", "python-date-diff"]])
for package in ("numpy", "scipy", "pandas", "joblib", "threadpoolctl"):
    # Each dependency is paired with its OWN date-diff column (the pandas
    # table previously showed scipy's).
    print(
        min_version_df[["scikit-learn", "python", package, f"{package}-date-diff"]]
    )
# %% | |
# Plan with minor versions X.Y
# One entry per planned scikit-learn release: its expected release date and
# the proposed minimum version of Python and of each dependency.
# Python versions are written as "X.Y" (no patch component) so that they can
# be looked up in the Python release-date mapping, which is keyed by the
# devguide "Branch" values.
plan = [
    {
        "scikit-learn": "1.7",
        "scikit-learn-date": pd.to_datetime("2025-06-01"),
        "python": "3.10",
        "numpy": "1.22.0",
        "scipy": "1.8.0",
        "pandas": "1.4.0",
        "joblib": "1.2.0",
        "threadpoolctl": "3.1.0",
    },
    {
        "scikit-learn": "1.8",
        "scikit-learn-date": pd.to_datetime("2025-12-01"),
        "python": "3.11",
        "numpy": "1.24.0",
        "scipy": "1.11.0",
        "pandas": "2.0.0",
        "joblib": "1.3.0",
        "threadpoolctl": "3.2.0",
    },
    {
        "scikit-learn": "1.9",
        "scikit-learn-date": pd.to_datetime("2026-06-01"),
        "python": "3.11",
        "numpy": "1.24.0",
        "scipy": "1.11.0",
        "pandas": "2.0.0",
        "joblib": "1.4.0",
        "threadpoolctl": "3.5.0",
    },
    {
        "scikit-learn": "1.10",
        "scikit-learn-date": pd.to_datetime("2026-12-01"),
        "python": "3.12",
        "numpy": "1.26.0",
        "scipy": "1.12.0",
        "pandas": "2.2.0",
        "joblib": "1.4.0",
        "threadpoolctl": "3.5.0",
    },
]
# Plan with bugfix versions X.Y.Z | |
# plan = [ | |
# { | |
# "scikit-learn": "1.7", | |
# "scikit-learn-date": pd.to_datetime("2025-06-01"), | |
# "python": "3.10", | |
# "numpy": "1.21.2", | |
# "scipy": "1.8.0", | |
# "pandas": "1.3.4", | |
# "joblib": "1.2", | |
# "threadpoolctl": "3.1", | |
# }, | |
# { | |
# "scikit-learn": "1.8", | |
# "scikit-learn-date": pd.to_datetime("2025-12-01"), | |
# "python": "3.11", | |
# "numpy": "1.23.3", | |
# "scipy": "1.10.1", | |
# "pandas": "1.5.2", | |
# "joblib": "1.3", | |
# "threadpoolctl": "3.2", | |
# }, | |
# { | |
# "scikit-learn": "1.9", | |
# "scikit-learn-date": pd.to_datetime("2026-06-01"), | |
# "python": "3.11", | |
# "numpy": "1.23.3", | |
# "scipy": "1.10.1", | |
# "pandas": "1.5.2", | |
# "joblib": "1.4", | |
# "threadpoolctl": "3.5", | |
# }, | |
# { | |
# "scikit-learn": "1.10", | |
# "scikit-learn-date": pd.to_datetime("2026-12-01"), | |
# "python": "3.12", | |
# "numpy": "1.26.0", | |
# "scipy": "1.11.3", | |
# "pandas": "2.2.0", | |
# "joblib": "1.4", | |
# "threadpoolctl": "3.5", | |
# }, | |
# ] | |
def _format_age(days):
    """Format a day count as "<Y> years <M> months" (365-day years, 30-day months).

    Missing values are passed through as ``pd.NaT``.
    """
    if pd.isna(days):
        return pd.NaT
    # The day counts arrive as floats when the column has missing values;
    # cast so the text reads "2 years", not "2.0 years".
    years, remainder = divmod(int(days), 365)
    months = round(remainder / 30)
    if months == 12:
        # Carry over so the result never reads "N years 12 months".
        years, months = years + 1, 0
    return f"{years} years {months} months"


plan = pd.DataFrame(plan)
# Fetch the release date of every planned dependency version. dict.fromkeys
# removes repeats so each distinct version is requested only once.
for each in ["numpy", "scipy", "pandas", "joblib", "threadpoolctl"]:
    for ver in dict.fromkeys(plan[each]):
        release_dates[each][ver] = get_release_time(each, ver)
# "<name>-date-diff": age of the planned minimum version at the scikit-learn
# release date, as human-readable text.
for each in ["python", "numpy", "scipy", "pandas", "joblib", "threadpoolctl"]:
    age = plan["scikit-learn-date"] - plan[each].map(release_dates[each])
    plan[f"{each}-date-diff"] = age.dt.days.apply(_format_age)
# Print the plan as three Markdown tables (without the index): Python alone,
# then numpy/scipy/pandas, then joblib/threadpoolctl. Every table starts with
# the scikit-learn version and date, and each package is followed by its
# "-date-diff" column.
release_columns = ["scikit-learn", "scikit-learn-date"]
table_groups = [
    ["python"],
    ["numpy", "scipy", "pandas"],
    ["joblib", "threadpoolctl"],
]
for group in table_groups:
    package_columns = [
        column for name in group for column in (name, f"{name}-date-diff")
    ]
    table = plan[release_columns + package_columns]
    print(table.to_markdown(index=False))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment