Skip to content

Instantly share code, notes, and snippets.

View AntoineToubhans's full-sized avatar

Toubi AntoineToubhans

View GitHub Profile
# Pipeline stage definitions (presumably dvc.yaml); the bodies of the earlier
# stages are elided with "...".
stages:
  download_dataset: ...
  split_dataset: ...
  train: ...
  evaluate:
    # Command executed when this stage runs.
    cmd: python scripts/evaluate.py
    # Inputs of the stage: the evaluation script itself and the test split.
    deps:
      - scripts/evaluate.py
      - data/dataset/test
# Pipeline stage definitions (presumably dvc.yaml); the bodies of the earlier
# stages are elided with "...".
stages:
  download_dataset: ...
  split_dataset: ...
  train: ...
  evaluate:
    # Command executed when this stage runs.
    cmd: python scripts/evaluate.py
    # Inputs of the stage: the evaluation script itself and the test split.
    deps:
      - scripts/evaluate.py
      - data/dataset/test
import git
# Git repository rooted at the current working directory.
REPO = git.Repo(".")

# Commits restricted to the "dvc.lock" path, materialized once into a list so
# they can be reused by every widget below.
MODELS_COMMITS = [*REPO.iter_commits(paths="dvc.lock")]
import streamlit as st
def _describe_commit(commit):
    """Build the selectbox label for a commit: short hash, message and date."""
    return f"{commit.hexsha[:6]} - {commit.message} - {commit.committed_datetime}"


# Let the user pick one of the model commits, then echo the selection.
selected_commit = st.selectbox(
    "Choose your commit",
    list(MODELS_COMMITS),
    format_func=_describe_commit,
)
st.write("Selected Commit", selected_commit)
import dvc.api
import pandas as pd
@st.cache
def load_predictions(rev: str) -> pd.DataFrame:
    """Load the predictions CSV as stored at revision ``rev``.

    The file ``data/evaluation/predictions.csv`` is opened through
    ``dvc.api.open`` for the requested revision and parsed with pandas.
    The function is wrapped in ``st.cache``.

    Args:
        rev: Revision passed to ``dvc.api.open`` (callers in this file pass
            a commit hexsha).

    Returns:
        The parsed predictions as a DataFrame.
    """
    with dvc.api.open("data/evaluation/predictions.csv", rev=rev) as predictions_file:
        predictions = pd.read_csv(predictions_file)
    return predictions
selected_commit = ... # Use the commit selector introduced in the previous section
def _format_commit_label(commit) -> str:
    """Return the selectbox label for ``commit``: short hash, message and date."""
    return f"{commit.hexsha[:6]} - {commit.message} - {commit.committed_datetime}"


# Two selectors over the same commit list, so that the predictions of two
# model versions can be compared. Both share one label formatter.
selected_commit_a = st.selectbox(
    "Choose commit A",
    MODELS_COMMITS,
    format_func=_format_commit_label,
)
selected_commit_b = st.selectbox(
    "Choose commit B",
    MODELS_COMMITS,
    format_func=_format_commit_label,
)
# Predictions produced at each of the two selected commits.
predictions_a = load_predictions(rev=selected_commit_a.hexsha)
predictions_b = load_predictions(rev=selected_commit_b.hexsha)

# Join both prediction sets on the image name. Columns that are dropped from
# the right side (true label, image path) are kept only once, from side A.
# The remaining "predicted_label" column exists on both sides, so the merge
# suffixes it with _x (commit A) and _y (commit B). Only the rows where the
# two labels differ are kept.
disagree_predictions = (
    predictions_a.drop(columns=["prediction"])
    .merge(
        predictions_b.drop(columns=["true_label", "image_path", "prediction"]),
        on="image_name",
    )
    .loc[lambda merged: merged["predicted_label_x"] != merged["predicted_label_y"]]
)

st.dataframe(disagree_predictions)
# Render every image on which the two commits disagree, captioned with both
# predicted labels and the true label.
for _, row in disagree_predictions.iterrows():
    image_path = row["image_path"]
    caption = f"{row['image_name']}: A={row['predicted_label_x']}, B={row['predicted_label_y']} (true={row['true_label']})"
    st.image(image_path, caption=caption, width=150)
# Last commit yielded by iter_commits(); used in this file as the
# repository's initial commit.
FIRST_COMMIT = [*REPO.iter_commits()][-1]
from contextlib import contextmanager


@contextmanager
def git_open(path: str, rev: str):
    """Yield a binary stream over ``path`` as committed at revision ``rev``.

    Args:
        path: Path of the file, relative to the repository root.
        rev: Any revision understood by ``REPO.commit`` (callers in this
            file pass a commit hexsha).

    Yields:
        The data stream of the blob stored for ``path`` in that commit.

    Raises:
        KeyError: If ``path`` does not exist in the tree of ``rev``.
    """
    commit = REPO.commit(rev)
    # Look the blob up directly in the commit's tree. Diffing against the
    # repository's first commit yields an empty diff whenever the file is
    # identical in both commits (e.g. when ``rev`` is the first commit
    # itself), which made indexing the diff fail.
    blob = commit.tree / str(path)
    yield blob.data_stream
import yaml
def _read_train_params(rev: str) -> dict:
    """Return the ``params.yaml`` parameters recorded for the train stage.

    Reads ``dvc.lock`` as committed at ``rev`` and extracts
    ``stages -> train -> params -> params.yaml``.

    Args:
        rev: Revision forwarded to ``git_open``.

    Returns:
        The value stored under ``params.yaml`` for the ``train`` stage.
    """
    with git_open("dvc.lock", rev=rev) as lock_stream:
        lock_content = yaml.safe_load(lock_stream)
    train_stage = lock_content["stages"]["train"]
    return train_stage["params"]["params.yaml"]
MODELS_PARAMETERS = {
commit.hexsha: _read_train_params(rev=commit.hexsha)
for commit in MODELS_COMMITS