Skip to content

Instantly share code, notes, and snippets.

@lesteve
Last active May 11, 2022 10:13
Show Gist options
  • Save lesteve/478d52599d394ec5e7f56dbf0827a5e9 to your computer and use it in GitHub Desktop.
Save lesteve/478d52599d394ec5e7f56dbf0827a5e9 to your computer and use it in GitHub Desktop.
# Adapted from https://gist.github.com/thomasjpfan/edef5d164d2992a2abd16e334ff59c79
from collections import defaultdict
import os
import re
from github import Github
# Number of the scikit-learn meta-issue whose checklist this script rewrites.
meta_issue_number = 22406

# The GitHub token is read from the environment; a missing GITHUB_TOKEN
# raises KeyError immediately.
token = os.environ["GITHUB_TOKEN"]
gh = Github(token)

sk_repo = gh.get_repo("scikit-learn/scikit-learn")
meta_issue = sk_repo.get_issue(meta_issue_number)
# Example scripts tracked by the meta-issue, as paths relative to the
# repository root. The order of this list is the order of the generated
# checklist.
all_files = [
    "examples/applications/plot_prediction_latency.py",
    "examples/applications/plot_stock_market.py",
    "examples/applications/wikipedia_principal_eigenvector.py",
    "examples/calibration/plot_calibration.py",
    "examples/classification/plot_lda_qda.py",
    "examples/cluster/plot_affinity_propagation.py",
    "examples/cluster/plot_coin_ward_segmentation.py",
    "examples/cluster/plot_dbscan.py",
    "examples/cluster/plot_dict_face_patches.py",
    "examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py",
    "examples/cluster/plot_mean_shift.py",
    "examples/cluster/plot_mini_batch_kmeans.py",
    "examples/cluster/plot_segmentation_toy.py",
    "examples/cluster/plot_ward_structured_vs_unstructured.py",
    "examples/covariance/plot_covariance_estimation.py",
    "examples/covariance/plot_sparse_cov.py",
    "examples/cross_decomposition/plot_compare_cross_decomposition.py",
    "examples/decomposition/plot_faces_decomposition.py",
    "examples/decomposition/plot_ica_blind_source_separation.py",
    "examples/decomposition/plot_ica_vs_pca.py",
    "examples/decomposition/plot_image_denoising.py",
    "examples/decomposition/plot_pca_3d.py",
    "examples/decomposition/plot_pca_vs_fa_model_selection.py",
    "examples/exercises/plot_cv_diabetes.py",
    "examples/feature_selection/plot_feature_selection.py",
    # Deliberately left out: nothing to do, this example already uses the
    # notebook style (`# %%` cell markers).
    # "examples/impute/plot_missing_values.py",
    "examples/linear_model/plot_ard.py",
    "examples/linear_model/plot_bayesian_ridge_curvefit.py",
    "examples/linear_model/plot_bayesian_ridge.py",
    "examples/linear_model/plot_lasso_and_elasticnet.py",
    "examples/linear_model/plot_lasso_dense_vs_sparse_data.py",
    "examples/linear_model/plot_logistic_path.py",
    "examples/linear_model/plot_multi_task_lasso_support.py",
    "examples/linear_model/plot_ols_3d.py",
    "examples/linear_model/plot_ridge_path.py",
    "examples/linear_model/plot_theilsen.py",
    "examples/miscellaneous/plot_kernel_ridge_regression.py",
    "examples/model_selection/grid_search_text_feature_extraction.py",
    "examples/model_selection/plot_roc_crossval.py",
    "examples/model_selection/plot_train_error_vs_test_error.py",
    "examples/neighbors/plot_regression.py",
    "examples/neural_networks/plot_rbm_logistic_classification.py",
    "examples/semi_supervised/plot_label_propagation_digits.py",
    "examples/semi_supervised/plot_label_propagation_structure.py",
    "examples/svm/plot_rbf_parameters.py",
    "examples/svm/plot_svm_anova.py",
    "examples/svm/plot_svm_regression.py",
    "examples/text/plot_document_clustering.py",
]
print("Searching for pulls")
# TODO limitation: the PR description needs to mention the meta-issue. In one
# case it was done in a further comment and it was not found by this search
search_query = f"repo:scikit-learn/scikit-learn {meta_issue_number} in:body is:pr"
issues = gh.search_issues(search_query)

# The search yields issue objects; fetch the matching pull request for each
# one so that PR-only attributes (such as `merged`) are available.
all_pulls = []
for found_issue in issues:
    all_pulls.append(sk_repo.get_pull(found_issue.number))

# Every PR that is still open gets the "Quick Review" label.
open_pulls = [pr for pr in all_pulls if pr.state == "open"]
for open_pr in open_pulls:
    open_pr.add_to_labels('Quick Review')

# Keep only what the rest of the script needs:
# (title, number, is_open, merged) for each pull request.
pull_infos = []
for pr in all_pulls:
    still_open = pr.state == "open"
    pull_infos.append((pr.title, pr.number, still_open, pr.merged))
# %%
# Maps an example path to the PRs whose title mentions it, as a list of
# ("#<number>", merged) tuples.
file_with_pulls = defaultdict(list)
print("Connecting pulls to files")

# TODO limitation: the basename of the example (possibly without .py)
# needs to be in the PR title. We could look at the files changed by the
# PR instead
#
# The basename must appear as a whole word. A plain substring test would also
# attach a PR about "plot_bayesian_ridge_curvefit" to "plot_bayesian_ridge.py",
# because the shorter name is a prefix of the longer one. The lookarounds
# reject a match that is directly preceded or followed by a letter, digit or
# underscore; a trailing ".py" is still accepted since "." is not a word
# character. The patterns do not depend on the PR, so build them once.
example_matchers = [
    (
        afile,
        re.compile(
            r"(?<!\w)"
            + re.escape(os.path.splitext(os.path.basename(afile))[0])
            + r"(?!\w)"
        ),
    )
    for afile in all_files
]

for title, number, is_open, merged in pull_infos:
    # PRs that were closed without being merged are ignored.
    if not is_open and not merged:
        continue
    file_found = False
    # No early exit: a single PR may mention several examples in its title.
    for afile, matcher in example_matchers:
        if matcher.search(title):
            file_with_pulls[afile].append((f"#{number}", merged))
            file_found = True
    if not file_found:
        print(
            f"https://github.com/scikit-learn/scikit-learn/pull/{number} "
            f"is not associated with any files. title is: {title}"
        )
# Render one markdown task-list line per example: a link to the file on the
# main branch, followed by the PRs associated with it. The box is ticked as
# soon as at least one of those PRs has been merged.
updated_files = []
for afile in all_files:
    markdown_link = f"[{afile}](https://github.com/scikit-learn/scikit-learn/blob/main/{afile})"
    # `.get` avoids creating an empty entry in the defaultdict for examples
    # that have no associated PR.
    linked_pulls = file_with_pulls.get(afile, [])
    has_merged_pull = any(was_merged for _, was_merged in linked_pulls)
    checkbox = "- [x] " if has_merged_pull else "- [ ] "
    # Each PR reference is preceded by a single space; empty when no PR.
    pull_refs = "".join(f" {ref}" for ref, _ in linked_pulls)
    updated_files.append(f"{checkbox}{markdown_link}{pull_refs}")
# %%
# Replace the auto-generated section of the meta-issue body, i.e. the text
# between a line starting with "begin_auto_generated" and the line starting
# with "end_auto_generated", with the freshly rendered checklist.
# An issue without a description has no body; treat it as empty text so the
# missing-marker check below reports the problem.
old_body = meta_issue.body or ""
pattern = re.compile(
    r"^begin_auto_generated\s+(.+)^end_auto_generated",
    flags=re.MULTILINE | re.DOTALL,
)
auto_generated_match = pattern.search(old_body)
if auto_generated_match is None:
    # Fail with an explicit message rather than an AttributeError on None.
    raise SystemExit(
        "Could not find the begin_auto_generated / end_auto_generated markers "
        f"in the body of issue #{meta_issue.number}; the issue was not updated."
    )
old_auto_generated_content = auto_generated_match.group(1)
updated_auto_generated_content = "\n".join(updated_files) + '\n\n'
# Splice by position instead of str.replace so that only the matched section
# is rewritten, even if the same text happens to occur elsewhere in the body.
section_start, section_end = auto_generated_match.span(1)
updated_body = (
    old_body[:section_start]
    + updated_auto_generated_content
    + old_body[section_end:]
)
# TODO nice to have show a diff and ask for confirmation? I did not find a way
# to do diff -uw with difflib quickly enough
meta_issue.edit(body=updated_body)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment