Skip to content

Instantly share code, notes, and snippets.

@limitedeternity
Last active August 14, 2022 13:48
Show Gist options
  • Save limitedeternity/a57bd914fce987d80d2248f6abd42e65 to your computer and use it in GitHub Desktop.
Save limitedeternity/a57bd914fce987d80d2248f6abd42e65 to your computer and use it in GitHub Desktop.
A script for fetching the minimum information required to migrate away from GitHub
from contextlib import contextmanager
from functools import reduce
import json
import os
from pathlib import Path
from shutil import copyfileobj
import subprocess
from time import sleep
from typing import Any, Dict, Iterable, Iterator

from requests import Session
# Account whose gists, stars, repositories and organisations are fetched.
USERNAME = "limitedeternity"
# Token with full "repo" and "gist" scope.
# It is read from the GITHUB_TOKEN environment variable instead of being
# written in this file: a credential stored in source is visible to every
# reader, so any token that was ever embedded here should be revoked.
PERSONAL_TOKEN = os.environ.get("GITHUB_TOKEN", "")
@contextmanager
def pushd(new_dir: "str | os.PathLike[str]") -> Iterator[None]:
    """Temporarily make *new_dir* the current working directory.

    The directory that was current on entry is restored when the ``with``
    block exits, whether it finishes normally or raises.

    Args:
        new_dir: Directory to switch into; passed straight to ``os.chdir``.
    """
    prev_dir = os.getcwd()
    os.chdir(new_dir)
    try:
        yield
    finally:
        # Runs on both normal exit and exceptions, so callers never end up
        # stranded in new_dir.
        os.chdir(prev_dir)
def item_generator(
    s: "Session",
    endpoint_url: str,
    max_retries: int = 5,
    retry_delay: float = 1.0,
) -> Iterable[Dict[str, Any]]:
    """Yield every entry of a paginated list endpoint, page by page.

    Pages are requested with ``per_page=100`` starting at page 1. Iteration
    stops at the first page whose JSON body is an empty list or is not a
    list at all.

    Args:
        s: Session used to issue the GET requests.
        endpoint_url: URL of the list endpoint.
        max_retries: How many consecutive non-200 responses for the same
            page are retried before giving up.
        retry_delay: Seconds to wait before each retry.

    Yields:
        Each element of each returned page, in order.

    Raises:
        RuntimeError: If a page still does not return HTTP 200 after
            ``max_retries`` retries. Without this bound a permanent error
            such as 401 or 404 would be retried forever.
    """
    cur_page = 1
    failures = 0
    while True:
        # Let the HTTP library build the query string; this also stays
        # correct when endpoint_url already carries query parameters.
        response = s.get(endpoint_url, params={"per_page": 100, "page": cur_page})
        if response.status_code != 200:
            failures += 1
            if failures > max_retries:
                raise RuntimeError(
                    f"GET {endpoint_url} (page {cur_page}) keeps failing "
                    f"with HTTP {response.status_code}"
                )
            sleep(retry_delay)
            continue
        # The retry budget is per page, not per generator.
        failures = 0
        page = response.json()
        if not isinstance(page, list) or not page:
            break
        yield from page
        cur_page += 1
def mirror_repo(clone_url: str) -> None:
    """Mirror-clone *clone_url* into the current directory and check it out.

    The repository is cloned with ``--mirror`` into ``./.git``, switched to
    a non-bare repository, and its working tree and submodules are
    populated. Finally the repository and its submodules are repacked with
    ``git gc --aggressive``.

    If the clone itself fails, nothing else is run: without a fresh
    ``./.git`` the follow-up commands would act on whatever repository
    encloses the current directory, and ``git reset --hard`` there would
    discard unrelated work. All steps after the clone are best-effort and
    their exit codes are ignored.

    Args:
        clone_url: URL handed to ``git clone``.
    """
    # Argument lists are executed without a shell, so characters in the URL
    # are never interpreted as shell syntax; "--" stops git from reading a
    # URL that starts with "-" as an option.
    clone = subprocess.run(["git", "clone", "--mirror", "--", clone_url, ".git"])
    if clone.returncode != 0:
        # git has already reported the reason on stderr.
        return

    follow_up_steps = (
        ["config", "--bool", "core.bare", "false"],
        ["reset", "--hard"],
        ["submodule", "update", "--init", "--recursive"],
        ["submodule", "foreach", "--recursive", "git", "reset", "--hard"],
        ["gc", "--aggressive"],
        ["submodule", "foreach", "--recursive", "git", "gc", "--aggressive"],
    )
    for step in follow_up_steps:
        subprocess.run(["git", *step])
def fetch_artifacts(s: "Session", release: Dict[str, Any]) -> None:
    """Download every asset of *release* into the current directory.

    Each asset is streamed from its ``browser_download_url`` and written to
    a file named after the asset.

    Args:
        s: Session used to issue the download requests.
        release: Release object; its ``"assets"`` list is read, and a
            missing or empty list means there is nothing to download.

    Raises:
        Whatever the response's ``raise_for_status()`` raises when a
        download does not succeed.
    """
    for asset in release.get("assets") or []:
        # Keep only the last path component so an asset name can never
        # place a file outside the current directory.
        target = Path(asset["name"]).name
        with s.get(asset["browser_download_url"], stream=True) as r:
            # Fail loudly rather than saving an error page under the
            # asset's name.
            r.raise_for_status()
            with open(target, "wb") as f:
                # iter_content applies any transfer decoding, which reading
                # the raw socket stream would skip.
                for chunk in r.iter_content(chunk_size=1 << 16):
                    f.write(chunk)
# Fields kept for every starred repository in starred.json.
_STARRED_KEYS = frozenset({"full_name", "description", "language", "clone_url"})


def _safe_dirname(name: str) -> str:
    """Return *name* with path separators replaced so it names one directory."""
    return name.replace("/", "_").replace("\\", "_")


def _backup_gists(s: "Session", base_path: Path) -> None:
    """Mirror every gist of USERNAME and write their descriptions to JSON."""
    gists_dir = base_path / "gists"
    # Created up front so descriptions.json can be written even when the
    # account has no gists at all.
    gists_dir.mkdir(parents=True, exist_ok=True)

    gists_descriptions = []
    for gist in item_generator(s, f"https://api.github.com/users/{USERNAME}/gists"):
        gist_id = gist["id"]
        # The directory is named after the gist's first file; fall back to
        # the id when the file map is empty.
        top_file = next(iter(gist["files"]), gist_id)
        gist_source_path = gists_dir / _safe_dirname(f"{top_file} ({gist_id})")
        gist_source_path.mkdir(parents=True)
        with pushd(gist_source_path):
            mirror_repo(gist["git_pull_url"])
        gists_descriptions.append({gist_source_path.name: gist["description"]})

    with (gists_dir / "descriptions.json").open("w", encoding="utf-8") as f:
        json.dump(
            gists_descriptions, f, indent=4, sort_keys=True, ensure_ascii=False
        )


def _backup_starred(s: "Session", base_path: Path) -> None:
    """Write a trimmed list of the repositories USERNAME has starred."""
    base_path.mkdir(parents=True, exist_ok=True)
    starred = [
        {key: value for key, value in entry.items() if key in _STARRED_KEYS}
        for entry in item_generator(
            s, f"https://api.github.com/users/{USERNAME}/starred"
        )
    ]
    with (base_path / "starred.json").open("w", encoding="utf-8") as f:
        json.dump(starred, f, indent=4, sort_keys=True, ensure_ascii=False)


def _backup_repos(s: "Session", repos_url: str, owner: str, repos_root: Path) -> None:
    """Mirror each repository listed at *repos_url* plus its release assets.

    For every repository, the source goes to ``<repos_root>/<repo>/source``
    and the assets of each release to
    ``<repos_root>/<repo>/releases/<release name> (<tag>)``.
    """
    for repo in item_generator(s, repos_url):
        repo_name = repo["name"]
        repo_root = repos_root / repo_name

        repo_source_path = repo_root / "source"
        repo_source_path.mkdir(parents=True)
        with pushd(repo_source_path):
            mirror_repo(repo["clone_url"])

        releases_gen = item_generator(
            s, f"https://api.github.com/repos/{owner}/{repo_name}/releases"
        )
        for release in releases_gen:
            tag_name = release["tag_name"]
            # A release may have no name; use the tag instead of "None".
            release_name = release["name"] or tag_name
            # Names and tags may contain "/", which would otherwise create
            # nested directories.
            artifacts_dir = (
                repo_root
                / "releases"
                / _safe_dirname(f"{release_name} ({tag_name})")
            )
            artifacts_dir.mkdir(parents=True)
            with pushd(artifacts_dir):
                fetch_artifacts(s, release)


if __name__ == "__main__":
    if not PERSONAL_TOKEN:
        # Without a token every request would be sent with an empty
        # credential; stop before touching the network or the disk.
        raise SystemExit("PERSONAL_TOKEN is empty; provide a GitHub token first.")

    with Session() as s:
        s.headers["Accept"] = "application/vnd.github.v3+json"
        s.headers["Authorization"] = f"token {PERSONAL_TOKEN}"

        # Everything is written below "<cwd>/<USERNAME>@github".
        base_path = Path.cwd() / f"{USERNAME}@github"

        _backup_gists(s, base_path)
        _backup_starred(s, base_path)
        _backup_repos(
            s,
            f"https://api.github.com/users/{USERNAME}/repos",
            USERNAME,
            base_path / "repos",
        )

        orgs_gen = item_generator(s, f"https://api.github.com/users/{USERNAME}/orgs")
        for org in orgs_gen:
            org_name = org["login"]
            _backup_repos(
                s,
                f"https://api.github.com/orgs/{org_name}/repos",
                org_name,
                base_path / "orgs" / org_name / "repos",
            )
# Dependency manifest for the script above (pipenv-style layout; the file
# name is not shown here -- presumably "Pipfile").
[[source]]
# Package index the dependencies are resolved from; TLS verification is on.
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"
[packages]
# Runtime dependency, pinned to one exact version.
requests = "==2.27.1"
[dev-packages]
# Development-only dependency; any version is accepted.
black = "*"
[requires]
# Python version the environment is created for.
python_version = "3.9"
[pipenv]
# NOTE(review): presumably enabled so a pre-release of a dev package can be
# resolved -- confirm it is still needed.
allow_prereleases = true
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment