-
-
Save akunzai/45c24431006883e07a62cfbbbd6da5f7 to your computer and use it in GitHub Desktop.
GitLab Artifacts Clean-Up
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
# encoding: utf-8 | |
""" | |
This is a small python script to clear up old gitlab build artifacts. | |
""" | |
import argparse | |
import datetime | |
import functools | |
import json | |
import logging | |
import os.path | |
import pytz | |
import re | |
import requests | |
import sys | |
from typing import Optional | |
# Command-line interface: connection, project filtering, retention window,
# and dry-run control. Parsed once at import time; `args` is read by the
# functions below as a module-level global.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--base_url", help='base URL (default: https://gitlab.com)', default='https://gitlab.com', )
parser.add_argument("--access_token", help='access token')
# Regexes matched case-insensitively against the project's
# path_with_namespace by filter_project(); exclude wins over include.
parser.add_argument("--include", help='include projects in regex')
parser.add_argument("--exclude", help='exclude projects in regex')
parser.add_argument(
    "--expires_days", help='expiration in days (default: 7)', default=7, type=int)
# Without --delete the run is a dry run: sizes are reported, nothing is removed.
parser.add_argument("--delete", help='do actual deletion', action="store_true")
args = parser.parse_args()
# Aware UTC cut-off: artifacts of jobs that finished before this moment
# are considered expired.
now = datetime.datetime.now(tz=pytz.utc)
delete_everything_older_than = now - datetime.timedelta(days=args.expires_days)
def fetch_projects() -> list[dict]:
    """Fetch all owned, non-archived projects from the GitLab API.

    Returns:
        A list of ``{'id': ..., 'name': ...}`` summaries, where *name*
        is the project's ``path_with_namespace``.
    """
    logging.debug('start fetching list of projects')
    query = {'simple': 'true', 'archived': 'false', 'owned': 'true', 'per_page': 100}
    projects = []
    # Each API page arrives as raw bytes; decode and keep only id + name.
    for raw_page in make_api_call('/projects', query, True):
        projects.extend(
            {'id': entry['id'], 'name': entry['path_with_namespace']}
            for entry in json.loads(raw_page)
        )
    return projects
def fetch_jobs(project_id: str) -> list[dict]:
    """Fetch summaries of every job of one project.

    Args:
        project_id: numeric GitLab project id.

    Returns:
        A list of dicts with keys ``id``, ``project_id``, ``artifacts``
        (the job's artifact descriptors) and ``date`` (``finished_at``).
    """
    jobs = []
    for raw_page in make_api_call(f"/projects/{project_id}/jobs", {'per_page': 2000}):
        for entry in json.loads(raw_page):
            jobs.append({
                'id': entry['id'],
                'project_id': project_id,
                'artifacts': entry['artifacts'],
                'date': entry['finished_at']
            })
    return jobs
# Timestamp layout of GitLab's `finished_at` field, e.g. 2020-01-02T03:04:05.678+00:00
date_format = "%Y-%m-%dT%H:%M:%S.%f%z"


def delete_artifacts_of_project(target_project: dict, dry_run: bool = True) -> float:
    """Delete (or, on a dry run, just measure) expired artifacts of one project.

    Args:
        target_project: summary dict with 'project_name' and 'jobs'; each job
            carries 'id', 'project_id', 'artifacts' and 'date' (finished_at).
        dry_run: when True (the default) only the reclaimable size is
            computed — no DELETE calls are issued.

    Returns:
        Total artifact bytes counted (and deleted when dry_run is False).
    """
    deleted_bytes = 0
    total_num_of_jobs = len(target_project['jobs'])
    for i, job in enumerate(target_project['jobs'], start=1):
        if not job['artifacts']:
            continue
        # Jobs that never finished have no finished_at; skip them instead of
        # crashing in strptime (previously an unguarded TypeError).
        if not job['date']:
            continue
        date = datetime.datetime.strptime(job['date'], date_format)
        if date < delete_everything_older_than:
            # BUG FIX: the original reduce lambda parsed as
            # `(total + size) if size else 0` due to conditional-expression
            # precedence, resetting the running total to zero whenever a
            # single artifact reported a falsy size.
            deleted_bytes += sum(a['size'] or 0 for a in job['artifacts'])
            if not dry_run:
                logging.info(
                    f"deleting job artifacts of {target_project['project_name']}: [{i}/{total_num_of_jobs}]")
                try:
                    make_api_call(f'/projects/{job["project_id"]}/jobs/{job["id"]}/artifacts', {},
                                  method='delete', all_pages=False)
                except RuntimeError:
                    # make_api_call raises RuntimeError only for unsupported
                    # HTTP verbs; HTTP failures are logged there, not raised.
                    pass
    logging.info(
        f"deleted {format_bytes(deleted_bytes)} for project {target_project['project_name']}")
    return deleted_bytes
def filter_project(project: dict) -> bool:
    """Predicate for the --include/--exclude command-line filters.

    A project is dropped if it matches --exclude, or if --include is set
    and it does not match it. Matching is case-insensitive against
    ``project["name"]`` (the path_with_namespace).

    Returns:
        True when the project should be processed.
    """
    # BUG FIX: the original annotated the return type as `dict`,
    # but this is a boolean predicate used with filter().
    if args.exclude and re.match(args.exclude, project["name"], re.IGNORECASE):
        return False
    if args.include and not re.match(args.include, project["name"], re.IGNORECASE):
        return False
    return True
def build_projects_jobs_and_artifacts_list(list_of_projects: list[dict]) -> list[dict]:
    """Collect jobs and total artifact size for every project that passes the filters.

    Args:
        list_of_projects: project summaries as produced by fetch_projects().

    Returns:
        A list of ``{'project_id', 'project_name', 'total_size', 'jobs'}``
        dicts, sorted by total_size descending.
    """
    projects = list(filter(filter_project, list_of_projects))
    num_of_projects = len(projects)
    artifact_sizes_by_project = []
    for i, project in enumerate(projects, start=1):
        logging.info(f'fetching {project["name"]} [{i}/{num_of_projects}]')
        jobs = fetch_jobs(project['id'])
        # BUG FIX: the original nested reduce lambda parsed as
        # `(sub_total + size) if size else 0`, zeroing the accumulated sum
        # whenever one artifact had a falsy size. Plain nested sums are
        # both correct and clearer.
        total_size = sum(
            sum(artifact['size'] or 0 for artifact in job['artifacts'])
            for job in jobs)
        artifact_sizes_by_project.append({
            'project_id': project['id'],
            'project_name': project['name'],
            'total_size': total_size,
            'jobs': jobs
        })
    artifact_sizes_by_project.sort(key=lambda e: e['total_size'], reverse=True)
    return artifact_sizes_by_project
def make_api_call(path: str, params: dict, all_pages: bool = True, method: str = 'get') -> list[bytes]:
    """Call the GitLab v4 API, optionally following pagination Link headers.

    Args:
        path: API path beginning with '/', e.g. '/projects'.
        params: extra query parameters (appended unencoded).
        all_pages: follow rel="next" links until exhausted.
        method: 'get' or 'delete'.

    Returns:
        List of raw response bodies, one per fetched page.

    Raises:
        RuntimeError: if *method* is not supported.
    """
    # Validate once, up front, instead of on every loop iteration.
    if method not in ('get', 'delete'):
        raise RuntimeError(f"unsupported method '{method}'")
    api_url = args.base_url + '/api/v4'
    # NOTE(review): the access token ends up in the URL and therefore in
    # debug logs and server access logs; consider sending it via the
    # PRIVATE-TOKEN header instead — verify which token type is in use.
    params_for_request = f"?access_token={args.access_token}"
    for key, value in params.items():
        params_for_request += f"&{key}={value}"
    url = api_url + path + params_for_request
    results = []
    while url is not None:
        # BUG FIX: the original always logged "GET request" even for DELETE.
        logging.debug(f'{method.upper()} request to {url}')
        result = requests.get(url) if method == 'get' else requests.delete(url)
        if result.status_code >= 400:
            logging.error(
                f'API call failed! Got response code {result.status_code} when tried to call {url}')
            break
        results.append(result.content)
        url = get_next_from_link_header(
            result.headers.get('Link')) if all_pages else None
    return results
def get_next_from_link_header(link_header: Optional[str]) -> Optional[str]:
    """Extract the URL marked rel="next" from an HTTP Link header.

    Args:
        link_header: raw Link header value, or None when the header is absent.

    Returns:
        The next-page URL, or None when there is no further page.
    """
    # BUG FIX: `headers.get('Link')` yields None on unpaginated responses;
    # the original passed that straight into re.findall -> TypeError.
    if not link_header:
        return None
    match = re.search(r'<(https://\S+)>; rel="next"', link_header)
    return match.group(1) if match else None
# adapted from https://stackoverflow.com/questions/12523586/python-format-size-application-converting-b-to-kb-mb-gb-tb
def format_bytes(bytes_to_format) -> str:
    """Return the given byte count as a human friendly KB, MB, GB, or TB string."""
    b = float(bytes_to_format)
    kb = 1024.0
    mb = kb ** 2  # 1,048,576
    gb = kb ** 3  # 1,073,741,824
    tb = kb ** 4  # 1,099,511,627,776
    if b < kb:
        # BUG FIX: the original plural test `0 == b > 1` chained to
        # `0 == b and b > 1`, which is always False, so every size was
        # labelled 'Byte'. Only exactly 1 is singular.
        return '{0} {1}'.format(b, 'Byte' if b == 1 else 'Bytes')
    elif b < mb:
        return '{0:.2f} KB'.format(b / kb)
    elif b < gb:
        return '{0:.2f} MB'.format(b / mb)
    elif b < tb:
        return '{0:.2f} GB'.format(b / gb)
    else:
        return '{0:.2f} TB'.format(b / tb)
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    if not args.access_token:
        logging.error('access_token must be set!')
        sys.exit(1)
    if not args.base_url:
        logging.error('base_url must be set!')
        sys.exit(1)
    cache_file = 'gitlab-artifacts-cleanup.json'
    if not os.path.exists(cache_file):
        jobs_and_artifacts_list = build_projects_jobs_and_artifacts_list(
            fetch_projects())
        # Cache the expensive API scan so a later run (e.g. with --delete)
        # can reuse it. BUG FIX: files were previously opened without a
        # context manager and leaked on exception.
        with open(cache_file, 'w') as fp:
            json.dump(jobs_and_artifacts_list, fp)
    else:
        with open(cache_file) as fp:
            jobs_and_artifacts_list = json.load(fp)
    # Report per-project sizes, largest first (list is pre-sorted).
    for entry in jobs_and_artifacts_list:
        logging.info(
            f"{entry['project_name']}: \t{format_bytes(entry['total_size'])}")
    total_deleted = 0
    for project_summary in jobs_and_artifacts_list:
        total_deleted += delete_artifacts_of_project(
            project_summary, dry_run=(not args.delete))
    logging.info(f"deleted a total of {format_bytes(total_deleted)}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment