Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save zlocate/47de4ff12ccbd0f22f4a93e92a270a7f to your computer and use it in GitHub Desktop.
Save zlocate/47de4ff12ccbd0f22f4a93e92a270a7f to your computer and use it in GitHub Desktop.
gitlab_remove_expired_artifacts_by_group.py
#!/usr/bin/env python3
import time
import requests
import sys
from datetime import datetime, timezone
from dateutil import parser
import re
# Command-line handling: exactly three positional arguments are required
# (GitLab host name, API token, numeric or URL-encoded group ID).
if len(sys.argv) != 4:
    print(f'Usage: {sys.argv[0]} <server> <token> <group id>')
    # sys.exit() instead of the bare exit() helper: the latter is injected by
    # the `site` module for interactive use and is not guaranteed to exist.
    sys.exit(1)

server = sys.argv[1]    # host name only; "https://" is prepended when URLs are built
token = sys.argv[2]     # sent as the 'private-token' header on every request
group_id = sys.argv[3]  # interpolated into /api/v4/groups/<group_id>/projects
# Reference instant (timezone-aware, UTC) that artifact expiry timestamps are
# compared against further down.
now = datetime.now(timezone.utc)

# Collect the ID of every project returned for the group.
#
# GitLab caps `per_page` at 100, so asking for 500 silently returned at most
# 100 projects. Request the maximum page size and follow the `next` relation
# of the Link header until the listing is exhausted.
projects_url = f"https://{server}/api/v4/groups/{group_id}/projects?per_page=100&page=1"
project_ids = []
while projects_url:
    response = requests.get(
        projects_url,
        headers={
            'private-token': token,
        },
    )
    # Same retry policy as the other listing loops in this script: wait and
    # re-request the same page on a server error or rate limit.
    if response.status_code in [500, 429]:
        print(f"Status {response.status_code}, retrying.")
        time.sleep(10)
        continue
    response.raise_for_status()
    for project in response.json():
        project_ids.append(int(project['id']))
    # `response.links` is the parsed Link header; no 'next' entry ends the loop.
    projects_url = response.links.get('next', {}).get('url', None)

print(f'Number of projects found: {len(project_ids)}')
# Matches the ref of a merge-request pipeline job and captures the MR iid.
_MERGE_REQUEST_REF = re.compile(r'refs\/merge-requests\/(\d+)\/head')


def _iter_pages(url, token):
    """Yield ``(page_url, items)`` for every page of a paginated GitLab listing.

    Issues a GET for *url* with *token* in the ``private-token`` header and
    follows the ``next`` relation of the Link header until there is none.
    A 500 or 429 response is retried after a 10 second pause (indefinitely);
    any other error status raises ``requests.HTTPError`` via
    ``raise_for_status()``.
    """
    while url:
        response = requests.get(
            url,
            headers={
                'private-token': token,
            },
        )
        if response.status_code in (500, 429):
            print(f"Status {response.status_code}, retrying.")
            time.sleep(10)
            continue
        response.raise_for_status()
        yield url, response.json()
        url = response.links.get('next', {}).get('url', None)


overall_space_savings = 0
for project_id in project_ids:
    print(f'Processing project {project_id}:')

    # Map merge request iid -> state for every merge request of the project.
    merge_requests = {}
    merge_request_url = f"https://{server}/api/v4/projects/{project_id}/merge_requests?scope=all&per_page=100&page=1"
    for _, page in _iter_pages(merge_request_url, token):
        for merge_request in page:
            iid = merge_request.get('iid', None)
            if iid:
                merge_requests[int(iid)] = merge_request['state']

    # Names of branches GitLab reports as not merged. Kept in a set because
    # membership is tested once per job below.
    unmerged_branches = set()
    branch_url = f"https://{server}/api/v4/projects/{project_id}/repository/branches?per_page=100&page=1"
    for _, page in _iter_pages(branch_url, token):
        for branch in page:
            if not branch['merged']:
                unmerged_branches.add(branch['name'])

    job_count = 0
    artifact_count = 0
    artifact_size = 0
    deleted_artifact_count = 0
    deleted_artifact_size = 0

    jobs_url = f"https://{server}/api/v4/projects/{project_id}/jobs?per_page=100&page=1"
    for page_url, jobs in _iter_pages(jobs_url, token):
        for job in jobs:
            job_count += 1

            expire_at_string = job.get('artifacts_expire_at', None)
            expire_at = parser.parse(expire_at_string) if expire_at_string else None
            # NOTE(review): a job without an expiry timestamp is treated as
            # expired, exactly as before - confirm that "keep forever"
            # artifacts are really meant to be removed.
            is_expired = not expire_at or expire_at < now

            # Tally every artifact except the job log; remember how much of
            # it belongs to this job and is past its expiry.
            expired_job_artifact_count = 0
            expired_job_artifact_size = 0
            for artifact in job.get('artifacts', None) or []:
                if artifact['filename'] == 'job.log':
                    continue
                size = artifact['size']
                artifact_count += 1
                artifact_size += size
                if is_expired:
                    expired_job_artifact_count += 1
                    expired_job_artifact_size += size

            if not expired_job_artifact_count:
                continue

            # Only delete when the work the job belongs to is finished:
            #  - merge-request jobs: the MR is merged, closed, or unknown;
            #  - all other jobs: the ref is not the name of an unmerged branch.
            ref = job['ref']
            merge_request_iid_match = _MERGE_REQUEST_REF.search(ref)
            if merge_request_iid_match:
                merge_request_status = merge_requests.get(int(merge_request_iid_match.group(1)))
                delete_artifacts = merge_request_status in ('merged', 'closed', None)
            else:
                delete_artifacts = ref not in unmerged_branches

            if not delete_artifacts:
                continue

            job_id = job['id']
            print(f"Processing job ID: {job_id}", end="")
            delete_response = requests.delete(
                f"https://{server}/api/v4/projects/{project_id}/jobs/{job_id}/artifacts",
                headers={
                    'private-token': token,
                },
            )
            print(f" - status: {delete_response.status_code}\033[K", end = "\r")

            # Count the artifacts as removed only when the server accepted the
            # request; previously a failed DELETE still showed up as savings.
            if delete_response.ok:
                deleted_artifact_count += expired_job_artifact_count
                deleted_artifact_size += expired_job_artifact_size

        print(f'Processed page {page_url}.\033[K', end = "\r")

    overall_space_savings += deleted_artifact_size

    # Per-project summary.
    print()
    print(f'Jobs analysed: {job_count}')
    print(f'Pre artifact count: {artifact_count}')
    print(f'Pre artifact size [MB]: {artifact_size / (1024 * 1024)}')
    print(f'Post artifact count: {artifact_count - deleted_artifact_count}')
    print(f'Post artifact size [MB]: {(artifact_size - deleted_artifact_size) / (1024 * 1024)}')
    print()

print(f'Overall savings [MB]: {overall_space_savings / (1024 * 1024)}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment