|
import sys
import time

import matplotlib.pyplot as plt
import requests
import structlog
from loguru import logger
|
|
|
|
|
def retrieve_projects_ids(timeout=30):
    """Collect the ids of all projects returned by the GitLab projects API.

    Walks the paginated ``{gitlab_url}/api/v4/projects`` endpoint, 100 projects
    per page, until an empty page is returned. ``gitlab_url``,
    ``private_token`` and ``logger`` are read from module-level globals.

    Args:
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of project ids in the order the API returned them, or ``None``
        if a request failed or a payload could not be processed.

    Raises:
        SystemExit: With exit code 1 when the API answers with a non-200
            status code.
    """
    list_projects_url = f"{gitlab_url}/api/v4/projects"
    # Authenticate with the PRIVATE-TOKEN header (as get_project_languages
    # does) instead of a query parameter, so the token is not part of the URL.
    headers = {"PRIVATE-TOKEN": private_token}
    params = {"per_page": 100}
    page = 1
    ids = []

    while True:
        try:
            params["page"] = page
            response = requests.get(
                list_projects_url,
                headers=headers,
                params=params,
                timeout=timeout,
            )

            if response.status_code != 200:
                logger.error(
                    "Response status code",
                    url=list_projects_url,
                    code=response.status_code,
                )
                # SystemExit is not an Exception subclass, so the handler
                # below does not swallow it.
                sys.exit(1)

            data = response.json()
            if not data:
                # An empty page means every project has been listed.
                break

            ids.extend(project["id"] for project in data)
            page += 1

            # Sleep for a short period to avoid hitting rate limits
            time.sleep(1)
        except Exception as e:
            # Best effort: report the failure and let the caller decide.
            logger.error(f"An error occurred: {e}")
            return None

    return ids
|
|
|
|
|
def calculate_languages_size(size, languages):
    """Convert per-language percentages into rounded absolute sizes.

    Args:
        size: Total size that the percentages refer to.
        languages: Mapping of language name to its percentage (0-100) of
            ``size``.

    Returns:
        A new dict mapping each language name to
        ``round(percentage / 100 * size)``. An empty mapping yields an empty
        dict.
    """
    return {
        language: round((percentage / 100.0) * size)
        for language, percentage in languages.items()
    }
|
|
|
|
|
def get_project_languages(project_id, timeout=30):
    """Fetch a project's language breakdown and convert it to absolute sizes.

    Queries the project's ``/languages`` endpoint and the project endpoint
    with ``statistics=true``, then scales the language percentages by
    ``statistics.repository_size`` via ``calculate_languages_size``.
    ``gitlab_url``, ``private_token`` and ``logger`` are read from
    module-level globals.

    Args:
        project_id: Id of the project to inspect.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A dict mapping language name to its rounded size, or ``None`` when the
        project reports no languages or when a request/payload error occurs
        (for example a missing ``statistics`` key).

    Raises:
        SystemExit: With exit code 1 when either endpoint answers with a
            non-200 status code.
    """
    try:
        project_languages = f"{gitlab_url}/api/v4/projects/{project_id}/languages"
        project_size = f"{gitlab_url}/api/v4/projects/{project_id}?statistics=true"
        headers = {"PRIVATE-TOKEN": private_token}

        languages_response = requests.get(
            project_languages, headers=headers, timeout=timeout
        )
        size_response = requests.get(project_size, headers=headers, timeout=timeout)

        if languages_response.status_code != 200 or size_response.status_code != 200:
            # Include both status codes so the failing endpoint can be
            # identified from the log.
            logger.error(
                "Response status code",
                id=project_id,
                languages_code=languages_response.status_code,
                size_code=size_response.status_code,
            )
            # SystemExit is not an Exception subclass, so the handler below
            # does not swallow it.
            sys.exit(1)

        # Decode the payload once and reuse it.
        languages = languages_response.json()
        if not languages:
            logger.info(f"Project {project_id} has no languages")
            return None

        size = size_response.json()["statistics"]["repository_size"]
        logger.info(f"Retrieved info: {languages}, id: {project_id}, size: {size}")
        return calculate_languages_size(size, languages)
    except Exception as e:
        # Best effort: a single broken project must not abort the whole run.
        logger.error(f"An error occurred: {e}", id=project_id)
        return None
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: gather language statistics for every project and
    # print (optionally plot) how often each language occurs and which share
    # of the total size it accounts for.

    # Arguments checks
    if len(sys.argv) != 4:
        print(
            "Usage: python retrieve_languages_info.py <private_token> <gitlab_url> <visualization>"
        )
        sys.exit(1)

    # These module-level names are read by the helper functions above.
    private_token = sys.argv[1]
    gitlab_url = sys.argv[2]
    visualization = sys.argv[3].lower() == "true"

    # Initialize structlog
    logger = structlog.get_logger()

    ids = retrieve_projects_ids()
    if ids is None:
        # retrieve_projects_ids() returns None on errors; without this guard
        # len(ids) below would raise a TypeError.
        logger.error("Could not retrieve project ids")
        sys.exit(1)
    logger.info(f"Retrieved {len(ids)} project ids")

    languages = {}  # language -> number of projects using it
    languages_sizes = {}  # language -> accumulated size over all projects

    for project_id in ids:
        project_languages = get_project_languages(project_id)
        if project_languages is None:
            continue
        for language, size in project_languages.items():
            languages[language] = languages.get(language, 0) + 1
            languages_sizes[language] = languages_sizes.get(language, 0) + size

    # Sort languages by size
    # The total is computed once; if every size rounded down to 0 the shares
    # are reported as 0.0 instead of dividing by zero.
    total_bytes = sum(languages_sizes.values())
    total_languages_percentages = {
        language: round((size / total_bytes) * 100, 2) if total_bytes else 0.0
        for language, size in languages_sizes.items()
    }

    sorted_languages_size = dict(
        sorted(
            total_languages_percentages.items(),
            key=lambda item: item[1],
            reverse=True,
        )
    )

    print(f"\n{sorted_languages_size}\n")

    # Sort languages by count
    sorted_languages = dict(
        sorted(languages.items(), key=lambda item: item[1], reverse=True)
    )

    print(f"{sorted_languages}\n")

    if visualization:
        # Create a bar graph of the sorted_languages dictionary
        plt.figure(figsize=(10, 5))
        plt.subplot(1, 2, 1)
        plt.bar(
            list(sorted_languages.keys())[:20], list(sorted_languages.values())[:20]
        )
        plt.xlabel("Languages")
        plt.ylabel("Count")
        plt.title("Language count across projects")
        plt.xticks(rotation=90)

        # Create a pie chart of the sorted_languages_size dictionary
        plt.subplot(1, 2, 2)
        plt.pie(
            list(sorted_languages_size.values())[:20],
            labels=list(sorted_languages_size.keys())[:20],
            autopct="%1.1f%%",
        )
        plt.title("Languages percentage across gitlab")

        plt.show()