Skip to content

Instantly share code, notes, and snippets.

@ElMassas
Last active December 6, 2023 07:40
Show Gist options
  • Save ElMassas/c872741f8a2775927141e5001b352ab9 to your computer and use it in GitHub Desktop.
Save ElMassas/c872741f8a2775927141e5001b352ab9 to your computer and use it in GitHub Desktop.
Retrieve programming language information from private GitLab Service

Language info

This gist lets you retrieve information about all the languages used in your private GitLab service and the number of repositories in which each one is present, not the total percentage of code per language.

Usage

python3 retrieve_languagues_info.py <private-token> <gitlab_url> <graph_visualization_enabled>
import sys
import time

import matplotlib.pyplot as plt
import requests
import structlog
from loguru import logger
def retrieve_projects_ids():
    """Collect the ids of every project returned by the GitLab projects API.

    Walks the paginated ``/api/v4/projects`` endpoint of the instance named by
    the module-level ``gitlab_url``, authenticating with the module-level
    ``private_token``, and stops at the first empty page.

    Returns:
        A list with the ``id`` field of every project returned, or ``None``
        if an exception was raised while fetching or decoding a page.

    The process exits with status 1 when a page answers with a non-200
    status code.
    """
    list_projects_url = f"{gitlab_url}/api/v4/projects"
    # Send the token as a header (as get_project_languages does) so it is not
    # embedded in the request URL.
    headers = {"PRIVATE-TOKEN": private_token}
    params = {"per_page": 100}
    ids = []
    page = 1
    while True:
        params["page"] = page
        try:
            # Explicit timeout: without one a stalled connection blocks forever.
            response = requests.get(
                list_projects_url, headers=headers, params=params, timeout=30
            )
            if response.status_code != 200:
                logger.error(
                    "Response status code",
                    url=list_projects_url,
                    code=response.status_code,
                )
                # SystemExit is not an Exception subclass, so the handler
                # below does not intercept it.
                sys.exit(1)
            data = response.json()
            if not data:
                # An empty page means every project has been listed.
                break
            ids.extend(project["id"] for project in data)
        except Exception as e:
            logger.error(f"An error occurred: {e}")
            return None
        page += 1
        # Sleep for a short period to avoid hitting rate limits
        time.sleep(1)
    return ids
def calculate_languages_size(size, languages):
    """Convert per-language percentages into rounded sizes.

    Args:
        size: Total size that the percentages refer to.
        languages: Mapping of language name to its percentage (0-100) of
            ``size``.

    Returns:
        A dict mapping each language name to ``round(percentage / 100 * size)``.
    """
    return {
        name: round((share / 100.0) * size)
        for name, share in languages.items()
    }
def get_project_languages(project_id):
    """Fetch the per-language sizes of a single project.

    Queries the project's ``/languages`` endpoint and its statistics on the
    instance named by the module-level ``gitlab_url``, authenticating with the
    module-level ``private_token``, then scales the language percentages by
    the reported ``repository_size``.

    Args:
        project_id: Id of the project to inspect.

    Returns:
        A dict mapping language name to its rounded size, or ``None`` when the
        project reports no languages or an exception was raised (for example
        a network failure or a response without ``statistics``).

    The process exits with status 1 when either request answers with a
    non-200 status code.
    """
    languages_url = f"{gitlab_url}/api/v4/projects/{project_id}/languages"
    statistics_url = f"{gitlab_url}/api/v4/projects/{project_id}?statistics=true"
    headers = {"PRIVATE-TOKEN": private_token}
    try:
        # Explicit timeouts: without them a stalled connection blocks forever.
        languages_response = requests.get(languages_url, headers=headers, timeout=30)
        size_response = requests.get(statistics_url, headers=headers, timeout=30)
        if languages_response.status_code != 200 or size_response.status_code != 200:
            # Include both codes so the failing request can be identified.
            logger.info(
                "Response status code",
                id=project_id,
                languages_code=languages_response.status_code,
                size_code=size_response.status_code,
            )
            sys.exit(1)

        # Decode the body once and reuse it for the check, the log and the result.
        languages = languages_response.json()
        if not languages:
            logger.info(f"Project {project_id} has no languages")
            return None

        size = size_response.json()["statistics"]["repository_size"]
        logger.info(
            f"Retrieved info: {languages}, id: {project_id}, size: {size}"
        )
        return calculate_languages_size(size, languages)
    except Exception as e:
        logger.error(f"An error occurred: {e}", id=project_id)
        return None
if __name__ == "__main__":
    # Script entry point: read the CLI arguments, gather language data for
    # every project, print the aggregated results and optionally plot them.

    # Arguments checks
    if len(sys.argv) != 4:
        print(
            "Usage: python retrieve_languages_info.py <private_token> <gitlab_url> <visualization>"
        )
        sys.exit(1)
    private_token = sys.argv[1]
    # Drop trailing slashes so the API paths are not built with a double slash.
    gitlab_url = sys.argv[2].rstrip("/")
    visualization = sys.argv[3].lower() == "true"

    # Initialize structlog (rebinds the module-level logger used by the helpers)
    logger = structlog.get_logger()

    ids = retrieve_projects_ids()
    if ids is None:
        # retrieve_projects_ids returns None after logging an exception;
        # there is nothing to aggregate in that case.
        logger.error("Could not retrieve project ids")
        sys.exit(1)
    logger.info(f"Retrieved {len(ids)} project ids")

    # languages: number of projects in which each language appears.
    # languages_sizes: summed size of each language across all projects.
    languages = {}
    languages_sizes = {}
    for project_id in ids:
        project_languages = get_project_languages(project_id)
        if project_languages is None:
            continue
        for language, size in project_languages.items():
            languages[language] = languages.get(language, 0) + 1
            languages_sizes[language] = languages_sizes.get(language, 0) + size

    # Share of the total size per language, sorted from largest to smallest.
    # The total is computed once; a zero total yields 0.0 instead of dividing by zero.
    total_bytes = sum(languages_sizes.values())
    total_languages_percentages = {
        language: round((size / total_bytes) * 100, 2) if total_bytes else 0.0
        for language, size in languages_sizes.items()
    }
    sorted_languages_size = dict(
        sorted(
            total_languages_percentages.items(),
            key=lambda item: item[1],
            reverse=True,
        )
    )
    print(f"\n{sorted_languages_size}\n")

    # Sort languages by count
    sorted_languages = dict(
        sorted(languages.items(), key=lambda item: item[1], reverse=True)
    )
    print(f"{sorted_languages}\n")

    if visualization:
        # Create a bar graph of the sorted_languages dictionary (top 20 entries)
        plt.figure(figsize=(10, 5))
        plt.subplot(1, 2, 1)
        plt.bar(
            list(sorted_languages.keys())[:20], list(sorted_languages.values())[:20]
        )
        plt.xlabel("Languages")
        plt.ylabel("Count")
        plt.title("Language count across projects")
        plt.xticks(rotation=90)

        # Create a pie chart of the sorted_languages_size dictionary (top 20 entries)
        plt.subplot(1, 2, 2)
        plt.pie(
            list(sorted_languages_size.values())[:20],
            labels=list(sorted_languages_size.keys())[:20],
            autopct="%1.1f%%",
        )
        plt.title("Languages percentage across gitlab")
        plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment