Created
August 18, 2025 21:13
-
-
Save danielscholl/1f2420b2cf4102600abc133a10795b9f to your computer and use it in GitHub Desktop.
OSDU Microsoft Contributions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env -S uv run --script
# /// script
# dependencies = [
#     "typer",
#     "rich",
#     "python-dateutil",
#     "python-gitlab",
#     "asyncio",
# ]
# requires-python = ">=3.8"
# ///
"""
OSDU Microsoft Contribution Analyzer (Parallel Version)

This script analyzes Microsoft employee contributions to OSDU GitLab projects,
using a Rich console interface for reporting and direct GitLab API access.

Data Access Method:
- **Direct GitLab API**: Uses the python-gitlab library for fast, reliable API calls

Usage:
    uv run analyze_osdu_contributions_parallel.py [OPTIONS]

Options:
    --days INTEGER          Number of days to analyze (default: 30)
    --start-date TEXT       Start date (YYYY-MM-DD format)
    --end-date TEXT         End date (YYYY-MM-DD format)
    --output TEXT           Output format: console, json (default: console)
    --verbose               Show detailed progress information
    --max-workers INTEGER   Number of parallel workers, 1-10 (default: 8)
    --gitlab-token TEXT     GitLab Personal Access Token (or set GITLAB_TOKEN env var)
    --help                  Show this message and exit

Requirements:
- GitLab Personal Access Token (either via --gitlab-token or GITLAB_TOKEN env var)
- Access to OSDU GitLab projects

Note: The script uses parallel processing with python-gitlab for optimal performance.
"""
import os | |
import sys | |
import json | |
import csv | |
import asyncio | |
from datetime import datetime, timedelta | |
from typing import Optional, List, Dict, Any, Set | |
from pathlib import Path | |
import time | |
from concurrent.futures import ThreadPoolExecutor, as_completed | |
from threading import Lock | |
import re | |
import gitlab | |
from gitlab.exceptions import GitlabAuthenticationError, GitlabGetError | |
import typer | |
from rich import print | |
from rich.console import Console | |
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn | |
from rich.table import Table | |
from rich.panel import Panel | |
from rich.tree import Tree | |
from rich.layout import Layout | |
from rich.box import ROUNDED, DOUBLE_EDGE | |
from rich.theme import Theme | |
from rich.text import Text | |
from rich.align import Align | |
from rich.columns import Columns | |
from rich.live import Live | |
from dateutil import parser as date_parser | |
# Semantic style names referenced from Rich markup (e.g. "[error]...[/error]")
# throughout the report output.
custom_theme = Theme(
    styles={
        "info": "dim white",
        "warning": "bold yellow",
        "error": "bold red",
        "success": "bold green",
        "microsoft": "bold blue",
        "gitlab": "bold orange3",
        "project": "cyan",
        "contributor": "magenta",
        "metric": "green",
        "header": "bold white on blue",
    }
)

# Shared console; every message in this script is printed through it so the
# theme above applies everywhere.
console = Console(theme=custom_theme)

# Typer application object that owns the CLI command defined further down.
app = typer.Typer(
    help="OSDU Microsoft Contribution Analyzer (Parallel)",
    add_completion=False,
)
# Base URL of the GitLab instance that hosts the OSDU projects.
OSDU_GITLAB_URL = "https://community.opengroup.org"

# GitLab usernames treated as Microsoft employees (33 entries). Membership
# tests against this set are exact, case-sensitive string comparisons.
MICROSOFT_EMPLOYEES = {
    "Harshika",
    "KiranV",
    "NikhilSingh",
    "Nishant",
    "abpatil",
    "adsali",
    "ajoshi19",
    "akshayps",
    "aneesh43",
    "anthonyitti",
    "anubhavaron",
    "ashley_beitz",
    "charleszipp",
    "cmongin",
    "dacarpen",
    "danielscholl",
    "japicket",
    "jordanchiu",
    "kiranbedre",
    "krganesan",
    "kumarakshay",
    "kymille",
    "ladatz",
    "lucynliu",
    "marijadukic",
    "mayank.saggar",
    "mmuralidhar",
    "nursheikh",
    "petermil",
    "preeti",
    "sabarishk",
    "spancholi",
    "wangjulia",
}
# Embedded OSDU project list (32 core platform projects). Every project lives
# under the "osdu/platform/" namespace, so only the remainder of each path is
# spelled out below; list order is preserved.
OSDU_PROJECTS = [
    f"osdu/platform/{relative_path}"
    for relative_path in (
        # Core System Services (10 projects)
        "system/dataset",
        "system/file",
        "system/indexer",
        "system/indexer-queue",
        "system/notification",
        "system/partition",
        "system/register",
        "system/schema-service",
        "system/search",
        "system/storage",
        # Reference Services (5 projects)
        "system/reference/crs-catalog-service",
        "system/reference/crs-conversion-service",
        "system/reference/reference-and-helper-services",
        "system/reference/schema-upgrade",
        "system/reference/unit-service",
        # Security and Compliance (5 projects)
        "security-and-compliance/policy",
        "security-and-compliance/entitlements",
        "security-and-compliance/secret",
        "security-and-compliance/legal",
        "security-and-compliance/home",
        # Data Flow (4 projects)
        "data-flow/ingestion/ingestion-workflow",
        "data-flow/ingestion/manifest-ingestor",
        "data-flow/ingestion/segy-to-mdio-conversion-dag",
        "data-flow/real-time/RTDIP/rtss",
        # Domain Services (4 projects; eds-dms sits under the data-flow path)
        "data-flow/ingestion/external-data-sources/eds-dms",
        "domain-data-mgmt-services/wellbore/wellbore-domain-services",
        "domain-data-mgmt-services/well-delivery/well-delivery",
        "domain-data-mgmt-services/seismic/seismic-dms-suite/seismic-store-service",
        # Libraries (2 projects)
        "system/lib/core",
        "system/lib/cloud/azure/os-core-lib-azure",
        # Deployment & Operations (2 projects)
        "deployment-and-operations/infra-azure-provisioning",
        "deployment-and-operations/helm-charts-azure",
    )
]
class OSDUContributionAnalyzer:
    """Analyzes Microsoft contributions to OSDU projects using direct GitLab API access.

    Workflow (see ``run_analysis``): connect to GitLab, fan the configured
    projects out over a thread pool, collect merge requests, approvals and
    review comments made by tracked usernames, aggregate them into
    ``self.stats`` and render Rich tables (optionally exporting JSON).

    NOTE(review): the indentation of this class was lost in the copy it was
    reconstructed from; nesting was restored from the control-flow keywords.
    The leading glyphs in several console messages ("β", "π") look like
    mis-decoded emoji and are kept verbatim - confirm against upstream.
    """

    # Ordered (path marker, category key) pairs consulted when a project is
    # not a plain system service. Order matters: the first marker found wins.
    _CATEGORY_MARKERS = (
        ('/reference/', "reference_services"),
        ('/security-and-compliance/', "security_compliance"),
        ('/data-flow/', "data_flow"),
        ('/domain-data-mgmt-services/', "domain_services"),
        ('/lib/', "libraries"),
        ('/deployment-and-operations/', "deployment_operations"),
    )

    # GitLab MR state -> (global counter key, per-project counter key).
    _MR_STATE_COUNTERS = {
        'opened': ("total_open_mrs", "open_mrs"),
        'merged': ("total_merged_mrs", "merged_mrs"),
        'closed': ("total_closed_mrs", "closed_mrs"),
    }

    def __init__(self, days: int = 30, start_date: Optional[str] = None,
                 end_date: Optional[str] = None, verbose: bool = False, max_workers: int = 8,
                 gitlab_token: Optional[str] = None):
        """Initialize the analyzer with date range and GitLab API access.

        Args:
            days: Window length used when ``start_date`` is not given.
            start_date: Optional start of the window (parsed by dateutil).
            end_date: Optional end of the window (parsed by dateutil).
            verbose: Print per-project progress and recoverable errors.
            max_workers: Thread-pool size for the per-project API calls.
            gitlab_token: Personal access token; falls back to the
                ``GITLAB_TOKEN`` environment variable.

        Exits the process with status 1 when no token is available.
        """
        self.verbose = verbose
        self.max_workers = max_workers
        self.microsoft_employees: Set[str] = MICROSOFT_EMPLOYEES.copy()
        self.project_list: List[str] = OSDU_PROJECTS.copy()
        self.contributions: Dict[str, Any] = {}
        self.analysis_results: Dict[str, Any] = {}
        self.stats_lock = Lock()  # Held while merging results into shared state

        # Set up GitLab connection (the client itself is created lazily)
        self.gitlab_token = gitlab_token or os.getenv('GITLAB_TOKEN')
        self.gitlab_url = OSDU_GITLAB_URL
        self.gitlab_client = None
        if not self.gitlab_token:
            console.print("[error]β GitLab token required. Set GITLAB_TOKEN env var or use --gitlab-token[/error]")
            sys.exit(1)

        self._setup_date_range(days, start_date, end_date)
        self.stats = self._new_stats()

    @staticmethod
    def _new_stats() -> Dict[str, Any]:
        """Return a zeroed statistics dict.

        ``total_contributors`` is present from the start so the summary can be
        rendered even when the analysis never ran.
        """
        return {
            "total_projects": 0,
            "projects_with_contributions": 0,
            "total_merge_requests": 0,
            "total_open_mrs": 0,
            "total_merged_mrs": 0,
            "total_closed_mrs": 0,
            "total_reviews": 0,
            "total_approvals": 0,
            "active_contributors": set(),
            "project_breakdown": {},  # Per-project MR/review/approval counters
            "project_categories": {
                "system_services": 0,
                "reference_services": 0,
                "security_compliance": 0,
                "data_flow": 0,
                "domain_services": 0,
                "libraries": 0,
                "deployment_operations": 0,
            },
            "total_contributors": 0,
        }

    def _setup_date_range(self, days: int, start_date: Optional[str], end_date: Optional[str]):
        """Setup the analysis date range.

        An explicit ``end_date`` wins over "today"; an explicit ``start_date``
        wins over ``end_date - days``. Sets ``start_date``, ``end_date`` and
        ``days_analyzed`` on the instance.
        """
        parsed_start = date_parser.parse(start_date).date() if start_date else None
        parsed_end = date_parser.parse(end_date).date() if end_date else None

        self.end_date = parsed_end if parsed_end is not None else datetime.now().date()
        if parsed_start is not None:
            self.start_date = parsed_start
        else:
            self.start_date = self.end_date - timedelta(days=days)
        self.days_analyzed = (self.end_date - self.start_date).days

    def display_header(self):
        """Display the application header with Rich formatting."""
        header_content = "\n".join([
            "OSDU Microsoft Contribution Analyzer",
            f"[info]Analysis Period:[/info] {self.start_date} to {self.end_date} ([metric]{self.days_analyzed} days[/metric])",
            f"[info]Microsoft Employees:[/info] [metric]{len(self.microsoft_employees)}[/metric] tracked",
            f"[info]OSDU Projects:[/info] [metric]{len(self.project_list)}[/metric] core platform projects",
            f"[info]Generated:[/info] {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        ])
        console.print(Panel(
            Align.center(header_content),
            title="π OSDU Analysis Report",
            border_style="dim",
            box=ROUNDED
        ))
        console.print()

    def load_data_sources(self):
        """Display loaded data sources and access method (verbose mode only).

        Reports the connection state as it is at call time; it does not try
        to connect itself.
        """
        if not self.verbose:
            return
        console.print(f"[info]Loaded {len(self.microsoft_employees)} Microsoft employees[/info]")
        console.print(f"[info]Loaded {len(self.project_list)} OSDU core projects[/info]")
        if self.gitlab_client:
            console.print("[success]β GitLab API connected[/success]")
        else:
            console.print("[error]β No GitLab access - check token and connectivity[/error]")

    def _init_gitlab_client(self) -> bool:
        """Initialize GitLab client connection.

        Returns:
            True when an authenticated client is available, False otherwise.
            ``self.gitlab_client`` is only assigned after ``auth()`` succeeds,
            so a failed attempt can be retried.
        """
        if self.gitlab_client:
            return True
        try:
            client = gitlab.Gitlab(self.gitlab_url, private_token=self.gitlab_token)
            # Test the connection before publishing the client on the instance
            client.auth()
            if self.verbose:
                user = client.user
                console.print(f"[success]β Connected to GitLab as: {user.username}[/success]")
        except GitlabAuthenticationError:
            console.print("[error]β GitLab authentication failed - check your token[/error]")
            return False
        except Exception as e:
            console.print(f"[error]β GitLab connection failed: {e}[/error]")
            return False
        self.gitlab_client = client
        return True

    def analyze_contributions(self) -> bool:
        """Main analysis method with parallel GitLab API processing.

        Returns:
            True when the analysis ran, False when GitLab was unreachable.
        """
        console.print(Panel(
            "[info]Starting parallel contribution analysis...[/info]",
            title="π Analysis Phase",
            border_style="yellow"
        ))
        if not self._init_gitlab_client():
            console.print("[error]β Cannot connect to GitLab API.[/error]")
            return False
        console.print("[success]π Using direct GitLab API[/success]")
        self._analyze_parallel()

        # Process final statistics
        self.stats["total_projects"] = len(self.project_list)
        self.stats["projects_with_contributions"] = len(self.contributions)
        self.stats["total_contributors"] = len(self.stats["active_contributors"])
        console.print()
        console.print("[success]β Analysis complete![/success]")
        return True

    def _analyze_parallel(self):
        """Parallel analysis using direct GitLab API calls.

        One task per project is submitted to a thread pool; results are merged
        into ``self.contributions`` / ``self.stats`` in the calling thread as
        the futures complete.
        """
        with Progress(
            TextColumn("[progress.description]{task.description}"),
            BarColumn(bar_width=40),
            TaskProgressColumn(),
            console=console
        ) as progress:
            main_task = progress.add_task(
                "Analyzing projects (GitLab API)...",
                total=len(self.project_list)
            )
            with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
                future_to_project = {
                    executor.submit(self._analyze_project, project_path): project_path
                    for project_path in self.project_list
                }
                for future in as_completed(future_to_project):
                    project_path = future_to_project[future]
                    progress.update(main_task, advance=1)
                    try:
                        project_contributions = future.result()
                    except Exception as e:
                        if self.verbose:
                            console.print(f"[error]Error analyzing {project_path}: {e}[/error]")
                        continue
                    if project_contributions:
                        with self.stats_lock:
                            self.contributions[project_path] = project_contributions
                            self._update_statistics(project_path, project_contributions)

    def _mr_query_window(self) -> Dict[str, str]:
        """Return the ``created_after`` / ``created_before`` filters for MR listing.

        The upper bound is the day *after* ``end_date``: a date-only value is
        a midnight bound, so passing ``end_date`` itself would drop every MR
        created during the final day of the window.
        """
        return {
            "created_after": self.start_date.isoformat(),
            "created_before": (self.end_date + timedelta(days=1)).isoformat(),
        }

    def _approval_record(self, approval_user: Dict[str, Any], mr_title: str,
                         mr_created_at: Any) -> Optional[Dict[str, Any]]:
        """Build an approval contribution, or None if the approver is not tracked."""
        user = approval_user.get('user') or {}
        reviewer_username = user.get('username')
        if not reviewer_username or reviewer_username not in self.microsoft_employees:
            return None
        return {
            "username": reviewer_username,
            "name": user.get('name', ''),
            "title": f"Approved: {mr_title}",
            "created_at": mr_created_at,  # Approval timestamp is not read here; MR creation date is used
            "state": "approved",
            "contribution_type": "approval"
        }

    def _review_record(self, note: Dict[str, Any], mr_title: str, mr_created_at: Any,
                       author_username: Optional[str]) -> Optional[Dict[str, Any]]:
        """Build a review contribution from a discussion note, or None.

        Skipped: system-generated notes (activity log entries, not comments),
        notes from untracked users, and notes by the MR author (self-review).
        """
        if note.get('system'):
            return None
        note_author = note.get('author') or {}
        reviewer_username = note_author.get('username')
        if (not reviewer_username
                or reviewer_username not in self.microsoft_employees
                or reviewer_username == author_username):
            return None
        return {
            "username": reviewer_username,
            "name": note_author.get('name', ''),
            "title": f"Reviewed: {mr_title}",
            "created_at": note.get('created_at', mr_created_at),
            "state": "reviewed",
            "contribution_type": "review"
        }

    def _collect_review_activity(self, project: Any, mr: Any, author_username: Optional[str],
                                 project_path: str) -> List[Dict[str, Any]]:
        """Collect approvals and review comments on one MR (best effort).

        Each lookup is allowed to fail independently (e.g. approvals not
        available on the instance); failures are reported in verbose mode and
        whatever was gathered so far is still returned.
        """
        activity: List[Dict[str, Any]] = []
        try:
            # Get the full MR object with more details
            mr_full = project.mergerequests.get(mr.iid)
        except Exception as e:
            if self.verbose:
                console.print(f"[warning]Could not load MR !{mr.iid} in {project_path}: {e}[/warning]")
            return activity

        try:
            approvals = mr_full.approvals.get()
            for approval_user in approvals.approved_by:
                record = self._approval_record(approval_user, mr.title, mr.created_at)
                if record:
                    activity.append(record)
        except Exception as e:
            if self.verbose:
                console.print(f"[warning]Could not load approvals for MR !{mr.iid} in {project_path}: {e}[/warning]")

        try:
            for discussion in mr_full.discussions.list(all=True):
                for note in discussion.attributes.get('notes', []):
                    record = self._review_record(note, mr.title, mr.created_at, author_username)
                    if record:
                        activity.append(record)
        except Exception as e:
            if self.verbose:
                console.print(f"[warning]Could not load discussions for MR !{mr.iid} in {project_path}: {e}[/warning]")
        return activity

    def _analyze_project(self, project_path: str) -> Optional[Dict[str, Any]]:
        """Analyze contributions for a specific project using GitLab API.

        Returns:
            A dict with ``project_path``, ``project_name``, ``contributions``
            and ``total_contributions``, or None when nothing was found or the
            project could not be read.
        """
        try:
            if self.verbose:
                console.print(f"[info]Fetching data for {project_path} via GitLab API...[/info]")
            project = self.gitlab_client.projects.get(project_path)
            contributions: List[Dict[str, Any]] = []

            # Get merge requests in date range (end date inclusive)
            mrs = project.mergerequests.list(all=True, **self._mr_query_window())
            for mr in mrs:
                author = mr.author or {}
                author_username = author.get('username')
                if author_username and author_username in self.microsoft_employees:
                    contributions.append({
                        "username": author_username,
                        "name": author.get('name', ''),
                        "title": mr.title,
                        "created_at": mr.created_at,
                        "state": mr.state,
                        "contribution_type": "merge_request"
                    })
                # NOTE(review): reviews are gathered for every MR in the window,
                # not only Microsoft-authored ones - confirm this matches upstream.
                contributions.extend(
                    self._collect_review_activity(project, mr, author_username, project_path)
                )

            if not contributions:
                return None
            return {
                "project_path": project_path,
                "project_name": project_path.split('/')[-1],
                "contributions": contributions,
                "total_contributions": len(contributions)
            }
        except GitlabGetError:
            if self.verbose:
                console.print(f"[warning]Project not found or no access: {project_path}[/warning]")
            return None
        except Exception as e:
            if self.verbose:
                console.print(f"[error]Error analyzing {project_path}: {e}[/error]")
            return None

    def _update_statistics(self, project_path: str, project_data: Dict[str, Any]):
        """Update global statistics with project contribution data.

        Mutates ``self.stats`` without locking of its own; the caller is
        expected to hold ``self.stats_lock``.
        """
        contributions = project_data.get('contributions', [])
        project_name = project_path.split('/')[-1]
        breakdown = self.stats["project_breakdown"].setdefault(project_name, {
            "path": project_path,
            "open_mrs": 0,
            "merged_mrs": 0,
            "closed_mrs": 0,
            "reviews": 0,
            "approvals": 0,
            "contributors": set()
        })

        for contrib in contributions:
            contributor = contrib.get('username') or contrib.get('reviewer_username')
            if contributor:
                self.stats["active_contributors"].add(contributor)
                breakdown["contributors"].add(contributor)

            contrib_type = contrib.get('contribution_type', '')
            if contrib_type == 'merge_request':
                self.stats["total_merge_requests"] += 1
                # Track MR states; unknown states only count toward the total
                counters = self._MR_STATE_COUNTERS.get(contrib.get('state', ''))
                if counters:
                    total_key, project_key = counters
                    self.stats[total_key] += 1
                    breakdown[project_key] += 1
            elif contrib_type == 'review':
                self.stats["total_reviews"] += 1
                breakdown["reviews"] += 1
            elif contrib_type == 'approval':
                self.stats["total_approvals"] += 1
                breakdown["approvals"] += 1

        self._categorize_project(project_path)

    def _categorize_project(self, project_path: str):
        """Categorize project by type for statistics.

        Increments at most one counter in ``stats["project_categories"]``,
        chosen from substrings of the lower-cased project path.
        """
        path_lower = project_path.lower()
        if ('/system/' in path_lower
                and '/reference/' not in path_lower
                and '/lib/' not in path_lower):
            category = "system_services"
        else:
            category = next(
                (name for marker, name in self._CATEGORY_MARKERS if marker in path_lower),
                None,
            )
        if category is not None:
            self.stats["project_categories"][category] += 1

    def display_executive_summary(self):
        """Display executive summary with key metrics."""
        stats = self.stats
        # Derived from the set itself so the figure is right even if the
        # analysis stopped before the final counters were written.
        total_contributors = len(stats["active_contributors"])
        coverage = stats['projects_with_contributions'] / max(stats['total_projects'], 1) * 100
        summary_content = "\n".join([
            f"[metric]{total_contributors}[/metric] active Microsoft contributors (out of {len(self.microsoft_employees)})",
            f"[metric]{stats['projects_with_contributions']}[/metric] projects with contributions (out of {stats['total_projects']})",
            f"[info]Contributions:[/info] [metric]{stats['total_merge_requests']}[/metric] merge requests",
            f"[info]MR Breakdown:[/info] [metric]{stats['total_open_mrs']}[/metric] open, [metric]{stats['total_merged_mrs']}[/metric] merged, [metric]{stats['total_closed_mrs']}[/metric] closed",
            f"[info]Engagement:[/info] [metric]{stats['total_reviews']}[/metric] comments, [metric]{stats['total_approvals']}[/metric] approvals",
            f"[info]Coverage:[/info] {coverage:.1f}% of projects have Microsoft contributions",
        ])
        console.print(Panel(
            summary_content,
            title="π Executive Summary",
            border_style="green",
            box=ROUNDED
        ))
        console.print()

    def display_contributor_leaderboard(self):
        """Display top contributors with metrics (top 10 by MR count)."""
        if not self.stats["active_contributors"]:
            console.print("[warning]β οΈ No active contributors found in the analyzed period[/warning]")
            return

        # Aggregate per-contributor counters across all projects
        contributor_metrics: Dict[str, Dict[str, int]] = {}
        for project_data in self.contributions.values():
            for contrib in project_data.get('contributions', []):
                contributor = contrib.get('username') or contrib.get('reviewer_username')
                if not contributor:
                    continue
                metrics = contributor_metrics.setdefault(
                    contributor,
                    {'open_mrs': 0, 'merged_mrs': 0, 'reviews': 0, 'approvals': 0},
                )
                contrib_type = contrib.get('contribution_type', '')
                contrib_state = contrib.get('state', '')
                if contrib_type == 'merge_request':
                    if contrib_state == 'opened':
                        metrics['open_mrs'] += 1
                    elif contrib_state == 'merged':
                        metrics['merged_mrs'] += 1
                elif contrib_type == 'review':
                    metrics['reviews'] += 1
                elif contrib_type == 'approval':
                    metrics['approvals'] += 1

        leaderboard = Table(title="π Top Microsoft Contributors", box=ROUNDED)
        leaderboard.add_column("Rank", style="dim", width=4)
        leaderboard.add_column("Contributor", style="contributor")
        leaderboard.add_column("Open MRs", justify="right", style="warning")
        leaderboard.add_column("Merged MRs", justify="right", style="success")
        leaderboard.add_column("Comments", justify="right", style="metric")
        leaderboard.add_column("Approvals", justify="right", style="metric")

        # Sort by merge requests (primary activity), then reviews, then approvals
        sorted_contributors = sorted(
            contributor_metrics.items(),
            key=lambda x: (x[1]['open_mrs'] + x[1]['merged_mrs'], x[1]['reviews'], x[1]['approvals']),
            reverse=True
        )
        for i, (contributor, metrics) in enumerate(sorted_contributors[:10], 1):
            leaderboard.add_row(
                f"{i}",
                contributor,
                str(metrics['open_mrs']),
                str(metrics['merged_mrs']),
                str(metrics['reviews']),
                str(metrics['approvals'])
            )
        console.print(leaderboard)
        console.print()

    def display_project_breakdown(self):
        """Display project-level breakdown of Microsoft contributions."""
        if not self.stats["project_breakdown"]:
            console.print("[warning]β οΈ No projects with Microsoft contributions found[/warning]")
            return

        breakdown_table = Table(title="π Project-Level Contribution Breakdown", box=ROUNDED)
        breakdown_table.add_column("Project", style="project")
        breakdown_table.add_column("Open MRs", justify="right", style="warning")
        breakdown_table.add_column("Merged MRs", justify="right", style="success")
        breakdown_table.add_column("Comments", justify="right", style="info")
        breakdown_table.add_column("Approvals", justify="right", style="metric")

        shown_keys = ("open_mrs", "merged_mrs", "reviews", "approvals")
        # Sort by total activity (MRs + reviews + approvals)
        sorted_projects = sorted(
            self.stats["project_breakdown"].items(),
            key=lambda x: sum(x[1][key] for key in shown_keys),
            reverse=True
        )
        for project_name, data in sorted_projects:
            # Only show projects with activity in at least one displayed column
            if not any(data[key] > 0 for key in shown_keys):
                continue
            breakdown_table.add_row(
                project_name,
                *(str(data[key]) if data[key] > 0 else "-" for key in shown_keys)
            )
        console.print(breakdown_table)
        console.print()

    def export_results(self, output_format: str):
        """Export results to specified format ("json"; console needs no export)."""
        if output_format == "json":
            self._export_json()
        # console is handled by the display methods

    @staticmethod
    def _json_safe(value: Any) -> Any:
        """Recursively convert sets to sorted lists so the data is JSON-serializable."""
        if isinstance(value, (set, frozenset)):
            return sorted((OSDUContributionAnalyzer._json_safe(v) for v in value), key=str)
        if isinstance(value, dict):
            return {key: OSDUContributionAnalyzer._json_safe(v) for key, v in value.items()}
        if isinstance(value, (list, tuple)):
            return [OSDUContributionAnalyzer._json_safe(v) for v in value]
        return value

    def _export_json(self):
        """Export results to JSON file in the current working directory."""
        results = {
            "analysis_date": datetime.now().isoformat(),
            "date_range": {
                "start": self.start_date.isoformat(),
                "end": self.end_date.isoformat(),
                "days": self.days_analyzed
            },
            # Contributor sets (global and per project) become sorted lists
            # instead of being stringified by the ``default=str`` fallback.
            "statistics": self._json_safe(self.stats),
            "contributions": self.contributions,
            "parallel_processing": {
                "max_workers": self.max_workers,
                "data_access_method": "GitLab API" if self.gitlab_client else "None"
            }
        }
        output_file = f"osdu_contributions_{self.start_date}_{self.end_date}_parallel.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, default=str)
        console.print(f"[success]β JSON report saved: {output_file}[/success]")

    def run_analysis(self, output_format: str = "console"):
        """Run the complete analysis workflow.

        Connects first so the data-source report reflects the real connection
        state, and stops with exit status 1 when GitLab is unreachable instead
        of rendering an empty report.
        """
        try:
            self.display_header()
            connected = self._init_gitlab_client()
            self.load_data_sources()
            if not connected:
                console.print("[error]β Cannot connect to GitLab API.[/error]")
                sys.exit(1)

            self.analyze_contributions()

            self.display_executive_summary()
            self.display_contributor_leaderboard()
            self.display_project_breakdown()

            if output_format != "console":
                console.print()
                self.export_results(output_format)
        except KeyboardInterrupt:
            console.print("\n[warning]β οΈ Analysis interrupted by user[/warning]")
            sys.exit(1)
        except Exception as e:
            console.print(f"\n[error]β Analysis failed: {e}[/error]")
            if self.verbose:
                import traceback
                console.print(traceback.format_exc())
            sys.exit(1)
def _parse_cli_date(value: Optional[str]):
    """Parse an optional CLI date string; exit with status 1 if it is invalid.

    Returns None when ``value`` is empty, otherwise a ``datetime.date``.
    """
    if not value:
        return None
    try:
        return date_parser.parse(value).date()
    except (ValueError, OverflowError) as e:
        # dateutil raises ValueError subclasses for unparseable text and
        # OverflowError for out-of-range numbers.
        console.print(f"[error]β Invalid date format: {e}[/error]")
        raise typer.Exit(1) from e


@app.command()
def main(
    days: int = typer.Option(
        30,
        help="Number of days to analyze (default: 30)"
    ),
    start_date: Optional[str] = typer.Option(
        None,
        help="Start date for analysis (YYYY-MM-DD format)"
    ),
    end_date: Optional[str] = typer.Option(
        None,
        help="End date for analysis (YYYY-MM-DD format)"
    ),
    output: str = typer.Option(
        "console",
        help="Output format: console, json"
    ),
    max_workers: int = typer.Option(
        8,
        help="Number of parallel workers for GitLab API requests"
    ),
    gitlab_token: Optional[str] = typer.Option(
        None,
        help="GitLab Personal Access Token (or set GITLAB_TOKEN env var)"
    ),
    verbose: bool = typer.Option(
        False,
        help="Show detailed progress information"
    )
):
    """Analyze Microsoft employee contributions to OSDU GitLab projects using direct GitLab API"""
    # Validate output format
    valid_formats = ["console", "json"]
    if output not in valid_formats:
        console.print(f"[error]β Invalid output format. Use: {', '.join(valid_formats)}[/error]")
        raise typer.Exit(1)

    # Validate date parameters. Each date is checked on its own so that a
    # single bad --start-date or --end-date is reported here rather than
    # surfacing as a traceback from the analyzer's constructor.
    start_parsed = _parse_cli_date(start_date)
    end_parsed = _parse_cli_date(end_date)
    if start_parsed is not None and end_parsed is not None and start_parsed >= end_parsed:
        console.print("[error]β Start date must be before end date[/error]")
        raise typer.Exit(1)

    # --days only matters when no explicit start date is given; a non-positive
    # value would produce an empty or inverted window.
    if start_parsed is None and days < 1:
        console.print("[error]β Days must be at least 1[/error]")
        raise typer.Exit(1)

    # Validate max_workers
    if max_workers < 1 or max_workers > 10:
        console.print("[error]β Max workers must be between 1 and 10[/error]")
        raise typer.Exit(1)

    # Create and run analyzer
    analyzer = OSDUContributionAnalyzer(
        days=days,
        start_date=start_date,
        end_date=end_date,
        verbose=verbose,
        max_workers=max_workers,
        gitlab_token=gitlab_token
    )
    analyzer.run_analysis(output)
# Script entry point: hand control to the Typer application, which parses the
# command line and dispatches to the registered command.
if __name__ == "__main__":
    app()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment