Last active
October 17, 2024 16:24
-
-
Save Sdy603/da8d9701e5492527b98091e23b58d26b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import requests | |
import pandas as pd | |
from dotenv import load_dotenv | |
from datetime import datetime | |
import time | |
# Load environment variables from a local .env file, if one exists.
load_dotenv()

# Credentials and flags read from the environment.
GITHUB_PAT = os.getenv('GITHUB_PAT')
DX_PROXY_USER = os.getenv('DX_PROXY_USER')
DX_PROXY_PASS = os.getenv('DX_PROXY_PASS')
# VERBOSE is true only for the (case-insensitive) values "true", "1" or "t".
VERBOSE = os.getenv('VERBOSE', 'False').lower() in ('true', '1', 't')

# Ensure necessary credentials are present
if not GITHUB_PAT:
    raise ValueError("GitHub PAT not found. Please set it in the .env file.")

# Set up the proxy only when both proxy credentials are configured. Building
# the URL unconditionally would send every request through
# "http://None:None@proxy.getdx.net:80" when the variables are unset.
if DX_PROXY_USER and DX_PROXY_PASS:
    proxy_url = f"http://{DX_PROXY_USER}:{DX_PROXY_PASS}@proxy.getdx.net:80"
    proxies = {"http": proxy_url, "https": proxy_url}
else:
    proxy_url = None
    proxies = None  # requests treats None as "no explicit proxy"

# Prepare GitHub API headers with PAT
headers = {
    "Authorization": f"token {GITHUB_PAT}",
    "Accept": "application/vnd.github+json",
}

# Read the CSV file containing repositories. The script uses the columns
# 'login', 'name' and 'repo ID' from this file.
csv_file_path = "iggroupGH.csv"
df = pd.read_csv(csv_file_path)

# Construct the API URL for each repository's tags
df['url'] = "https://api.github.com/repos/" + df['login'] + "/" + df['name'] + "/tags"
def _header_int(headers, name):
    """Return header *name* parsed as an int, or None if absent or malformed."""
    try:
        return int(headers[name])
    except (KeyError, TypeError, ValueError):
        return None


def handle_rate_limit(response):
    """Sleep until a GitHub rate limit has lifted, if *response* reports one.

    A response counts as rate limited when its status is 403 or 429 and
    either a ``Retry-After`` header is present (wait that many seconds) or
    ``X-RateLimit-Remaining`` is 0 (wait until ``X-RateLimit-Reset``, an
    epoch timestamp).

    Args:
        response: Object exposing ``status_code`` and a mapping ``headers``.

    Returns:
        True if the function waited and the caller should retry the request,
        False if the response is not a rate-limit response or carries no
        usable wait information.
    """
    if response.status_code not in (403, 429):
        return False

    headers = response.headers
    # Retry-After takes precedence: it is an explicit wait in seconds.
    wait_time = _header_int(headers, 'Retry-After')
    if wait_time is None:
        if _header_int(headers, 'X-RateLimit-Remaining') != 0:
            # Quota left (or header missing): this error is not a rate limit.
            return False
        reset_time = _header_int(headers, 'X-RateLimit-Reset')
        if reset_time is None:
            # No reset timestamp, so there is nothing sensible to wait for.
            return False
        wait_time = reset_time - int(time.time())

    # A reset time in the past yields a negative delta; never sleep negative.
    wait_time = max(wait_time, 0)
    print(f"Rate limit exceeded. Waiting for {wait_time} seconds.")
    time.sleep(wait_time)
    return True
# Accumulates one SQL INSERT statement per tag found across all repositories.
insert_statements = []

# Seconds to wait on a request before giving up; without a timeout,
# requests.get() can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30
# Page size requested from the tags endpoint. GitHub returns 30 items per page
# unless asked otherwise, which silently truncates repos with many tags.
TAGS_PER_PAGE = 100


def _get_with_rate_limit_retry(request_url, params=None):
    """GET *request_url*, retrying once after handle_rate_limit() has waited."""
    response = requests.get(request_url, headers=headers, params=params,
                            proxies=proxies, timeout=REQUEST_TIMEOUT)
    if handle_rate_limit(response):
        response = requests.get(request_url, headers=headers, params=params,
                                proxies=proxies, timeout=REQUEST_TIMEOUT)
    return response


def _sql_quote(value):
    """Escape *value* for use inside a single-quoted SQL string literal."""
    return str(value).replace("'", "''")


# Iterate over the CSV rows and fetch every page of tags for each repository.
for _, row in df.iterrows():
    repo_id = row['repo ID']
    repo_name = row['name']
    url = row['url']

    if VERBOSE:
        print(f"\nRequesting tags for Repo ID: {repo_id} - URL: {url}")

    try:
        tags_data = []
        fetch_failed = False
        page_url = url
        page_params = {"per_page": TAGS_PER_PAGE}

        while page_url:
            response = _get_with_rate_limit_retry(page_url, page_params)

            if VERBOSE:
                print(f"Status Code: {response.status_code}")

            if response.status_code != 200:
                print(f"Failed request for {repo_name}. HTTP Status: {response.status_code} - {response.text}")
                fetch_failed = True
                break

            page = response.json()
            if isinstance(page, list):
                tags_data.extend(page)
                # The Link header's rel="next" URL already carries the paging
                # query string, so no params are sent on follow-up requests.
                page_url = response.links.get('next', {}).get('url')
            else:
                # Unexpected payload shape: stop paging for this repository.
                page_url = None
            page_params = None

        if VERBOSE and not fetch_failed:
            print(f"Response for {repo_name}: {tags_data}")

        # Tags from pages fetched before a failure are still emitted; the
        # failure itself has already been reported above.
        for tag in tags_data:
            # "or" guards against keys that are present but null in the JSON.
            tag_name = _sql_quote(tag.get('name') or 'N/A')
            commit = tag.get('commit') or {}
            tag_commit_sha = _sql_quote(commit.get('sha') or 'N/A')

            # Generate SQL insert statement for the tag
            sql_insert = (
                "\n"
                "INSERT INTO github_repo_tags (repo_name, repository_id, custom_tag_name, tag_commit_sha, created_at)\n"
                f"VALUES ('{_sql_quote(repo_name)}', {repo_id}, '{tag_name}', '{tag_commit_sha}', '{datetime.now().isoformat()}');\n"
            )
            insert_statements.append(sql_insert)

            if VERBOSE:
                print(f"Generated SQL: {sql_insert}")

        if VERBOSE and not tags_data and not fetch_failed:
            print(f"No tags found for {repo_name}.")

    except requests.exceptions.RequestException as req_err:
        print(f"Request error for Repo ID: {repo_id} - URL: {url} - Error: {req_err}")
    except Exception as err:
        # Deliberate best-effort: one bad repository must not stop the run.
        print(f"Unexpected error for Repo ID: {repo_id} - URL: {url} - Error: {err}")
# Save the SQL statements to a file if any were generated
if insert_statements:
    output_sql_path = "GithubRepoTagsInserts.sql"
    try:
        # Tag and repository names may contain non-ASCII characters, so write
        # UTF-8 explicitly instead of relying on the platform default
        # encoding, which can raise UnicodeEncodeError on some systems.
        with open(output_sql_path, 'w', encoding='utf-8') as f:
            f.writelines(statement + "\n" for statement in insert_statements)
        print(f"\nSQL insert statements saved to {output_sql_path}")
    except OSError as io_err:  # IOError is an alias of OSError in Python 3
        print(f"Error saving SQL insert statements: {io_err}")
else:
    print("\nNo SQL insert statements were generated.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment