Last active
July 18, 2024 18:50
-
-
Save FernandoCutire/e57eacd35cd9df10000fa483e317694d to your computer and use it in GitHub Desktop.
This script lets you check which EMR clusters are currently active.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import boto3 | |
import csv | |
from io import StringIO | |
def get_running_emr_clusters(region):
    """
    Build a CSV report of the EMR clusters that are currently active in a region.

    A cluster is included when its state is STARTING, BOOTSTRAPPING, RUNNING
    or WAITING.

    Parameters:
    - region (str): AWS region where the EMR clusters are located.

    Returns:
    - str: CSV text with a 'Cluster ID', 'Name', 'Status' header row followed
      by one row per matching cluster (header only when there are none).
    """
    client = boto3.client('emr', region_name=region)

    # ListClusters is a paginated API: a single call only returns the first
    # page of results. Walk every page so no cluster is silently dropped.
    paginator = client.get_paginator('list_clusters')
    pages = paginator.paginate(
        ClusterStates=['STARTING', 'BOOTSTRAPPING', 'RUNNING', 'WAITING']
    )

    running_clusters = []
    for page in pages:
        for cluster in page.get('Clusters', []):
            running_clusters.append([
                cluster['Id'],
                cluster['Name'],
                cluster['Status']['State'],
            ])

    # Convert list of clusters to CSV string
    output = StringIO()
    writer = csv.writer(output)
    writer.writerow(['Cluster ID', 'Name', 'Status'])  # Header
    writer.writerows(running_clusters)
    return output.getvalue()
def upload_to_s3(region, bucket_name, key, data):
    """
    Store a string as an object in an S3 bucket and print a confirmation.

    Parameters:
    - region (str): The AWS region used to create the S3 client.
    - bucket_name (str): The name of the destination S3 bucket.
    - key (str): The S3 key (path) under which the object is written.
    - data (str): The content to upload as the object body.
    """
    # Gather the request arguments first so the API call itself stays short.
    put_request = {
        'Bucket': bucket_name,
        'Key': key,
        'Body': data,
    }
    s3_client = boto3.client('s3', region_name=region)
    s3_client.put_object(**put_request)
    print(f"Data uploaded to s3://{bucket_name}/{key}")
if __name__ == "__main__":
    # Example settings; replace them with values for your own account.
    region = 'us-east-1'
    bucket_name = 'bucket'
    key = 'path/output/emr_clusters.csv'  # S3 key where the CSV would be stored

    # Only the CSV report is printed. The S3 upload step is intentionally left
    # disabled; to store the report at s3://<bucket_name>/<key>, pass the
    # settings above together with the CSV text to upload_to_s3.
    print(get_running_emr_clusters(region))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment