Skip to content

Instantly share code, notes, and snippets.

@Pelirrojo
Created November 11, 2024 20:05
Show Gist options
  • Save Pelirrojo/a78b3dfbb36ceee300fed5fb2f0d1988 to your computer and use it in GitHub Desktop.
Save Pelirrojo/a78b3dfbb36ceee300fed5fb2f0d1988 to your computer and use it in GitHub Desktop.
Informe detallado de S3 con coste y detalle del contenido para cada bucket.

User prompt 1: Solicitud

  ¿Me podrías dar un código Python que haga lo siguiente?
  A partir de las credenciales AWS en memoria (no pedirlas directamente), hacer un listado de buckets y un ls/scan por cada bucket de una cuenta de S3 para imprimir en forma de informe HTML (un único fichero llamado s3-report-<account_ID>.html) con una estética más o menos limpia y responsive (todo lo que utilice, que sea por CDN).

  Las tablas serán:
  h1 informe S3 cuenta XXXXX

  h2 Listado de buckets
  
  a) listado de buckets con nombre, región, tamaño ocupado y coste mensual estimado
  Ejemplo de línea:
  bucket_1 | irlanda | 4Gb | 9.99 USD/m

  h2 Contenido de los buckets
  h3 Bucket X
  
  b) una tabla por cada bucket con todos sus ficheros de la siguiente forma: ruta, tamaño y tipo de almacenamiento

  Ejemplo de líneas:
  /archivo1.png | 1 Gb | Glacier Deep Archive
  /folder1/archivo500.docx | 215 kb | Standard

User prompt 2: Depurado por los errores multi-región

(py38) ➜  Workspace ./s3-report.py
Error: An error occurred (IllegalLocationConstraintException) when calling the ListObjects operation: The eu-south-2 location constraint is incompatible for the region specific endpoint this request was sent to.
#!/usr/bin/env python3
import datetime
import html
import logging
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Tuple

import boto3
import humanize
# Configure the root logger so INFO-level progress messages are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by S3Reporter and main() below.
logger = logging.getLogger(__name__)
class S3Reporter:
    """Build a single-file HTML report of every S3 bucket in the current account.

    Credentials are resolved by boto3's default chain (environment, shared
    config, instance role, ...); nothing is requested interactively. The
    report lists each bucket with its region, total size and a rough monthly
    cost estimate, followed by one table per bucket with every object's key,
    size and storage class.
    """

    # Placeholder region reported when the bucket location lookup fails.
    UNKNOWN_REGION = "desconocida"

    # Flat S3 Standard price in USD per GB-month (first 50 TB tier). The
    # estimate deliberately ignores storage class, region and request costs.
    COST_PER_GB_MONTH = 0.023

    # GetBucketLocation may return legacy constraint names that are not valid
    # region identifiers; map them to the region they stand for.
    LEGACY_REGION_ALIASES = {"EU": "eu-west-1"}

    def __init__(self):
        # Default client for account-wide calls (list buckets, bucket location).
        self.default_client = boto3.client('s3')
        self.account_id = boto3.client('sts').get_caller_identity().get('Account')
        # Cache of region name -> S3 client, filled lazily by get_s3_client().
        self.region_clients = {}

    def get_s3_client(self, region: Optional[str] = None) -> Any:
        """Return an S3 client bound to ``region``.

        Clients are created once per region and cached. When ``region`` is
        empty or is the UNKNOWN_REGION placeholder, the default client is
        returned instead of building a client for a non-existent region.
        """
        if not region or region == self.UNKNOWN_REGION:
            return self.default_client
        client = self.region_clients.get(region)
        if client is None:
            client = boto3.client('s3', region_name=region)
            self.region_clients[region] = client
        return client

    def _iter_objects(self, bucket_name: str, region: Optional[str]) -> Iterator[Dict]:
        """Yield every object entry of ``bucket_name`` via paginated list_objects_v2."""
        paginator = self.get_s3_client(region).get_paginator('list_objects_v2')
        for page in paginator.paginate(Bucket=bucket_name):
            # Pages of an empty bucket carry no 'Contents' key.
            yield from page.get('Contents', [])

    def _estimate_monthly_cost(self, size_bytes: int) -> float:
        """Return the approximate monthly storage cost in USD for ``size_bytes``."""
        return (size_bytes / (1024**3)) * self.COST_PER_GB_MONTH

    def get_bucket_size_and_cost(self, bucket_name: str, region: str) -> Tuple[int, float]:
        """Compute the total size in bytes and the estimated monthly cost of a bucket.

        Returns ``(0, 0.0)`` and logs the error when the bucket cannot be listed.
        """
        try:
            total_size = sum(
                obj.get('Size', 0) for obj in self._iter_objects(bucket_name, region)
            )
        except Exception as e:
            logger.error("Error al obtener tamaño del bucket %s: %s", bucket_name, e)
            return 0, 0.0
        return total_size, self._estimate_monthly_cost(total_size)

    def get_bucket_region(self, bucket_name: str) -> str:
        """Return the region a bucket lives in.

        A null LocationConstraint means ``us-east-1`` and the legacy value
        ``EU`` means ``eu-west-1``. On failure the error is logged and
        UNKNOWN_REGION is returned.
        """
        try:
            location = self.default_client.get_bucket_location(Bucket=bucket_name)
            region = location['LocationConstraint']
        except Exception as e:
            logger.error("Error al obtener región del bucket %s: %s", bucket_name, e)
            return self.UNKNOWN_REGION
        if not region:
            return 'us-east-1'
        return self.LEGACY_REGION_ALIASES.get(region, region)

    def get_bucket_contents(self, bucket_name: str, region: str) -> List[Dict]:
        """Return one ``{'path', 'size', 'storage_class'}`` dict per object in a bucket.

        The storage class is taken from the listing itself, so no per-object
        HEAD request is needed. If listing fails part-way, the error is logged
        and the objects collected so far are returned.
        """
        contents = []
        try:
            for obj in self._iter_objects(bucket_name, region):
                contents.append({
                    'path': obj['Key'],
                    'size': obj['Size'],
                    'storage_class': obj.get('StorageClass', 'UNKNOWN'),
                })
        except Exception as e:
            logger.error("Error al listar contenido del bucket %s: %s", bucket_name, e)
        return contents

    def generate_html_report(self):
        """Collect data for every bucket and write ``s3-report-<account_id>.html``.

        Each bucket is listed only once: its size and cost estimate are
        derived from the same object list shown in the per-bucket table.
        Returns without writing anything if the buckets cannot be listed.
        """
        try:
            buckets = self.default_client.list_buckets()['Buckets']
        except Exception as e:
            logger.error("Error al listar buckets: %s", e)
            return

        buckets_info = []
        buckets_contents = {}
        total_buckets = len(buckets)
        for i, bucket in enumerate(buckets, 1):
            bucket_name = bucket['Name']
            logger.info("Procesando bucket %s/%s: %s", i, total_buckets, bucket_name)

            region = self.get_bucket_region(bucket_name)
            logger.info("Región del bucket %s: %s", bucket_name, region)

            contents = self.get_bucket_contents(bucket_name, region)
            size = sum(item['size'] for item in contents)
            buckets_info.append({
                'name': bucket_name,
                'region': region,
                'size': size,
                'cost': self._estimate_monthly_cost(size),
            })
            buckets_contents[bucket_name] = contents

        report = self._generate_html_content(buckets_info, buckets_contents)

        filename = f"s3-report-{self.account_id}.html"
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(report)
        logger.info("Informe generado: %s", filename)

    def _generate_html_content(self, buckets_info: List[Dict], buckets_contents: Dict) -> str:
        """Render the report as a complete HTML document.

        Args:
            buckets_info: dicts with 'name', 'region', 'size' and 'cost' keys,
                one per bucket, for the summary table.
            buckets_contents: mapping of bucket name to the list produced by
                get_bucket_contents().

        Every value that originates from S3 (bucket names, object keys,
        storage classes) is HTML-escaped, since keys may contain markup
        characters. Fragments are collected in a list and joined once to
        avoid repeated string concatenation on large buckets.
        """
        esc = html.escape
        account = esc(str(self.account_id))
        generated_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        parts = [f"""<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Informe S3 - Cuenta {account}</title>
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">
<link href="https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.3/font/bootstrap-icons.css" rel="stylesheet">
<style>
.bucket-section {{ margin-top: 2rem; }}
.table-responsive {{ margin-top: 1rem; }}
.storage-class {{
padding: 0.2em 0.6em;
border-radius: 0.25rem;
font-size: 0.875em;
}}
.storage-class-STANDARD {{ background-color: #e3f2fd; }}
.storage-class-GLACIER {{ background-color: #f3e5f5; }}
.storage-class-DEEP_ARCHIVE {{ background-color: #efebe9; }}
.storage-class-INTELLIGENT_TIERING {{ background-color: #e8f5e9; }}
</style>
</head>
<body class="container-fluid py-4">
<h1 class="mb-4">
<i class="bi bi-cloud"></i>
Informe S3 - Cuenta {account}
</h1>
<h2 class="mt-5">
<i class="bi bi-bucket"></i>
Listado de Buckets
</h2>
<div class="table-responsive">
<table class="table table-striped table-hover">
<thead class="table-dark">
<tr>
<th>Nombre del Bucket</th>
<th>Región</th>
<th>Tamaño Total</th>
<th>Coste Mensual Estimado</th>
</tr>
</thead>
<tbody>
"""]

        # Summary table: one row per bucket.
        for bucket in buckets_info:
            parts.append(f"""
<tr>
<td>{esc(str(bucket['name']))}</td>
<td>{esc(str(bucket['region']))}</td>
<td>{humanize.naturalsize(bucket['size'])}</td>
<td>${bucket['cost']:.2f} USD/mes</td>
</tr>""")

        parts.append("""
</tbody>
</table>
</div>
<h2 class="mt-5">
<i class="bi bi-folder"></i>
Contenido de los Buckets
</h2>
""")

        # Detail section: one table per bucket.
        for bucket_name, contents in buckets_contents.items():
            parts.append(f"""
<div class="bucket-section">
<h3>
<i class="bi bi-folder2-open"></i>
Bucket: {esc(str(bucket_name))}
</h3>
<div class="table-responsive">
<table class="table table-striped table-hover">
<thead class="table-dark">
<tr>
<th>Ruta</th>
<th>Tamaño</th>
<th>Tipo de Almacenamiento</th>
</tr>
</thead>
<tbody>
""")
            for item in contents:
                # Escaped with quotes because it is also used inside a class attribute.
                storage_class = esc(str(item['storage_class']))
                parts.append(f"""
<tr>
<td>{esc(str(item['path']))}</td>
<td>{humanize.naturalsize(item['size'])}</td>
<td><span class="storage-class storage-class-{storage_class}">{storage_class}</span></td>
</tr>""")
            parts.append("""
</tbody>
</table>
</div>
</div>
""")

        parts.append(f"""
<footer class="mt-5 text-muted">
<hr>
<p>
<i class="bi bi-clock"></i>
Informe generado el {generated_at}
</p>
</footer>
<script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>""")
        return "".join(parts)
def main():
    """Script entry point: build the reporter and write the HTML report.

    Any exception raised while creating the reporter or generating the
    report is logged as an error instead of propagating.
    """
    try:
        S3Reporter().generate_html_report()
    except Exception as exc:
        logger.error(f"Error: {exc!s}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment