Skip to content

Instantly share code, notes, and snippets.

@Tatarotus
Created May 1, 2025 14:53
Show Gist options
  • Select an option

  • Save Tatarotus/a55e5f96e14c0484be0071472010622d to your computer and use it in GitHub Desktop.

Select an option

Save Tatarotus/a55e5f96e14c0484be0071472010622d to your computer and use it in GitHub Desktop.
Script to organize my external HD with movies and series (requires an internet connection)
"""
Dependencies:
- Standard Library: os, re, shutil, logging, pathlib, time
- External Packages (install via pip):
- requests: For HTTP requests to TMDB API
- python-dotenv: For loading .env file
- ratelimit: For TMDB API rate limiting
- tqdm: For progress bar
Install external packages with:
pip install requests python-dotenv ratelimit tqdm
"""
import os
import re
import shutil
import requests
import logging
from pathlib import Path
from time import sleep
from dotenv import load_dotenv
from ratelimit import limits, sleep_and_retry
from tqdm import tqdm
# Load environment variables
load_dotenv()
TMDB_API_KEY = os.getenv("TMDB_API_KEY")
if not TMDB_API_KEY:
    raise ValueError("TMDB_API_KEY not found in environment variables")


def _ler_lista_env(nome, padrao):
    """Read a comma-separated environment variable as a set of strings.

    Items are trimmed and lower-cased, and empty items are dropped, so a value
    such as ``".mp4, .mkv,"`` yields ``{".mp4", ".mkv"}``. An unset or blank
    variable falls back to *padrao*.
    """
    bruto = os.getenv(nome) or padrao
    return {item.strip().lower() for item in bruto.split(",") if item.strip()}


# Configurable settings from .env
LANGUAGE = os.getenv("LANGUAGE", "pt-BR")
SRC_DIR = Path(os.getenv("SRC_DIR", "."))
DEST_FILMES = Path(os.getenv("DEST_FILMES", "./FILMES"))
DEST_SERIES = Path(os.getenv("DEST_SERIES", "./SERIES"))
DEST_ANIMES = Path(os.getenv("DEST_ANIMES", "./ANIMES"))
DEST_PENDENTE = Path(os.getenv("DEST_PENDENTE", "./PENDENTE"))
GENRES_ANIME = _ler_lista_env("GENRES_ANIME", "animação,animation,anime")
# Extensions are compared against Path.suffix, which always carries the
# leading dot, so entries written without one ("mkv") are normalized.
VALID_EXTENSIONS = {
    ext if ext.startswith(".") else f".{ext}"
    for ext in _ler_lista_env("VALID_EXTENSIONS", ".mp4,.mkv,.avi,.mov,.wmv,.flv")
}

# TMDB API settings
BASE_URL = "https://api.themoviedb.org/3"
CALLS = 40  # TMDB rate limit: 40 requests per 10 seconds
PERIOD = 10

# Setup logging with file and console output
logger = logging.getLogger()
logger.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
# Log messages contain "→" and arbitrary file names; an explicit UTF-8
# encoding keeps the file handler from failing on non-UTF-8 locales.
file_handler = logging.FileHandler("media_organizer.log", encoding="utf-8")
file_handler.setFormatter(formatter)
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
logger.addHandler(file_handler)
logger.addHandler(console_handler)

# Create destination folders
for path in [DEST_FILMES, DEST_SERIES, DEST_ANIMES, DEST_PENDENTE]:
    path.mkdir(parents=True, exist_ok=True)
# Website tags such as WWW.BLUDV.COM. Matched on the raw stem, while the dots
# that delimit the tag are still present.
_PADRAO_SITE = re.compile(r"WWW\.[A-Z0-9]+\.[A-Z]+", re.IGNORECASE)

# Metadata patterns applied AFTER dots have been turned into spaces.
_PADROES_METADADOS = tuple(
    re.compile(padrao, re.IGNORECASE)
    for padrao in (
        # Fansub tags: [Erai-raws], [MultiSub][14D89703]
        r"\[.*?\]",
        # Season/episode: S01E01, 1x01, - 02, Ep02. The (?!\d) keeps "- 02"
        # from eating the first digits of something like "-1080p".
        r"(S\d{1,2}E\d{1,2}|\d{1,2}x\d{1,2}|-\s*\d{1,2}(?!\d)|Ep\d{1,2})",
        # Years
        r"\b(19|20)\d{2}\b",
        # Resolution: 720p, 1080p
        r"\d{3,4}p",
        # Encodings/sources. The lookarounds reject a neighbouring letter so
        # that short tokens such as "CR" are not cut out of real words
        # ("Secret"); digits may still touch the token ("AAC2").
        r"(?<![^\W\d_])"
        r"(WEB[-. ]?DL|WEBRip|HDRip|BluRay|x264|x265|HEVC|AVC|AAC|6CH|DUAL|MultiSub|CR)"
        r"(?![^\W\d_])",
        # Language/episode words
        r"\b(ep|capítulo|ja|pt|br|us|en)\b",
    )
)


def limpar_nome(nome):
    """Clean a media filename down to the core title used for the TMDB search.

    Args:
        nome: File name (or path); the final extension is discarded.

    Returns:
        The title with release metadata (fansub tags, season/episode markers,
        years, resolution, codec/source tokens, website tags, language words),
        punctuation and stray one- or two-digit numbers removed. May be an
        empty string when nothing but metadata was present.
    """
    nome = Path(nome).stem
    # Step 1: Drop website tags first; they depend on the dots removed below.
    nome = _PADRAO_SITE.sub(" ", nome)
    # Step 2: Replace dots with spaces to preserve title structure
    nome = nome.replace(".", " ")
    # Step 3: Remove metadata patterns
    for padrao in _PADROES_METADADOS:
        nome = padrao.sub(" ", nome)
    # Step 4: Clean punctuation and stray numbers
    nome = re.sub(r"[\[\]\(\)\{\}_-]", " ", nome)
    nome = re.sub(r"\b\d{1,2}\b", "", nome)  # Remove stray numbers
    nome = re.sub(r"\s+", " ", nome)  # Collapse spaces
    return nome.strip()
def _ocultar_chave(erro):
    """Render an exception as text with the TMDB API key masked.

    The text of requests exceptions includes the request URL, and the key is
    sent as a query parameter, so logging it verbatim would store the key in
    the log file.
    """
    texto = str(erro)
    return texto.replace(TMDB_API_KEY, "***") if TMDB_API_KEY else texto


@sleep_and_retry
@limits(calls=CALLS, period=PERIOD)
def buscar_tmdb(titulo, tipo="tv"):
    """Search TMDB for a series or movie and return the most popular match.

    Args:
        titulo: Title to search for.
        tipo: TMDB search endpoint suffix, ``"tv"`` or ``"movie"``.

    Returns:
        The result dict with the highest ``popularity``, or ``None`` when the
        title is blank, nothing matched, or the request failed (failures are
        logged, never raised).
    """
    # A blank query cannot identify anything; skip the network round-trip.
    if not titulo or not titulo.strip():
        return None
    endpoint = f"{BASE_URL}/search/{tipo}"
    params = {
        "api_key": TMDB_API_KEY,
        "query": titulo,
        "language": LANGUAGE,
    }
    try:
        resp = requests.get(endpoint, params=params, timeout=10)
        resp.raise_for_status()
        resultados = resp.json().get("results") or []
        if resultados:
            # "or 0" also covers an explicit null popularity.
            return max(resultados, key=lambda x: x.get("popularity") or 0)
        return None
    except requests.exceptions.HTTPError as e:
        logger.error(f"HTTP error searching {tipo} '{titulo}': {_ocultar_chave(e)}")
        return None
    except requests.exceptions.ConnectionError as e:
        logger.error(f"Connection error searching {tipo} '{titulo}': {_ocultar_chave(e)}")
        return None
    except Exception as e:
        logger.error(f"Unexpected error searching {tipo} '{titulo}': {_ocultar_chave(e)}")
        return None
# Path separators plus the characters Windows file systems (NTFS/exFAT) reject
# in a path component, and ASCII control characters.
_CARACTERES_INVALIDOS = re.compile(r'[<>:"/\\|?*\x00-\x1f]')


def _nome_pasta_seguro(nome: str) -> str:
    """Turn a media title into a single, filesystem-safe folder name."""
    limpo = _CARACTERES_INVALIDOS.sub(" ", nome)
    # Whitespace runs become one underscore. Trailing dots are dropped because
    # Windows does not allow them and because "." / ".." would point outside
    # the intended sub-folder.
    seguro = "_".join(limpo.split()).rstrip(".")
    return seguro or "Desconhecido"


def mover(arquivo: Path, destino_base: Path, subpasta_nome: str) -> None:
    """Move a file into ``destino_base/<sub-folder>``, handling naming conflicts.

    Args:
        arquivo: Existing file to move.
        destino_base: Base destination directory.
        subpasta_nome: Human-readable title used to name the sub-folder; it is
            sanitized so that it always maps to exactly one directory level.

    If a different file with the same name already exists in the destination,
    ``_1``, ``_2``, ... is appended to the stem. If *arquivo* is already the
    file at the destination, nothing is moved.
    """
    destino_final = destino_base / _nome_pasta_seguro(subpasta_nome)
    destino_final.mkdir(parents=True, exist_ok=True)
    novo_caminho = destino_final / arquivo.name
    # Without this check a file that is already in place would collide with
    # itself and be renamed to "<name>_1".
    if novo_caminho.exists() and novo_caminho.samefile(arquivo):
        logger.info(f"Already in place, skipping: {arquivo}")
        return
    counter = 1
    while novo_caminho.exists():
        novo_nome = f"{arquivo.stem}_{counter}{arquivo.suffix}"
        novo_caminho = destino_final / novo_nome
        counter += 1
    logger.info(f"Moving {arquivo} to {novo_caminho}")
    print(f"📁 Movendo para: {novo_caminho}")
    shutil.move(str(arquivo), str(novo_caminho))
# TMDB's numeric id for the "Animation" genre. Unlike the genre name, it does
# not change with the ``language`` requested from the API.
_TMDB_ID_ANIMACAO = 16


def is_anime(detalhes):
    """Check if media is an anime based on genres and origin.

    Args:
        detalhes: TMDB details dict; ``genres`` (list of ``{"id", "name"}``)
            and ``origin_country`` are read, both optional.

    Returns:
        ``True`` when any genre name is listed in ``GENRES_ANIME``, or when
        the title is an animation with Japan among its origin countries.
    """
    generos_brutos = detalhes.get("genres") or []
    generos = [g["name"].lower() for g in generos_brutos if g.get("name")]
    if any(g in GENRES_ANIME for g in generos):
        return True
    # The genre id keeps this working when names come back localized
    # (e.g. "Animação" for pt-BR) instead of the English "Animation".
    e_animacao = "animation" in generos or any(
        g.get("id") == _TMDB_ID_ANIMACAO for g in generos_brutos
    )
    return e_animacao and "JP" in (detalhes.get("origin_country") or [])
# Failures that mean "details could not be fetched or interpreted".
_ERROS_DETALHES = (
    requests.exceptions.RequestException,
    ValueError,
    KeyError,
    TypeError,
    AttributeError,
)


def _mascarar_chave(erro):
    """Render an exception as text with the TMDB API key masked.

    requests includes the request URL (and therefore the ``api_key`` query
    parameter) in its error messages.
    """
    texto = str(erro)
    return texto.replace(TMDB_API_KEY, "***") if TMDB_API_KEY else texto


def _buscar_detalhes(tipo, tmdb_id):
    """Fetch the TMDB details record for a ``"tv"`` or ``"movie"`` id.

    Raises:
        requests.exceptions.RequestException: network failure or error status.
        ValueError: the response body is not valid JSON.
    """
    resp = requests.get(
        f"{BASE_URL}/{tipo}/{tmdb_id}",
        params={"api_key": TMDB_API_KEY, "language": LANGUAGE},
        timeout=10,
    )
    # Without this, an error payload (which has no "genres") would be
    # classified as if it were a real details record.
    resp.raise_for_status()
    return resp.json()


def _organizar_como_serie(arquivo, nome_limpo):
    """Try to file *arquivo* as a series or anime series; return True if moved."""
    resultado = buscar_tmdb(nome_limpo, tipo="tv")
    if not resultado:
        return False
    nome_serie = resultado.get("name", "Serie_Desconhecida")
    try:
        detalhes = _buscar_detalhes("tv", resultado.get("id"))
        anime = is_anime(detalhes)
    except _ERROS_DETALHES as e:
        logger.error(f"Error fetching series details for {nome_serie}: {_mascarar_chave(e)}")
        print(f"❌ Erro ao obter detalhes da série: {_mascarar_chave(e)}")
        return False
    # The move happens outside the try so that a filesystem error is not
    # reported as a TMDB error.
    if anime:
        logger.info(f"Anime detected (series): {nome_serie}")
        print(f"🎌 Anime detectado (via série): {nome_serie}")
        mover(arquivo, DEST_ANIMES, nome_serie)
    else:
        logger.info(f"Series detected: {nome_serie}")
        print(f"📺 Série detectada: {nome_serie}")
        mover(arquivo, DEST_SERIES, nome_serie)
    return True


def _organizar_como_filme(arquivo, nome_limpo):
    """Try to file *arquivo* as a movie or anime movie; return True if moved."""
    resultado = buscar_tmdb(nome_limpo, tipo="movie")
    if not resultado:
        return False
    nome_filme = resultado.get("title", nome_limpo)
    try:
        detalhes = _buscar_detalhes("movie", resultado.get("id"))
        anime = is_anime(detalhes)
        generos = detalhes.get("genres") or []
        genero = generos[0]["name"] if generos else "Outros"
    except _ERROS_DETALHES as e:
        logger.error(f"Error fetching movie details for {nome_filme}: {_mascarar_chave(e)}")
        print(f"❌ Erro ao obter detalhes do filme: {_mascarar_chave(e)}")
        return False
    if anime:
        logger.info(f"Anime detected: {nome_filme}")
        print(f"🎌 Anime detectado: {nome_filme}")
        mover(arquivo, DEST_ANIMES, nome_filme)
    else:
        logger.info(f"Movie detected: {nome_filme} → {genero}")
        print(f"🎬 Filme detectado: {nome_filme} → {genero}")
        mover(arquivo, DEST_FILMES / genero.capitalize(), nome_filme)
    return True


def _organizar_arquivo(arquivo):
    """Identify one media file and move it to the matching destination."""
    nome_limpo = limpar_nome(arquivo.name)
    logger.info(f"Raw: {arquivo.name} → Cleaned: {nome_limpo}")
    # An empty cleaned name (file name was all metadata) cannot be searched.
    if nome_limpo:
        print(f"🔍 Buscando: {nome_limpo}")
        # Try as series first (most anime are series), then as movie.
        if _organizar_como_serie(arquivo, nome_limpo):
            return
        if _organizar_como_filme(arquivo, nome_limpo):
            return
    # Fallback: Move to PENDENTE with debug info
    logger.warning(f"Failed to identify: {arquivo.name} (Cleaned: {nome_limpo})")
    print(f"⚠️ Movendo para PENDENTE: {arquivo.name}")
    mover(arquivo, DEST_PENDENTE, "Desconhecido")


def main():
    """Main function to process and organize media files.

    Scans ``SRC_DIR`` recursively for files with a valid extension and, for
    each one, tries to classify it as a series, then as a movie, and finally
    moves it to the pending folder. Files that already live inside one of the
    destination folders are skipped, so the script can be re-run with the
    destinations nested in the source directory. A failure to move one file is
    logged and does not stop the run.
    """
    origem = SRC_DIR.resolve()
    destinos = [
        d.resolve()
        for d in (DEST_FILMES, DEST_SERIES, DEST_ANIMES, DEST_PENDENTE)
    ]
    # A destination that IS the source, or contains it, stays scannable: that
    # is how the pending folder can be reprocessed on purpose.
    destinos_excluidos = [
        d for d in destinos if d != origem and d not in origem.parents
    ]

    # Collect valid files
    arquivos = [
        f for f in SRC_DIR.rglob("*")
        if f.is_file()
        and f.suffix.lower() in VALID_EXTENSIONS
        and not any(d in f.resolve().parents for d in destinos_excluidos)
    ]
    if not arquivos:
        logger.info("No valid media files found.")
        print("⚠️ Nenhum arquivo de mídia válido encontrado.")
        return

    # Process files with progress bar
    for arquivo in tqdm(arquivos, desc="Organizando arquivos"):
        try:
            _organizar_arquivo(arquivo)
        except OSError as e:
            # shutil.move / mkdir failures: report and carry on with the rest.
            logger.error(f"Failed to move {arquivo}: {e}")
            print(f"❌ Erro ao mover {arquivo.name}: {e}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment