Created
May 1, 2025 14:53
-
-
Save Tatarotus/a55e5f96e14c0484be0071472010622d to your computer and use it in GitHub Desktop.
Script to organize my external HD of movies and series (requires an internet connection).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Dependencies: | |
| - Standard Library: os, re, shutil, logging, pathlib, time | |
| - External Packages (install via pip): | |
| - requests: For HTTP requests to TMDB API | |
| - python-dotenv: For loading .env file | |
| - ratelimit: For TMDB API rate limiting | |
| - tqdm: For progress bar | |
| Install external packages with: | |
| pip install requests python-dotenv ratelimit tqdm | |
| """ | |
| import os | |
| import re | |
| import shutil | |
| import requests | |
| import logging | |
| from pathlib import Path | |
| from time import sleep | |
| from dotenv import load_dotenv | |
| from ratelimit import limits, sleep_and_retry | |
| from tqdm import tqdm | |
# Load environment variables
load_dotenv()
TMDB_API_KEY = os.getenv("TMDB_API_KEY")
if not TMDB_API_KEY:
    # Every TMDB request needs the key, so fail fast instead of at the first lookup.
    raise ValueError("TMDB_API_KEY not found in environment variables")

# Configurable settings from .env
# NOTE(review): LANGUAGE is also a GNU gettext variable that many Linux desktops
# export (e.g. "pt_BR:pt:en"); load_dotenv() presumably does not override values
# already present in the environment, so the .env value may be ignored there.
# Confirm on the target machine.
LANGUAGE = os.getenv("LANGUAGE", "pt-BR")
SRC_DIR = Path(os.getenv("SRC_DIR", "."))
DEST_FILMES = Path(os.getenv("DEST_FILMES", "./FILMES"))
DEST_SERIES = Path(os.getenv("DEST_SERIES", "./SERIES"))
DEST_ANIMES = Path(os.getenv("DEST_ANIMES", "./ANIMES"))
DEST_PENDENTE = Path(os.getenv("DEST_PENDENTE", "./PENDENTE"))

# Comma-separated lists: trim whitespace around each item and drop empty items.
# An empty item in VALID_EXTENSIONS (e.g. from a trailing comma) would otherwise
# match every file that has no extension at all.
GENRES_ANIME = {
    item.strip()
    for item in os.getenv("GENRES_ANIME", "animação,animation,anime").lower().split(",")
    if item.strip()
}
VALID_EXTENSIONS = {
    item.strip()
    for item in os.getenv("VALID_EXTENSIONS", ".mp4,.mkv,.avi,.mov,.wmv,.flv").lower().split(",")
    if item.strip()
}
# TMDB API settings
BASE_URL = "https://api.themoviedb.org/3"
CALLS, PERIOD = 40, 10  # TMDB rate limit: 40 requests per 10 seconds

# Logging: root logger at INFO, written both to a log file and to the console
logger = logging.getLogger()
logger.setLevel(logging.INFO)
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
file_handler = logging.FileHandler("media_organizer.log")
console_handler = logging.StreamHandler()
for handler in (file_handler, console_handler):
    handler.setFormatter(formatter)
    logger.addHandler(handler)

# Make sure every destination folder exists before any file is moved
for path in (DEST_FILMES, DEST_SERIES, DEST_ANIMES, DEST_PENDENTE):
    path.mkdir(parents=True, exist_ok=True)
# Website watermark such as WWW.BLUDV.COM. It has to be removed while the dots
# are still in the name, i.e. before separators are turned into spaces.
_WEBSITE_TAG = re.compile(r"WWW\.[A-Z0-9]+\.[A-Z]+", re.IGNORECASE)

# Metadata patterns, applied in order after separators became spaces.
_METADATA_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        r"\[.*?\]",  # Fansub tags: [Erai-raws], [MultiSub][14D89703]
        # Resolution (720p, 1080p). Runs before the episode rule so that
        # "-720p" is not half-eaten by the "- 02" episode alternative.
        r"\d{3,4}p",
        r"(S\d{1,2}E\d{1,2}|\d{1,2}x\d{1,2}|-\s*\d{1,2}|Ep\d{1,2})",  # S01E01, 1x01, - 02, Ep02
        r"\b(19|20)\d{2}\b",  # Years
        # Encodings/sources. The look-arounds reject a match that is glued to
        # another letter, so "CR"/"AAC" inside real words ("Incredibles",
        # "Isaac") are left alone while "AAC2" or "x264-GRP" still match.
        r"(?<![^\W\d_])"
        r"(WEB[-\s]?DL|WEBRip|HDRip|BluRay|x264|x265|HEVC|AVC|AAC|6CH|DUAL|MultiSub|CR)"
        r"(?![^\W\d_])",
        r"\b(ep|capítulo|ja|pt|br|us|en)\b",  # Language/episode words
    )
)


def limpar_nome(nome: str) -> str:
    """Clean a media filename down to the core title used for the TMDB search.

    Args:
        nome: File name (or path); only the stem of its last component is used.

    Returns:
        The title with website tags, fansub tags, season/episode markers,
        years, resolutions, encoding tokens, language words, brackets and
        stray one- or two-digit numbers removed, whitespace collapsed and
        trimmed. May be an empty string when nothing but metadata was present.
    """
    titulo = Path(nome).stem

    # Step 1: drop website watermarks while their dots are still intact
    titulo = _WEBSITE_TAG.sub(" ", titulo)

    # Step 2: dots and underscores are word separators in release names;
    # turning both into spaces lets the \b-based patterns below see word edges
    titulo = re.sub(r"[._]", " ", titulo)

    # Step 3: remove metadata patterns
    for padrao in _METADATA_PATTERNS:
        titulo = padrao.sub(" ", titulo)

    # Step 4: clean punctuation and stray numbers
    titulo = re.sub(r"[\[\]\(\)\{\}_-]", " ", titulo)
    titulo = re.sub(r"\b\d{1,2}\b", "", titulo)  # Remove stray numbers
    titulo = re.sub(r"\s+", " ", titulo)  # Collapse spaces
    return titulo.strip()
@sleep_and_retry
@limits(calls=CALLS, period=PERIOD)
def buscar_tmdb(titulo, tipo="tv"):
    """Search TMDB for a series or movie and return the most popular match.

    Args:
        titulo: Title to search for.
        tipo: TMDB search endpoint suffix, "tv" or "movie".

    Returns:
        The result dict with the highest "popularity", or None when the title
        is blank, nothing was found, or the request failed. Failures are
        logged and never raised.
    """
    if not titulo or not titulo.strip():
        # A blank query cannot match anything; skip the pointless request.
        return None

    endpoint = f"{BASE_URL}/search/{tipo}"
    params = {
        "api_key": TMDB_API_KEY,
        "query": titulo,
        "language": LANGUAGE,
    }
    try:
        resp = requests.get(endpoint, params=params, timeout=10)
        resp.raise_for_status()
        resultados = resp.json().get("results") or []
        if not resultados:
            return None
        return max(resultados, key=lambda x: x.get("popularity", 0))
    except requests.exceptions.HTTPError as e:
        rotulo, erro = "HTTP error", e
    except requests.exceptions.ConnectionError as e:
        rotulo, erro = "Connection error", e
    except Exception as e:
        rotulo, erro = "Unexpected error", e

    # The exception text of requests errors contains the request URL, and the
    # API key travels in its query string; mask it before it reaches the log.
    mensagem = str(erro).replace(TMDB_API_KEY, "***")
    logger.error(f"{rotulo} searching {tipo} '{titulo}': {mensagem}")
    return None
def mover(arquivo: Path, destino_base: Path, subpasta_nome: str):
    """Move a file into ``destino_base/<subfolder>``, handling naming conflicts.

    The subfolder name is ``subpasta_nome`` trimmed, with spaces replaced by
    underscores. Characters that are path separators or are rejected by common
    external-drive filesystems (``< > : " / \\ | ? *`` and control characters)
    are also replaced by underscores, so a title such as "Fate/stay night"
    yields a single folder instead of nested ones. A name that ends up empty,
    "." or ".." falls back to "Desconhecido".

    Args:
        arquivo: File to move.
        destino_base: Base destination directory; created if missing.
        subpasta_nome: Human-readable subfolder name (e.g. a title).

    If a different file with the same name already exists at the target,
    ``_1``, ``_2``, ... is appended to the stem. If the file is already at the
    target location, nothing is moved.
    """
    pasta = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", subpasta_nome.strip()).replace(" ", "_")
    if pasta in ("", ".", ".."):
        pasta = "Desconhecido"

    destino_final = destino_base / pasta
    destino_final.mkdir(parents=True, exist_ok=True)
    novo_caminho = destino_final / arquivo.name

    # Re-running over an already organized tree: the "conflict" is the file
    # itself, so renaming it to *_1 would be wrong.
    if novo_caminho.exists() and novo_caminho.samefile(arquivo):
        logger.info(f"Already in place: {novo_caminho}")
        return

    counter = 1
    while novo_caminho.exists():
        novo_caminho = destino_final / f"{arquivo.stem}_{counter}{arquivo.suffix}"
        counter += 1

    logger.info(f"Moving {arquivo} to {novo_caminho}")
    print(f"📁 Movendo para: {novo_caminho}")
    shutil.move(str(arquivo), str(novo_caminho))
def is_anime(detalhes):
    """Decide whether a TMDB detail record describes an anime.

    A record counts as anime when any of its genre names (lower-cased) is in
    GENRES_ANIME, or when Japan is among its origin countries and it carries
    the animation genre.

    Args:
        detalhes: TMDB detail dict; "genres" is read as a list of dicts with
            "id"/"name" keys and "origin_country" as a list of country codes.
            Missing or null entries are treated as empty.

    Returns:
        True if the record is considered anime, False otherwise.
    """
    generos = detalhes.get("genres") or []
    nomes = {(g.get("name") or "").lower() for g in generos}
    if any(nome in GENRES_ANIME for nome in nomes):
        return True

    # Genre names are localized by the requested language, so the English
    # word "animation" is absent for e.g. pt-BR; TMDB's Animation genre id
    # (16) does not depend on the language.
    animacao = "animation" in nomes or any(g.get("id") == 16 for g in generos)

    # Membership instead of equality so co-productions (["JP", "US"]) count.
    paises = detalhes.get("origin_country") or []
    return "JP" in paises and animacao
def _obter_detalhes(tipo, tmdb_id):
    """Fetch the TMDB detail record for a "tv" or "movie" id; raises on HTTP errors."""
    resp = requests.get(
        f"{BASE_URL}/{tipo}/{tmdb_id}",
        params={"api_key": TMDB_API_KEY, "language": LANGUAGE},
        timeout=10,
    )
    # Without this an error payload would be treated as a record with no genres.
    resp.raise_for_status()
    return resp.json()


def _destinos_dentro_da_origem():
    """Return the resolved destination folders that live strictly inside SRC_DIR."""
    origem = SRC_DIR.resolve()
    destinos = (d.resolve() for d in (DEST_FILMES, DEST_SERIES, DEST_ANIMES, DEST_PENDENTE))
    return [d for d in destinos if origem in d.parents]


def _tentar_serie(arquivo, nome_limpo):
    """Try to file the media as a series or anime series; return True if it was moved."""
    resultado = buscar_tmdb(nome_limpo, tipo="tv")
    if not resultado:
        return False
    nome_serie = resultado.get("name", "Serie_Desconhecida")
    try:
        detalhes = _obter_detalhes("tv", resultado.get("id"))
        if is_anime(detalhes):
            logger.info(f"Anime detected (series): {nome_serie}")
            print(f"🎌 Anime detectado (via série): {nome_serie}")
            mover(arquivo, DEST_ANIMES, nome_serie)
        else:
            logger.info(f"Series detected: {nome_serie}")
            print(f"📺 Série detectada: {nome_serie}")
            mover(arquivo, DEST_SERIES, nome_serie)
        return True
    except Exception as e:
        # The exception text may embed the request URL with the API key.
        erro = str(e).replace(TMDB_API_KEY, "***")
        logger.error(f"Error fetching series details for {nome_serie}: {erro}")
        print(f"❌ Erro ao obter detalhes da série: {erro}")
        return False


def _tentar_filme(arquivo, nome_limpo):
    """Try to file the media as a movie or anime movie; return True if it was moved."""
    resultado = buscar_tmdb(nome_limpo, tipo="movie")
    if not resultado:
        return False
    nome_filme = resultado.get("title", nome_limpo)
    try:
        detalhes = _obter_detalhes("movie", resultado.get("id"))
        if is_anime(detalhes):
            logger.info(f"Anime detected: {nome_filme}")
            print(f"🎌 Anime detectado: {nome_filme}")
            mover(arquivo, DEST_ANIMES, nome_filme)
        else:
            generos = detalhes.get("genres") or []
            genero = generos[0]["name"] if generos else "Outros"
            logger.info(f"Movie detected: {nome_filme} → {genero}")
            print(f"🎬 Filme detectado: {nome_filme} → {genero}")
            mover(arquivo, DEST_FILMES / genero.capitalize(), nome_filme)
        return True
    except Exception as e:
        # The exception text may embed the request URL with the API key.
        erro = str(e).replace(TMDB_API_KEY, "***")
        logger.error(f"Error fetching movie details for {nome_filme}: {erro}")
        print(f"❌ Erro ao obter detalhes do filme: {erro}")
        return False


def main():
    """Scan SRC_DIR for media files and sort each one into its destination.

    Each file is looked up on TMDB first as a series, then as a movie.
    Anime goes to DEST_ANIMES, other series to DEST_SERIES, and movies to
    DEST_FILMES/<first genre>. Anything that cannot be identified is moved to
    DEST_PENDENTE/Desconhecido.

    Files that already sit inside a destination folder nested under SRC_DIR
    are skipped, so a second run does not look up and re-move media that was
    already organized. Pointing SRC_DIR directly at a destination folder
    still processes its contents.
    """
    destinos = _destinos_dentro_da_origem()

    # Collect valid files
    arquivos = [
        f for f in SRC_DIR.rglob("*")
        if f.is_file()
        and f.suffix.lower() in VALID_EXTENSIONS
        and not any(d in f.resolve().parents for d in destinos)
    ]
    if not arquivos:
        logger.info("No valid media files found.")
        print("⚠️ Nenhum arquivo de mídia válido encontrado.")
        return

    # Process files with progress bar
    for arquivo in tqdm(arquivos, desc="Organizando arquivos"):
        nome_limpo = limpar_nome(arquivo.name)
        logger.info(f"Raw: {arquivo.name} → Cleaned: {nome_limpo}")
        print(f"🔍 Buscando: {nome_limpo}")

        # Series first (most anime are series), then movie
        if _tentar_serie(arquivo, nome_limpo) or _tentar_filme(arquivo, nome_limpo):
            continue

        # Fallback: Move to PENDENTE with debug info
        logger.warning(f"Failed to identify: {arquivo.name} (Cleaned: {nome_limpo})")
        print(f"⚠️ Movendo para PENDENTE: {arquivo.name}")
        mover(arquivo, DEST_PENDENTE, "Desconhecido")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment