Tatarotus · May 1, 2025 14:53
diff --git a/organizar_e_mover.py b/organizar_e_mover.py
 """
 Dependencies:
 - Standard Library: os, re, shutil, logging, pathlib, time
 - External Packages (install via pip):
  - requests: For HTTP requests to TMDB API
  - python-dotenv: For loading .env file
  - ratelimit: For TMDB API rate limiting
  - tqdm: For progress bar
 Install external packages with:
  pip install requests python-dotenv ratelimit tqdm
 """

 import os
 import re
 import shutil
 import requests
 import logging
 from pathlib import Path
 from time import sleep
 from dotenv import load_dotenv
 from ratelimit import limits, sleep_and_retry
 from tqdm import tqdm

 # Load environment variables from a local .env file (if present).
 load_dotenv()
 TMDB_API_KEY = os.getenv("TMDB_API_KEY")
 if not TMDB_API_KEY:
     # Every TMDB request needs the key, so fail fast at import time.
     raise ValueError("TMDB_API_KEY not found in environment variables")


 def _csv_env(nome, padrao):
     """Read a comma-separated environment variable as a set of lowercase items.

     Whitespace around each item is stripped and empty items are dropped, so a
     value such as ".mp4, .mkv," yields {".mp4", ".mkv"}.
     """
     bruto = os.getenv(nome, padrao)
     return {item.strip().lower() for item in bruto.split(",") if item.strip()}


 # Configurable settings from .env
 LANGUAGE = os.getenv("LANGUAGE", "pt-BR")
 SRC_DIR = Path(os.getenv("SRC_DIR", "."))
 DEST_FILMES = Path(os.getenv("DEST_FILMES", "./FILMES"))
 DEST_SERIES = Path(os.getenv("DEST_SERIES", "./SERIES"))
 DEST_ANIMES = Path(os.getenv("DEST_ANIMES", "./ANIMES"))
 DEST_PENDENTE = Path(os.getenv("DEST_PENDENTE", "./PENDENTE"))
 GENRES_ANIME = _csv_env("GENRES_ANIME", "animação,animation,anime")
 # Extensions are compared against Path.suffix, which starts with a dot, so
 # entries written without one ("mkv") are normalised to ".mkv".
 VALID_EXTENSIONS = {
     ext if ext.startswith(".") else f".{ext}"
     for ext in _csv_env("VALID_EXTENSIONS", ".mp4,.mkv,.avi,.mov,.wmv,.flv")
 }

 # TMDB API settings
 BASE_URL = "https://api.themoviedb.org/3"
 CALLS = 40  # TMDB rate limit: 40 requests per 10 seconds
 PERIOD = 10

 # Setup logging with file and console output (handlers go on the root logger).
 logger = logging.getLogger()
 logger.setLevel(logging.INFO)
 formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
 # Log messages contain non-ASCII text ("→", accented titles); write the file
 # as UTF-8 instead of relying on the platform default encoding.
 file_handler = logging.FileHandler("media_organizer.log", encoding="utf-8")
 file_handler.setFormatter(formatter)
 console_handler = logging.StreamHandler()
 console_handler.setFormatter(formatter)
 logger.addHandler(file_handler)
 logger.addHandler(console_handler)

 # Create destination folders up front so later moves only create sub-folders.
 for path in [DEST_FILMES, DEST_SERIES, DEST_ANIMES, DEST_PENDENTE]:
     path.mkdir(parents=True, exist_ok=True)

 # Website tags such as WWW.BLUDV.COM. They are matched by their dots, so this
 # pattern must run before dots are turned into spaces.
 _WEBSITE_TAG = re.compile(r"WWW\.[A-Z0-9]+\.[A-Z]+", re.IGNORECASE)

 # Release metadata removed once dots have become spaces.
 _METADATA_PATTERNS = tuple(
     re.compile(padrao, re.IGNORECASE)
     for padrao in (
         r"\[.*?\]",  # Fansub tags: [Erai-raws], [MultiSub][14D89703]
         r"(S\d{1,2}E\d{1,2}|\d{1,2}x\d{1,2}|-\s*\d{1,2}|Ep\d{1,2})",  # S01E01, 1x01, - 02, Ep02
         r"\b(19|20)\d{2}\b",  # Years
         r"\d{3,4}p",  # Resolution: 720p, 1080p
         # Encodings/sources. The lookarounds mean "not adjacent to a letter",
         # so short tokens such as CR or AAC are not cut out of real words
         # ("Sacred", "Macross") while "AAC5" or "_x264_" still match.
         r"(?<![^\W\d_])"
         r"(WEB[-. ]?DL|WEBRip|HDRip|BluRay|x264|x265|HEVC|AVC|AAC|6CH|DUAL|MultiSub|CR)"
         r"(?![^\W\d_])",
         r"\b(ep|capítulo|ja|pt|br|us|en)\b",  # Language/episode words
     )
 )


 def limpar_nome(nome):
     """Clean a media filename down to the title used for the TMDB search.

     Args:
         nome: File name (or path); only its stem is used.

     Returns:
         The title with website tags, fansub tags, season/episode markers,
         years, resolutions, encoding tokens, language words, bracket
         punctuation and stray one/two-digit numbers removed. If that leaves
         nothing (for example "1917.mkv"), the stem with separators turned
         into spaces is returned instead of an empty string.
     """
     base = Path(nome).stem
     # Strip website tags while their dots still exist.
     titulo = _WEBSITE_TAG.sub(" ", base)
     # Dots are word separators in release names.
     titulo = titulo.replace(".", " ")
     for padrao in _METADATA_PATTERNS:
         titulo = padrao.sub(" ", titulo)
     # Clean leftover punctuation and stray numbers, then collapse whitespace.
     titulo = re.sub(r"[\[\]\(\)\{\}_-]", " ", titulo)
     titulo = re.sub(r"\b\d{1,2}\b", "", titulo)
     titulo = re.sub(r"\s+", " ", titulo).strip()
     if titulo:
         return titulo
     # Everything was classified as metadata; fall back to the raw stem so the
     # caller still has something to search for.
     return re.sub(r"[._\s]+", " ", base).strip()

 @sleep_and_retry
 @limits(calls=CALLS, period=PERIOD)
 def buscar_tmdb(titulo, tipo="tv"):
     """Search TMDB for a series or movie and return the most popular hit.

     Args:
         titulo: Title to search for.
         tipo: TMDB search endpoint suffix, "tv" or "movie".

     Returns:
         The result dict with the highest "popularity", or None when the
         title is blank, nothing matched, or the request failed. Failures are
         logged, never raised.
     """
     if not titulo or not titulo.strip():
         # An empty query cannot match anything; skip the request.
         logger.warning(f"Empty title, skipping {tipo} search")
         return None

     endpoint = f"{BASE_URL}/search/{tipo}"
     params = {
         "api_key": TMDB_API_KEY,
         "query": titulo,
         "language": LANGUAGE,
     }
     try:
         resp = requests.get(endpoint, params=params, timeout=10)
         resp.raise_for_status()
         resultados = resp.json().get("results") or []
         if not resultados:
             return None
         # A missing or null popularity counts as 0.
         return max(resultados, key=lambda x: x.get("popularity") or 0)
     except Exception as e:
         if isinstance(e, requests.exceptions.HTTPError):
             rotulo = "HTTP error"
         elif isinstance(e, requests.exceptions.ConnectionError):
             rotulo = "Connection error"
         elif isinstance(e, requests.exceptions.Timeout):
             rotulo = "Timeout"
         else:
             rotulo = "Unexpected error"
         # requests puts the full URL (query string included) in its error
         # text; mask the API key before it reaches the log file.
         detalhe = str(e).replace(TMDB_API_KEY, "***")
         logger.error(f"{rotulo} searching {tipo} '{titulo}': {detalhe}")
         return None

 def mover(arquivo: Path, destino_base: Path, subpasta_nome: str):
     """Move a file into ``destino_base/<subfolder>``, avoiding overwrites.

     Args:
         arquivo: File to move.
         destino_base: Base destination directory.
         subpasta_nome: Human-readable sub-folder name (e.g. a TMDB title).
             Spaces become underscores; path separators and characters that
             are invalid in file names are replaced with "_" so the name
             always maps to a single directory.

     If a different file with the same name already exists at the target, a
     numeric suffix (_1, _2, ...) is appended. If the file is already at the
     target location, nothing is done.
     """
     nome_pasta = subpasta_nome.strip().replace(" ", "_")
     # Titles such as "Fate/stay night" or "Re:Zero" would otherwise create
     # nested or invalid directories.
     nome_pasta = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", nome_pasta)
     # Leading/trailing dots would give "..", hidden or Windows-invalid names.
     nome_pasta = nome_pasta.strip(".") or "Desconhecido"

     destino_final = destino_base / nome_pasta
     destino_final.mkdir(parents=True, exist_ok=True)
     novo_caminho = destino_final / arquivo.name

     if novo_caminho.exists() and novo_caminho.samefile(arquivo):
         # Already organised; renaming it to *_1 would be pointless churn.
         logger.info(f"Already in place: {arquivo}")
         return

     counter = 1
     while novo_caminho.exists():
         novo_nome = f"{arquivo.stem}_{counter}{arquivo.suffix}"
         novo_caminho = destino_final / novo_nome
         counter += 1
     logger.info(f"Moving {arquivo} to {novo_caminho}")
     print(f"📁 Movendo para: {novo_caminho}")
     shutil.move(str(arquivo), str(novo_caminho))

 def is_anime(detalhes):
     """Decide whether a TMDB details payload describes an anime.

     Args:
         detalhes: Details dict; only "genres" and "origin_country" are read.

     Returns:
         True when any genre name (lowercased) is listed in GENRES_ANIME, or
         when Japan is among the origin countries and the title is animation.
         Animation is recognised by the English genre name or by genre id 16,
         so the check does not depend on the configured response language.
         NOTE(review): id 16 = Animation is taken from the TMDB genre lists;
         confirm against the API reference.
     """
     generos = detalhes.get("genres") or []
     nomes = {(g.get("name") or "").lower() for g in generos}
     if nomes & GENRES_ANIME:
         return True
     animacao = "animation" in nomes or any(g.get("id") == 16 for g in generos)
     paises = detalhes.get("origin_country") or []
     return "JP" in paises and animacao

 def _sem_chave(erro):
     """Return the error text with the TMDB API key masked for logging."""
     return str(erro).replace(TMDB_API_KEY, "***")


 def _destinos_dentro_da_origem():
     """List destination folders that live strictly inside SRC_DIR."""
     origem = SRC_DIR.resolve()
     destinos = (DEST_FILMES, DEST_SERIES, DEST_ANIMES, DEST_PENDENTE)
     return [d for d in (p.resolve() for p in destinos) if origem in d.parents]


 def _obter_detalhes(tipo, tmdb_id):
     """Fetch the TMDB details for one title, raising on HTTP error statuses."""
     resp = requests.get(
         f"{BASE_URL}/{tipo}/{tmdb_id}",
         params={"api_key": TMDB_API_KEY, "language": LANGUAGE},
         timeout=10,
     )
     # Without this an error payload would be treated as a valid title.
     resp.raise_for_status()
     return resp.json()


 def main():
     """Classify every media file under SRC_DIR and move it to its folder.

     Each file is searched on TMDB as a series first, then as a movie. Anime
     goes to DEST_ANIMES, other series to DEST_SERIES, movies to
     DEST_FILMES/<first genre>; anything unidentified goes to DEST_PENDENTE.
     """
     # Destination folders nested in SRC_DIR (the default layout) are skipped
     # so already organised files are not processed again on the next run.
     # When SRC_DIR itself is a destination (e.g. re-processing PENDENTE),
     # nothing is skipped.
     destinos = _destinos_dentro_da_origem()
     arquivos = [
         f for f in SRC_DIR.rglob("*")
         if f.is_file()
         and f.suffix.lower() in VALID_EXTENSIONS
         and not any(d in f.resolve().parents for d in destinos)
     ]
     if not arquivos:
         logger.info("No valid media files found.")
         print("⚠️ Nenhum arquivo de mídia válido encontrado.")
         return

     # Process files with progress bar
     for arquivo in tqdm(arquivos, desc="Organizando arquivos"):
         nome_limpo = limpar_nome(arquivo.name)
         logger.info(f"Raw: {arquivo.name} → Cleaned: {nome_limpo}")
         print(f"🔍 Buscando: {nome_limpo}")

         # Try as series (most anime are series). An empty title cannot match
         # anything, so the API calls are skipped in that case.
         resultado = buscar_tmdb(nome_limpo, tipo="tv") if nome_limpo else None
         if resultado:
             nome_serie = resultado.get("name", "Serie_Desconhecida")
             try:
                 detalhes = _obter_detalhes("tv", resultado.get("id"))
                 if is_anime(detalhes):
                     logger.info(f"Anime detected (series): {nome_serie}")
                     print(f"🎌 Anime detectado (via série): {nome_serie}")
                     mover(arquivo, DEST_ANIMES, nome_serie)
                 else:
                     logger.info(f"Series detected: {nome_serie}")
                     print(f"📺 Série detectada: {nome_serie}")
                     mover(arquivo, DEST_SERIES, nome_serie)
                 continue
             except Exception as e:
                 # Fall through to the movie lookup; mask the key before logging.
                 logger.error(f"Error fetching series details for {nome_serie}: {_sem_chave(e)}")
                 print(f"❌ Erro ao obter detalhes da série: {_sem_chave(e)}")

         # Try as movie
         resultado = buscar_tmdb(nome_limpo, tipo="movie") if nome_limpo else None
         if resultado:
             nome_filme = resultado.get("title", nome_limpo)
             try:
                 detalhes = _obter_detalhes("movie", resultado.get("id"))
                 if is_anime(detalhes):
                     logger.info(f"Anime detected: {nome_filme}")
                     print(f"🎌 Anime detectado: {nome_filme}")
                     mover(arquivo, DEST_ANIMES, nome_filme)
                 else:
                     generos = detalhes.get("genres") or []
                     genero = generos[0]["name"] if generos else "Outros"
                     logger.info(f"Movie detected: {nome_filme} → {genero}")
                     print(f"🎬 Filme detectado: {nome_filme} → {genero}")
                     mover(arquivo, DEST_FILMES / genero.capitalize(), nome_filme)
                 continue
             except Exception as e:
                 logger.error(f"Error fetching movie details for {nome_filme}: {_sem_chave(e)}")
                 print(f"❌ Erro ao obter detalhes do filme: {_sem_chave(e)}")

         # Fallback: Move to PENDENTE with debug info
         logger.warning(f"Failed to identify: {arquivo.name} (Cleaned: {nome_limpo})")
         print(f"⚠️ Movendo para PENDENTE: {arquivo.name}")
         try:
             mover(arquivo, DEST_PENDENTE, "Desconhecido")
         except OSError as e:
             # One unmovable file must not abort the rest of the batch.
             logger.error(f"Could not move {arquivo} to PENDENTE: {e}")


 if __name__ == "__main__":
     main()
	"""
	Dependencies:
	- Standard Library: os, re, shutil, logging, pathlib, time
	- External Packages (install via pip):
	- requests: For HTTP requests to TMDB API
	- python-dotenv: For loading .env file
	- ratelimit: For TMDB API rate limiting
	- tqdm: For progress bar
	Install external packages with:
	pip install requests python-dotenv ratelimit tqdm
	"""

	import os
	import re
	import shutil
	import requests
	import logging
	from pathlib import Path
	from time import sleep
	from dotenv import load_dotenv
	from ratelimit import limits, sleep_and_retry
	from tqdm import tqdm

	# Load environment variables from a local .env file (if present).
	load_dotenv()
	TMDB_API_KEY = os.getenv("TMDB_API_KEY")
	if not TMDB_API_KEY:
	    # Every TMDB request needs the key, so fail fast at import time.
	    raise ValueError("TMDB_API_KEY not found in environment variables")


	def _csv_env(nome, padrao):
	    """Read a comma-separated environment variable as a set of lowercase items.

	    Whitespace around each item is stripped and empty items are dropped, so a
	    value such as ".mp4, .mkv," yields {".mp4", ".mkv"}.
	    """
	    bruto = os.getenv(nome, padrao)
	    return {item.strip().lower() for item in bruto.split(",") if item.strip()}


	# Configurable settings from .env
	LANGUAGE = os.getenv("LANGUAGE", "pt-BR")
	SRC_DIR = Path(os.getenv("SRC_DIR", "."))
	DEST_FILMES = Path(os.getenv("DEST_FILMES", "./FILMES"))
	DEST_SERIES = Path(os.getenv("DEST_SERIES", "./SERIES"))
	DEST_ANIMES = Path(os.getenv("DEST_ANIMES", "./ANIMES"))
	DEST_PENDENTE = Path(os.getenv("DEST_PENDENTE", "./PENDENTE"))
	GENRES_ANIME = _csv_env("GENRES_ANIME", "animação,animation,anime")
	# Extensions are compared against Path.suffix, which starts with a dot, so
	# entries written without one ("mkv") are normalised to ".mkv".
	VALID_EXTENSIONS = {
	    ext if ext.startswith(".") else f".{ext}"
	    for ext in _csv_env("VALID_EXTENSIONS", ".mp4,.mkv,.avi,.mov,.wmv,.flv")
	}

	# TMDB API settings
	BASE_URL = "https://api.themoviedb.org/3"
	CALLS = 40  # TMDB rate limit: 40 requests per 10 seconds
	PERIOD = 10

	# Setup logging with file and console output (handlers go on the root logger).
	logger = logging.getLogger()
	logger.setLevel(logging.INFO)
	formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
	# Log messages contain non-ASCII text ("→", accented titles); write the file
	# as UTF-8 instead of relying on the platform default encoding.
	file_handler = logging.FileHandler("media_organizer.log", encoding="utf-8")
	file_handler.setFormatter(formatter)
	console_handler = logging.StreamHandler()
	console_handler.setFormatter(formatter)
	logger.addHandler(file_handler)
	logger.addHandler(console_handler)

	# Create destination folders up front so later moves only create sub-folders.
	for path in [DEST_FILMES, DEST_SERIES, DEST_ANIMES, DEST_PENDENTE]:
	    path.mkdir(parents=True, exist_ok=True)

	# Website tags such as WWW.BLUDV.COM. They are matched by their dots, so this
	# pattern must run before dots are turned into spaces.
	_WEBSITE_TAG = re.compile(r"WWW\.[A-Z0-9]+\.[A-Z]+", re.IGNORECASE)

	# Release metadata removed once dots have become spaces. The alternations
	# use a plain "|"; an escaped "\|" would match a literal pipe character.
	_METADATA_PATTERNS = tuple(
	    re.compile(padrao, re.IGNORECASE)
	    for padrao in (
	        r"\[.*?\]",  # Fansub tags: [Erai-raws], [MultiSub][14D89703]
	        r"(S\d{1,2}E\d{1,2}|\d{1,2}x\d{1,2}|-\s*\d{1,2}|Ep\d{1,2})",  # S01E01, 1x01, - 02, Ep02
	        r"\b(19|20)\d{2}\b",  # Years
	        r"\d{3,4}p",  # Resolution: 720p, 1080p
	        # Encodings/sources. The lookarounds mean "not adjacent to a letter",
	        # so short tokens such as CR or AAC are not cut out of real words
	        # ("Sacred", "Macross") while "AAC5" or "_x264_" still match.
	        r"(?<![^\W\d_])"
	        r"(WEB[-. ]?DL|WEBRip|HDRip|BluRay|x264|x265|HEVC|AVC|AAC|6CH|DUAL|MultiSub|CR)"
	        r"(?![^\W\d_])",
	        r"\b(ep|capítulo|ja|pt|br|us|en)\b",  # Language/episode words
	    )
	)


	def limpar_nome(nome):
	    """Clean a media filename down to the title used for the TMDB search.

	    Args:
	        nome: File name (or path); only its stem is used.

	    Returns:
	        The title with website tags, fansub tags, season/episode markers,
	        years, resolutions, encoding tokens, language words, bracket
	        punctuation and stray one/two-digit numbers removed. If that leaves
	        nothing (for example "1917.mkv"), the stem with separators turned
	        into spaces is returned instead of an empty string.
	    """
	    base = Path(nome).stem
	    # Strip website tags while their dots still exist.
	    titulo = _WEBSITE_TAG.sub(" ", base)
	    # Dots are word separators in release names.
	    titulo = titulo.replace(".", " ")
	    for padrao in _METADATA_PATTERNS:
	        titulo = padrao.sub(" ", titulo)
	    # Clean leftover punctuation and stray numbers, then collapse whitespace.
	    titulo = re.sub(r"[\[\]\(\)\{\}_-]", " ", titulo)
	    titulo = re.sub(r"\b\d{1,2}\b", "", titulo)
	    titulo = re.sub(r"\s+", " ", titulo).strip()
	    if titulo:
	        return titulo
	    # Everything was classified as metadata; fall back to the raw stem so the
	    # caller still has something to search for.
	    return re.sub(r"[._\s]+", " ", base).strip()

	@sleep_and_retry
	@limits(calls=CALLS, period=PERIOD)
	def buscar_tmdb(titulo, tipo="tv"):
	    """Search TMDB for a series or movie and return the most popular hit.

	    Args:
	        titulo: Title to search for.
	        tipo: TMDB search endpoint suffix, "tv" or "movie".

	    Returns:
	        The result dict with the highest "popularity", or None when the
	        title is blank, nothing matched, or the request failed. Failures are
	        logged, never raised.
	    """
	    if not titulo or not titulo.strip():
	        # An empty query cannot match anything; skip the request.
	        logger.warning(f"Empty title, skipping {tipo} search")
	        return None

	    endpoint = f"{BASE_URL}/search/{tipo}"
	    params = {
	        "api_key": TMDB_API_KEY,
	        "query": titulo,
	        "language": LANGUAGE,
	    }
	    try:
	        resp = requests.get(endpoint, params=params, timeout=10)
	        resp.raise_for_status()
	        resultados = resp.json().get("results") or []
	        if not resultados:
	            return None
	        # A missing or null popularity counts as 0.
	        return max(resultados, key=lambda x: x.get("popularity") or 0)
	    except Exception as e:
	        if isinstance(e, requests.exceptions.HTTPError):
	            rotulo = "HTTP error"
	        elif isinstance(e, requests.exceptions.ConnectionError):
	            rotulo = "Connection error"
	        elif isinstance(e, requests.exceptions.Timeout):
	            rotulo = "Timeout"
	        else:
	            rotulo = "Unexpected error"
	        # requests puts the full URL (query string included) in its error
	        # text; mask the API key before it reaches the log file.
	        detalhe = str(e).replace(TMDB_API_KEY, "***")
	        logger.error(f"{rotulo} searching {tipo} '{titulo}': {detalhe}")
	        return None

	def mover(arquivo: Path, destino_base: Path, subpasta_nome: str):
	    """Move a file into ``destino_base/<subfolder>``, avoiding overwrites.

	    Args:
	        arquivo: File to move.
	        destino_base: Base destination directory.
	        subpasta_nome: Human-readable sub-folder name (e.g. a TMDB title).
	            Spaces become underscores; path separators and characters that
	            are invalid in file names are replaced with "_" so the name
	            always maps to a single directory.

	    If a different file with the same name already exists at the target, a
	    numeric suffix (_1, _2, ...) is appended. If the file is already at the
	    target location, nothing is done.
	    """
	    nome_pasta = subpasta_nome.strip().replace(" ", "_")
	    # Titles such as "Fate/stay night" or "Re:Zero" would otherwise create
	    # nested or invalid directories.
	    nome_pasta = re.sub(r'[<>:"/\\|?*\x00-\x1f]', "_", nome_pasta)
	    # Leading/trailing dots would give "..", hidden or Windows-invalid names.
	    nome_pasta = nome_pasta.strip(".") or "Desconhecido"

	    destino_final = destino_base / nome_pasta
	    destino_final.mkdir(parents=True, exist_ok=True)
	    novo_caminho = destino_final / arquivo.name

	    if novo_caminho.exists() and novo_caminho.samefile(arquivo):
	        # Already organised; renaming it to *_1 would be pointless churn.
	        logger.info(f"Already in place: {arquivo}")
	        return

	    counter = 1
	    while novo_caminho.exists():
	        novo_nome = f"{arquivo.stem}_{counter}{arquivo.suffix}"
	        novo_caminho = destino_final / novo_nome
	        counter += 1
	    logger.info(f"Moving {arquivo} to {novo_caminho}")
	    print(f"📁 Movendo para: {novo_caminho}")
	    shutil.move(str(arquivo), str(novo_caminho))

	def is_anime(detalhes):
	    """Decide whether a TMDB details payload describes an anime.

	    Args:
	        detalhes: Details dict; only "genres" and "origin_country" are read.

	    Returns:
	        True when any genre name (lowercased) is listed in GENRES_ANIME, or
	        when Japan is among the origin countries and the title is animation.
	        Animation is recognised by the English genre name or by genre id 16,
	        so the check does not depend on the configured response language.
	        NOTE(review): id 16 = Animation is taken from the TMDB genre lists;
	        confirm against the API reference.
	    """
	    generos = detalhes.get("genres") or []
	    nomes = {(g.get("name") or "").lower() for g in generos}
	    if nomes & GENRES_ANIME:
	        return True
	    animacao = "animation" in nomes or any(g.get("id") == 16 for g in generos)
	    paises = detalhes.get("origin_country") or []
	    return "JP" in paises and animacao

	def _sem_chave(erro):
	    """Return the error text with the TMDB API key masked for logging."""
	    return str(erro).replace(TMDB_API_KEY, "***")


	def _destinos_dentro_da_origem():
	    """List destination folders that live strictly inside SRC_DIR."""
	    origem = SRC_DIR.resolve()
	    destinos = (DEST_FILMES, DEST_SERIES, DEST_ANIMES, DEST_PENDENTE)
	    return [d for d in (p.resolve() for p in destinos) if origem in d.parents]


	def _obter_detalhes(tipo, tmdb_id):
	    """Fetch the TMDB details for one title, raising on HTTP error statuses."""
	    resp = requests.get(
	        f"{BASE_URL}/{tipo}/{tmdb_id}",
	        params={"api_key": TMDB_API_KEY, "language": LANGUAGE},
	        timeout=10,
	    )
	    # Without this an error payload would be treated as a valid title.
	    resp.raise_for_status()
	    return resp.json()


	def main():
	    """Classify every media file under SRC_DIR and move it to its folder.

	    Each file is searched on TMDB as a series first, then as a movie. Anime
	    goes to DEST_ANIMES, other series to DEST_SERIES, movies to
	    DEST_FILMES/<first genre>; anything unidentified goes to DEST_PENDENTE.
	    """
	    # Destination folders nested in SRC_DIR (the default layout) are skipped
	    # so already organised files are not processed again on the next run.
	    # When SRC_DIR itself is a destination (e.g. re-processing PENDENTE),
	    # nothing is skipped.
	    destinos = _destinos_dentro_da_origem()
	    arquivos = [
	        f for f in SRC_DIR.rglob("*")
	        if f.is_file()
	        and f.suffix.lower() in VALID_EXTENSIONS
	        and not any(d in f.resolve().parents for d in destinos)
	    ]
	    if not arquivos:
	        logger.info("No valid media files found.")
	        print("⚠️ Nenhum arquivo de mídia válido encontrado.")
	        return

	    # Process files with progress bar
	    for arquivo in tqdm(arquivos, desc="Organizando arquivos"):
	        nome_limpo = limpar_nome(arquivo.name)
	        logger.info(f"Raw: {arquivo.name} → Cleaned: {nome_limpo}")
	        print(f"🔍 Buscando: {nome_limpo}")

	        # Try as series (most anime are series). An empty title cannot match
	        # anything, so the API calls are skipped in that case.
	        resultado = buscar_tmdb(nome_limpo, tipo="tv") if nome_limpo else None
	        if resultado:
	            nome_serie = resultado.get("name", "Serie_Desconhecida")
	            try:
	                detalhes = _obter_detalhes("tv", resultado.get("id"))
	                if is_anime(detalhes):
	                    logger.info(f"Anime detected (series): {nome_serie}")
	                    print(f"🎌 Anime detectado (via série): {nome_serie}")
	                    mover(arquivo, DEST_ANIMES, nome_serie)
	                else:
	                    logger.info(f"Series detected: {nome_serie}")
	                    print(f"📺 Série detectada: {nome_serie}")
	                    mover(arquivo, DEST_SERIES, nome_serie)
	                continue
	            except Exception as e:
	                # Fall through to the movie lookup; mask the key before logging.
	                logger.error(f"Error fetching series details for {nome_serie}: {_sem_chave(e)}")
	                print(f"❌ Erro ao obter detalhes da série: {_sem_chave(e)}")

	        # Try as movie
	        resultado = buscar_tmdb(nome_limpo, tipo="movie") if nome_limpo else None
	        if resultado:
	            nome_filme = resultado.get("title", nome_limpo)
	            try:
	                detalhes = _obter_detalhes("movie", resultado.get("id"))
	                if is_anime(detalhes):
	                    logger.info(f"Anime detected: {nome_filme}")
	                    print(f"🎌 Anime detectado: {nome_filme}")
	                    mover(arquivo, DEST_ANIMES, nome_filme)
	                else:
	                    generos = detalhes.get("genres") or []
	                    genero = generos[0]["name"] if generos else "Outros"
	                    logger.info(f"Movie detected: {nome_filme} → {genero}")
	                    print(f"🎬 Filme detectado: {nome_filme} → {genero}")
	                    mover(arquivo, DEST_FILMES / genero.capitalize(), nome_filme)
	                continue
	            except Exception as e:
	                logger.error(f"Error fetching movie details for {nome_filme}: {_sem_chave(e)}")
	                print(f"❌ Erro ao obter detalhes do filme: {_sem_chave(e)}")

	        # Fallback: Move to PENDENTE with debug info
	        logger.warning(f"Failed to identify: {arquivo.name} (Cleaned: {nome_limpo})")
	        print(f"⚠️ Movendo para PENDENTE: {arquivo.name}")
	        try:
	            mover(arquivo, DEST_PENDENTE, "Desconhecido")
	        except OSError as e:
	            # One unmovable file must not abort the rest of the batch.
	            logger.error(f"Could not move {arquivo} to PENDENTE: {e}")


	if __name__ == "__main__":
	    main()
No results found