Skip to content

Instantly share code, notes, and snippets.

@linkviii
Created November 23, 2024 23:36
Show Gist options
  • Save linkviii/af595c71f25883618efe6d1aea4e95e1 to your computer and use it in GitHub Desktop.
Save linkviii/af595c71f25883618efe6d1aea4e95e1 to your computer and use it in GitHub Desktop.
sort anidb
#
# %%
from bs4 import BeautifulSoup
from pathlib import Path
import operator
import itertools
import sys
import os
# %%
# Ensure that `>` redirection works on windows
# export PYTHONIOENCODING=utf-8
# os.environ["PYTHONIOENCODING"] = "utf-8"
# %%
# Input is an AniDB person page saved from the browser (Ctrl+S); this script does no live scraping.
# fname = "./Matsui Youhei - Person (30445) - AniDB.htm"
# fname = "Ishikawa Tomohisa - Person (9097) - AniDB.htm"
# fname = "Fujimura Tooru - Person (42411) - AniDB.htm"
# fname = "ZAQ - Person (32347) - AniDB.htm"
# Saved AniDB person page to analyse; swap in another saved file name to
# process a different person.
fname = "Tom-H@ck - Person (2431) - AniDB.htm"
# Read the file explicitly as UTF-8, then parse it with the stdlib-backed
# "html.parser" backend.
page_html = Path(fname).read_text(encoding="UTF-8")
soup = BeautifulSoup(page_html, "html.parser")
# %%
# Song title -> {"song": title, "link": href, <credit label>: True, ...},
# filled from the rows of the "music_production" table.
# NOTE: credit labels share the dict with the "song"/"link"/"anime" keys, so a
# credit literally named like one of those would overwrite that field.
library = {}
# Every distinct credit label seen in the production table.
credit_kinds = set()

production_pane = soup.find("div", class_="music_production")
production_body = (
    production_pane.find("tbody") if production_pane is not None else None
)
# find() returns None when nothing matches; treat a missing pane or <tbody>
# as "no rows" (as is done for the anime pane) instead of raising
# AttributeError.
songs = production_body.find_all("tr") if production_body is not None else []

for song in songs:
    song_cell = song.find(class_="song")
    if song_cell is None:
        # Row without a song cell; nothing to record.
        continue
    name_tag = song_cell.find("a")
    credit_tag = song.find(class_="credit")
    if name_tag is None or credit_tag is None:
        # Row lacks the song link or the credit cell; skip it rather than crash.
        continue

    song_name = name_tag.text.strip()
    song_link = name_tag["href"]
    credit = credit_tag.text.strip()

    # The first row seen for a title creates the entry (and fixes its link);
    # later rows for the same title only add further credit flags.
    entry = library.setdefault(song_name, {"song": song_name, "link": song_link})
    entry[credit] = True
    credit_kinds.add(credit)
# %%
# Attach an "anime" title to each song listed in the "anime_songs" table.
anime_pane = soup.find("div", class_="anime_songs")
anime_body = anime_pane.find("tbody") if anime_pane is not None else None
# A missing pane or <tbody> simply means there are no rows to process.
anime_list = anime_body.find_all("tr") if anime_body is not None else []

# Not every row has a "name anime" cell; rows without one reuse the most
# recent title seen, so the value is carried across iterations.
anime_name = None
for anime in anime_list:
    try_name = anime.find(class_="name anime")
    if try_name is not None:
        anime_name = try_name.text.strip()

    song_cell = anime.find(class_="name song")
    if song_cell is None:
        # Row without a song cell; nothing to attach.
        continue
    song_name = song_cell.text.strip()

    entry = library.get(song_name)
    if entry is None:
        # Song is listed here but was never recorded from the production pane.
        # Create a credit-less entry instead of raising KeyError so it can
        # still be detected downstream as lacking a credit.
        link_tag = song_cell.find("a")
        song_link = link_tag.get("href", "") if link_tag is not None else ""
        entry = {"song": song_name, "link": song_link}
        library[song_name] = entry
    # If a title appears under several anime, the last row wins.
    entry["anime"] = anime_name
# %%
# Diagnostic lists of (title, entry) pairs, both in `library` order.
# not_anime: entries that never received an "anime" key.
not_anime = [
    (title, info) for title, info in library.items() if "anime" not in info
]
# missing_credit: entries with none of the known credit labels among their
# keys, i.e. entries that were not recorded via the production pane.
missing_credit = [
    (title, info)
    for title, info in library.items()
    if credit_kinds.isdisjoint(info)
]
# %%
# Credit label -> {song title -> entry} for every entry flagged with that
# credit. Entries are shared with `library`, not copied, and keep its order.
library_by_credit = {
    credit: {title: info for title, info in library.items() if credit in info}
    for credit in credit_kinds
}
# %%
# Credit label -> {anime title (or None) -> [entries]}. Entries without an
# "anime" key are grouped under the key None.
library_by_credit_by_anime = {}
for kind in credit_kinds:
    grouped = {}
    for info in library_by_credit[kind].values():
        grouped.setdefault(info.get("anime"), []).append(info)
    library_by_credit_by_anime[kind] = grouped
# %%
# Width of the longest song title, used to align the link column in the
# report. `default=0` keeps this from raising ValueError when no songs were
# found and `library` is empty.
max_name_len = max((len(title) for title in library), default=0)
# %%
# Print the report: one section per credit kind, songs grouped by anime.
print(f"{fname}")
print("=" * 80)
# Iterate in sorted order: a set of strings has no stable order between
# interpreter runs, which made the section order change from run to run.
for kind in sorted(credit_kinds):
    this_credit_by_anime = library_by_credit_by_anime[kind]
    print(kind)
    print("=" * len(kind))

    # Songs without an anime come first, then anime titles alphabetically.
    sections = []
    if None in this_credit_by_anime:
        sections.append(("Not for anime:", this_credit_by_anime[None]))
    # Only the None key is excluded here; an empty-string title is still a
    # real group and must not be dropped from the output.
    named = sorted(name for name in this_credit_by_anime if name is not None)
    sections.extend((f"{name}:", this_credit_by_anime[name]) for name in named)

    for heading, song_list in sections:
        print(heading)
        for attr in song_list:
            # Pad the title to a common width so the links line up.
            print(f"\t{attr['song']:{max_name_len}} {attr['link']}")
        print()
    print()
# %%
# getter = lambda key: lambda obj: obj[key] if key in obj else None
# list(itertools.groupby(library.values(),key=getter("anime")))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment