Created
November 23, 2024 23:36
-
-
Save linkviii/af595c71f25883618efe6d1aea4e95e1 to your computer and use it in GitHub Desktop.
sort anidb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# %% | |
from bs4 import BeautifulSoup | |
from pathlib import Path | |
import operator | |
import itertools | |
import sys | |
import os | |
# %% | |
# Ensure that `>` output redirection works on Windows:
# export PYTHONIOENCODING=utf-8
# os.environ["PYTHONIOENCODING"] = "utf-8"
# %% | |
# Used ctrl s from the browser -- not even scraping | |
# fname = "./Matsui Youhei - Person (30445) - AniDB.htm" | |
# fname = "Ishikawa Tomohisa - Person (9097) - AniDB.htm" | |
# fname = "Fujimura Tooru - Person (42411) - AniDB.htm" | |
# fname = "ZAQ - Person (32347) - AniDB.htm" | |
# Saved AniDB person page to analyse (see the alternatives listed above).
fname = "Tom-H@ck - Person (2431) - AniDB.htm"
# Parse the saved HTML with the stdlib-backed parser.
soup = BeautifulSoup(
    markup=Path(fname).read_text(encoding="UTF-8"),
    features="html.parser",
)
# %% | |
# Song name -> {"song": name, "link": href, <credit label>: True, ...}.
# Songs are keyed by name, so rows sharing a name merge into one entry
# that accumulates every credit label seen for it.
library = {}
# Every distinct credit label found in the production table.
credit_kinds = set()

production_pane = soup.find("div", class_="music_production")
production_body = (
    production_pane.find("tbody") if production_pane is not None else None
)
# A page without a production table yields no songs instead of raising
# AttributeError, mirroring how a missing anime pane is tolerated.
songs = production_body.find_all("tr") if production_body is not None else []

for song in songs:
    song_cell = song.find(class_="song")
    if song_cell is None:
        # Row carries no song cell; nothing to record.
        continue

    name_tag = song_cell.find("a")
    song_name = name_tag.text.strip()
    song_link = name_tag["href"]
    credit = song.find(class_="credit").text.strip()

    # Create the entry on first sight, then flag this credit on it.
    entry = library.setdefault(song_name, {"song": song_name, "link": song_link})
    entry[credit] = True
    credit_kinds.add(credit)
# %% | |
anime_pane = soup.find("div", class_="anime_songs")
anime_body = anime_pane.find("tbody") if anime_pane is not None else None
# No anime pane (or no table body in it) simply means no anime rows.
anime_list = anime_body.find_all("tr") if anime_body is not None else []

# Rows without an anime cell reuse the most recent anime name seen
# (presumably the anime cell spans several song rows -- verify on the page).
anime_name = None
for anime in anime_list:
    # Update the running anime name first so a row lacking a song cell
    # still advances it for the rows that follow.
    try_name = anime.find(class_="name anime")
    if try_name is not None:
        anime_name = try_name.text.strip()

    song_cell = anime.find(class_="name song")
    if song_cell is None:
        # Same guard the production loop applies to rows without a song.
        continue
    song_name = song_cell.text.strip()

    # A song absent from the production table has no entry yet; create one
    # rather than raising KeyError so it can later be reported as lacking
    # a credit.
    link_tag = song_cell.find("a")
    song_link = link_tag.get("href") if link_tag is not None else None
    entry = library.setdefault(song_name, {"song": song_name, "link": song_link})
    entry["anime"] = anime_name
# %% | |
# Diagnostics over the collected songs, as (song name, attributes) pairs.
# not_anime: entries that never received an "anime" key.
not_anime = [
    (title, info) for title, info in library.items() if "anime" not in info
]
# missing_credit: entries none of whose keys is a known credit label, as
# would be the case for a song seen in the anime pane but not the
# production pane.
missing_credit = [
    (title, info)
    for title, info in library.items()
    if credit_kinds.isdisjoint(info)
]
# %% | |
# Credit label -> {song name -> attributes} for the songs carrying that
# label. A song with several credits appears under each of them.
library_by_credit = {
    label: {title: info for title, info in library.items() if label in info}
    for label in credit_kinds
}
# %% | |
# Credit label -> {anime name (or None) -> [attributes, ...]}.
# Songs with no "anime" key are grouped under the None key.
library_by_credit_by_anime = {}
for label, songs_for_label in library_by_credit.items():
    grouped = {}
    for info in songs_for_label.values():
        grouped.setdefault(info.get("anime"), []).append(info)
    library_by_credit_by_anime[label] = grouped
# %% | |
# Width of the song-name column. Falls back to 0 when no songs were
# collected, because max() of an empty sequence raises ValueError.
max_name_len = max((len(it) for it in library), default=0)

# %%
print(f"{fname}")
print("=" * 80)

# Sorted so the report is reproducible: iterating the set directly can
# order the sections differently from one run to the next.
for kind in sorted(credit_kinds):
    this_credit_by_anime = library_by_credit_by_anime[kind]

    print(kind)
    print("=" * len(kind))

    # Songs with no anime recorded are grouped under the None key and
    # listed first.
    if None in this_credit_by_anime:
        print("Not for anime:")
        for attr in this_credit_by_anime[None]:
            print(f"\t{attr['song']:{max_name_len}} {attr['link']}")
        print()

    # filter(None, ...) drops the None group handled above.
    for anime_name in sorted(filter(None, this_credit_by_anime)):
        print(f"{anime_name}:")
        for attr in this_credit_by_anime[anime_name]:
            print(f"\t{attr['song']:{max_name_len}} {attr['link']}")
        print()

    print()
# %% | |
# getter = lambda key: lambda obj: obj[key] if key in obj else None | |
# list(itertools.groupby(library.values(),key=getter("anime"))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment