-
-
Save fxthomas/fd85e906e41f4e6e06f38e92a497005b to your computer and use it in GitHub Desktop.
#!/usr/bin/python | |
# coding=utf-8 | |
"""Python Script for bootstrapping a MusicBrainz release using a VGMDB album. | |
This script uses the unofficial VGMDB.info JSON API to prefill the MusicBrainz | |
"Add Release" form with data from VGMDB. | |
It is only meant as a first step to make adding a new MB release easier; please | |
check for missing/erroneous data and make sure the imported release follows the | |
MusicBrainz guidelines! | |
Because VGMDB has a lot of Japanese content, we import track/release titles from | |
this language before trying other languages, and try to guess values for some | |
fields (e.g. "Soundtrack" album types). | |
Documentation about the field format is found at: | |
https://musicbrainz.org/doc/Development/Release_Editor_Seeding | |
""" | |
import re | |
import sys | |
import json | |
import html | |
import argparse | |
import webbrowser | |
from urllib.request import urlopen | |
from tempfile import NamedTemporaryFile | |
from datetime import datetime | |
def strptimes(s, fmts):
    """Parse *s* with the first format in *fmts* that matches.

    Formats are tried in the order given. Returns the parsed ``datetime``,
    or ``None`` when no format matches (or *fmts* is empty).
    """
    parsed = None
    for pattern in fmts:
        try:
            parsed = datetime.strptime(s, pattern)
        except ValueError:
            # This pattern does not fit; fall through to the next one.
            pass
        else:
            break
    return parsed
def vgmdb_get_album_url(album_id, format_="json"):
    """Build the VGMDB.info API URL for an album.

    *album_id* is the numeric album ID; *format_* is passed through as the
    ``format`` query parameter (``"json"`` by default).
    """
    url_template = "https://vgmdb.info/album/%d?format=%s"
    return url_template % (album_id, format_)
def vgmdb_get_album_data(album_url):
    """Retrieve and decode the JSON document for a VGMDB album.

    Args:
        album_url: URL to fetch, as built by ``vgmdb_get_album_url``.

    Returns:
        The decoded JSON payload.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for
            non-success HTTP status codes).
        ValueError: If the response body is not valid JSON.
    """
    # Close the response deterministically instead of leaking the
    # connection until garbage collection.
    with urlopen(album_url) as response:
        return json.load(response)
def write_musicbrainz_html_form(fd, album_data):
    """Write a local, self-submitting MusicBrainz import form for an album.

    The form POSTs hidden seeding fields to the MusicBrainz "Add Release"
    page. Japanese titles and names are preferred when present.

    Args:
        fd: Writable text file object that receives the HTML.
        album_data: Album dictionary decoded from the VGMDB.info JSON API.
            ``names`` and ``vgmdb_link`` are required; the credit lists,
            ``classification``, ``release_date``, ``catalog``,
            ``media_format`` and ``discs`` are treated as optional.
    """
    def field(name, value):
        fd.write(_hidden_input(name, value))

    fd.write("""<!doctype html>""")
    fd.write("""<meta charset="UTF-8">""")
    fd.write("""<title>Add VGMDB album As Release...</title>""")
    fd.write("""<form action="https://musicbrainz.org/release/add" method="post">""")

    album_names = album_data["names"]
    has_japanese_title = "ja" in album_names

    field("name", _preferred_name(album_names, "ja"))
    field("status", "official")
    if "soundtrack" in (album_data.get("classification") or "").lower():
        # Two "type" fields: the first is the primary type, the second the
        # secondary type.
        field("type", "album")
        field("type", "soundtrack")
    if has_japanese_title:
        field("language", "jpn")
        field("script", "Jpan")

    # Collect de-duplicated credit names per role, plus an overall list that
    # keeps first-seen order across roles.
    all_artists = []
    credits = {}
    for role in ("composers", "arrangers", "performers", "lyricists"):
        role_names = []
        for entry in album_data.get(role) or []:
            name = _preferred_name(entry["names"], "ja")
            if name not in all_artists:
                all_artists.append(name)
            if name not in role_names:
                role_names.append(name)
        credits[role] = role_names

    performers = credits["performers"]
    artists = ["Various Artists"] if len(performers) >= 3 else all_artists
    track_artists = (
        performers
        or credits["arrangers"]
        or credits["composers"]
        or credits["lyricists"]
        or all_artists
    )
    join_phrase = ", "

    last_artist_ix = len(artists) - 1
    for artist_ix, artist_name in enumerate(artists):
        field(f"artist_credit.names.{artist_ix}.artist.name", artist_name)
        if artist_ix < last_artist_ix:
            field(f"artist_credit.names.{artist_ix}.join_phrase", join_phrase)

    # Only emit the date components the source actually provides, so that a
    # year-only date is not turned into January 1st.
    date_parts = _release_date_parts(album_data.get("release_date"))
    for part_name, part_value in zip(("year", "month", "day"), date_parts):
        if part_value is not None:
            field(f"events.0.date.{part_name}", part_value)
    if has_japanese_title:
        field("events.0.country", "JP")

    catalog_nr = album_data.get("catalog")
    if catalog_nr:
        field("labels.0.catalog_number", catalog_nr)

    vgmdb_link = album_data["vgmdb_link"]
    field("urls.0.url", vgmdb_link)
    field("urls.0.link_type", "86")  # VGMDB
    field("edit_note", f"Imported from {vgmdb_link}")

    is_cd = album_data.get("media_format") == "CD"
    last_track_artist_ix = len(track_artists) - 1
    for disc_ix, disc_data in enumerate(album_data.get("discs") or []):
        medium = f"mediums.{disc_ix}"
        if is_cd:
            field(f"{medium}.format", "CD")
        for track_ix, track_data in enumerate(disc_data["tracks"]):
            track = f"{medium}.track.{track_ix}"
            field(f"{track}.name", _preferred_name(track_data["names"], "Japanese"))
            field(f"{track}.length", _track_length_ms(track_data.get("track_length")))
            for artist_ix, artist_name in enumerate(track_artists):
                credit = f"{track}.artist_credit.names.{artist_ix}"
                field(f"{credit}.mbid", "")
                field(f"{credit}.name", artist_name)
                field(f"{credit}.artist.name", artist_name)
                # No join phrase after the last artist, matching the
                # release-level credit above.
                if artist_ix < last_track_artist_ix:
                    field(f"{credit}.join_phrase", join_phrase)

    fd.write("""<input type="submit" value="Add Cluster As Release...">""")
    fd.write("""</form>""")
    fd.write("""<script>document.forms[0].submit()</script>""")


def _hidden_input(name, value):
    """Return one hidden ``<input>`` element with an HTML-escaped value."""
    return f"""<input type="hidden" name="{name}" value="{html.escape(str(value))}">"""


def _preferred_name(names, preferred_key):
    """Return ``names[preferred_key]`` if set, else the first name, else ""."""
    return names.get(preferred_key) or next(iter(names.values()), "")


def _release_date_parts(date_str):
    """Split a release date into ``(year, month, day)``; unknown parts are None."""
    formats = (("%Y-%m-%d", 3), ("%Y-%m", 2), ("%Y", 1))
    for fmt, known in formats:
        try:
            parsed = datetime.strptime(date_str, fmt)
        except (ValueError, TypeError):
            continue
        parts = (parsed.year, parsed.month, parsed.day)
        return parts[:known] + (None,) * (3 - known)
    return (None, None, None)


def _track_length_ms(length):
    """Convert ``M:SS`` or ``H:MM:SS`` to milliseconds; 0 when unknown."""
    if not length or length.lower() == "unknown":
        return 0
    parts = length.strip().split(":")
    if len(parts) not in (2, 3) or not all(part.isdecimal() for part in parts):
        return 0
    seconds = 0
    # Fold left to right so the minutes field may exceed 59.
    for part in parts:
        seconds = seconds * 60 + int(part)
    return 1000 * seconds
def main(argv=None):
    """Command-line entry point: seed a MusicBrainz release from a VGMDB album.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 1 on an invalid album reference
        or when the album data cannot be retrieved.
    """
    # Local import: only this entry point needs it.
    from pathlib import Path

    # Parse arguments
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("album_id_url", help="VGMDB album id or URL", type=str)
    parser.add_argument("--show-api-page", "-s", help="Show API page instead of the MB form", action="store_true")
    args = parser.parse_args(argv)

    # Parse album ID. The dots are escaped so only the real host name
    # matches; an optional "www." prefix is accepted too.
    m = re.match(
        r"(?:https?://)?(?:www\.)?vgmdb\.net/album/(?P<album_id>\d+)",
        args.album_id_url,
    )
    if m:
        album_id = int(m.group("album_id"))
    elif args.album_id_url.isdecimal():
        # isdecimal() rather than isdigit(): the latter accepts characters
        # such as superscript digits that int() rejects.
        album_id = int(args.album_id_url)
    else:
        print("Invalid album ID or URL")
        return 1

    # Optionally just show the API page for the album
    if args.show_api_page:
        album_url = vgmdb_get_album_url(album_id, format_="html")
        print("Opening %s" % album_url)
        webbrowser.open(album_url)
        return 0

    # Retrieve album data. URLError/HTTPError are OSError subclasses and a
    # JSON decode failure is a ValueError; report them instead of dumping a
    # traceback when the API endpoint is unavailable.
    album_url = vgmdb_get_album_url(album_id)
    try:
        album_data = vgmdb_get_album_data(album_url)
    except (OSError, ValueError) as err:
        print("Could not retrieve %s: %s" % (album_url, err))
        return 1

    # Write the MusicBrainz form; leaving the `with` block flushes and closes
    # the file before the browser reads it (delete=False keeps it on disk).
    with NamedTemporaryFile(suffix=".html", encoding="utf-8", mode="w+", delete=False) as fd:
        write_musicbrainz_html_form(fd, album_data)
    print("Opening %s" % fd.name)
    # webbrowser expects a URL; a bare filesystem path is not portable.
    webbrowser.open(Path(fd.name).as_uri())
    return 0


if __name__ == "__main__":
    sys.exit(main())
@mitsufune You need to open a system command-line window, not a Python shell. I believe on Windows 10 you can use something called "Powershell", and the command-line should look like this by default?
PS> python vgmdb2mb.py https://vgmdb.net/album/... <you can press ENTER after this>
Never tested it on Windows though, let me know if something's not compatible!
The script breaks if the VGMDB page only has the year, just FYI, since it expects the full date format.
https://vgmdb.net/album/20652
Thanks for the script though, it's been very useful. The other MB VGMDB script doesn't work half the time.
@Tenome Thanks, never got this kind of album before. Updated so it works with year-only dates!
(That's a totally obscure release by the way, I was curious but did not find it anywhere online!)
@fxthomas Might need to be updated again? I tried this URL and it gave me an internal server error, but that might just be a problem on VGMDB's end. The other VGMDB userscript also doesn't seem to work anymore, so it could be that VGMDB updated (again). Here's the album I tried: https://vgmdb.net/album/105445
Traceback (most recent call last):
\Scripts\vgmdb2mb.py", line 183, in <module>
album_data = vgmdb_get_album_data(album_url)
\Scripts\vgmdb2mb.py", line 48, in vgmdb_get_album_data
return json.load(urlopen(album_url))
\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
\lib\urllib\request.py", line 531, in open
response = meth(req, response)
\lib\urllib\request.py", line 640, in http_response
response = self.parent.error(
\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
\lib\urllib\request.py", line 502, in _call_chain
result = func(*args)
\lib\urllib\request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 500: Internal Server Error
The API endpoint this script uses is hosted at http://vgmdb.info which is separate from the VGMDB website. It's sometimes offline, but usually gets back up after a while.
I'm new to Python and I need help with entering the arguments needed for this to work.
