Skip to content

Instantly share code, notes, and snippets.

@fxthomas
Last active July 30, 2025 17:05
Show Gist options
  • Select an option

  • Save fxthomas/fd85e906e41f4e6e06f38e92a497005b to your computer and use it in GitHub Desktop.

Select an option

Save fxthomas/fd85e906e41f4e6e06f38e92a497005b to your computer and use it in GitHub Desktop.
VGMDB to MusicBrainz import script (requires Python 3.6)
#!/usr/bin/python
# coding=utf-8
"""Python Script for bootstrapping a MusicBrainz release using a VGMDB album.
This script uses the unofficial VGMDB.info JSON API to prefill the MusicBrainz
"Add Release" form with data from VGMDB.
It is only meant as a first step to make adding a new MB release easier; please
check for missing/erroneous data and make sure the imported release follows the
MusicBrainz guidelines!
Because VGMDB has a lot of Japanese content, we import track/release titles from
this language before trying other languages, and try to guess values for some
fields (e.g. "Soundtrack" album types).
Documentation about the field format is found at:
https://musicbrainz.org/doc/Development/Release_Editor_Seeding
"""
import argparse
import html
import json
import re
import sys
import webbrowser
from datetime import datetime
from pathlib import Path
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
def strptimes(s, fmts):
    """Parse *s* with the first matching format in *fmts*.

    Args:
        s: The date string to parse. ``None`` or an empty string is treated
            as "no date" instead of raising ``TypeError``.
        fmts: An iterable of ``datetime.strptime`` format strings, tried in
            order.

    Returns:
        The ``datetime`` produced by the first format that matches, or
        ``None`` when *s* is empty or no format matches.
    """
    # strptime() raises TypeError (not ValueError) for None, so guard first.
    if not s:
        return None
    for fmt in fmts:
        try:
            return datetime.strptime(s, fmt)
        except ValueError:
            # This format does not match; try the next one.
            continue
    return None
def vgmdb_get_album_url(album_id, format_="json"):
    """Build the vgmdb.info API address for one album.

    ``album_id`` is the numeric VGMDB album identifier and ``format_`` is the
    value placed in the ``format`` query parameter ("json" unless overridden).
    """
    url_template = "https://vgmdb.info/album/%d?format=%s"
    substitutions = (album_id, format_)
    return url_template % substitutions
def vgmdb_get_album_data(album_url):
    """Retrieve and decode the JSON document for a VGMDB album.

    Args:
        album_url: URL of the album's JSON representation, e.g. the value
            returned by ``vgmdb_get_album_url``.

    Returns:
        The decoded JSON payload (whatever ``json.load`` produces for it).

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for
            non-success HTTP statuses is a subclass).
        ValueError: If the response body is not valid JSON.
    """
    # Use the response as a context manager so the connection is closed even
    # when JSON decoding fails.
    with urlopen(album_url) as response:
        return json.load(response)
def _preferred_name(names, preferred_key):
    """Return ``names[preferred_key]`` if set, else the first non-empty name, else ""."""
    for candidate in (names.get(preferred_key), *names.values()):
        if candidate:
            return candidate
    return ""


def _write_hidden_input(fd, name, value):
    """Write one hidden ``<input>`` form field, HTML-escaping name and value."""
    fd.write(f"""<input type="hidden" name="{html.escape(str(name))}" value="{html.escape(str(value))}">""")


def _parse_release_date(raw):
    """Split a ``YYYY[-MM[-DD]]`` string into ``(year, month, day)``.

    Parts that are absent (or zero) are returned as ``None`` so that callers
    never seed a month/day that the source did not provide. Returns ``None``
    when *raw* is empty or is not a valid date.
    """
    if not raw:
        return None
    try:
        parts = [int(part) for part in raw.split("-")]
    except (AttributeError, ValueError):
        return None
    if len(parts) > 3:
        return None
    year, month, day = parts + [None] * (3 - len(parts))
    month = month or None
    day = (day or None) if month else None
    try:
        # Validate the combination; missing parts are replaced by 1 only for
        # the purpose of this check.
        datetime(year, month or 1, day or 1)
    except (ValueError, OverflowError):
        return None
    return year, month, day


def _track_length_ms(raw):
    """Convert ``M:SS`` / ``H:MM:SS`` text to milliseconds.

    Returns 0 when the length is missing, "unknown" or cannot be parsed.
    Unlike ``strptime("%M:%S")`` this accepts tracks of 60 minutes or more.
    """
    if not raw or raw.lower() == "unknown":
        return 0
    try:
        parts = [int(part) for part in raw.strip().split(":")]
    except ValueError:
        return 0
    if len(parts) > 3 or any(part < 0 for part in parts):
        return 0
    seconds = 0
    for part in parts:
        seconds = seconds * 60 + part
    return 1000 * seconds


def write_musicbrainz_html_form(fd, album_data):
    """Write a local, self-submitting MusicBrainz "Add Release" form.

    The form POSTs hidden fields to https://musicbrainz.org/release/add and
    submits itself through a small inline script.

    Args:
        fd: A writable text stream; only ``fd.write`` is used.
        album_data: The decoded VGMDB album document. Keys read here:
            ``names``, ``classification``, ``composers``, ``arrangers``,
            ``performers``, ``lyricists``, ``release_date``, ``catalog``,
            ``vgmdb_link``, ``media_format`` and ``discs`` (each disc holding
            ``tracks`` with ``names`` and ``track_length``). The credit
            lists, ``classification``, ``release_date`` and ``media_format``
            may be absent.

    Raises:
        KeyError: If ``names``, ``catalog``, ``vgmdb_link`` or ``discs`` is
            missing from *album_data*.
    """
    album_names = album_data["names"]
    has_japanese_name = "ja" in album_names

    fd.write("""<!doctype html>""")
    fd.write("""<meta charset="UTF-8">""")
    fd.write("""<title>Add VGMDB album As Release...</title>""")
    fd.write("""<form action="https://musicbrainz.org/release/add" method="post">""")

    # Release title: Japanese first, then whatever other language exists.
    _write_hidden_input(fd, "name", _preferred_name(album_names, "ja"))
    _write_hidden_input(fd, "status", "official")

    # Only guess a release type when VGMDB classifies the album as a soundtrack.
    if "soundtrack" in (album_data.get("classification") or "").lower():
        _write_hidden_input(fd, "type", "album")
        _write_hidden_input(fd, "type", "soundtrack")

    if has_japanese_name:
        _write_hidden_input(fd, "language", "jpn")
        _write_hidden_input(fd, "script", "Jpan")

    # Collect de-duplicated artist names per role, plus the union of all
    # roles in first-seen order (composers, arrangers, performers, lyricists).
    all_artists = []
    artists_by_role = {}
    for role in ("composers", "arrangers", "performers", "lyricists"):
        role_artists = []
        for person in album_data.get(role) or []:
            person_name = _preferred_name(person["names"], "ja")
            if person_name not in all_artists:
                all_artists.append(person_name)
            if person_name not in role_artists:
                role_artists.append(person_name)
        artists_by_role[role] = role_artists

    performers = artists_by_role["performers"]
    artists = ["Various Artists"] if len(performers) >= 3 else all_artists
    track_artists = (
        performers
        or artists_by_role["arrangers"]
        or artists_by_role["composers"]
        or artists_by_role["lyricists"]
        or all_artists
    )
    join_phrase = ", "

    # Release-level artist credit; the join phrase goes between names only.
    for artist_ix, artist_name in enumerate(artists):
        _write_hidden_input(fd, f"artist_credit.names.{artist_ix}.artist.name", artist_name)
        if artist_ix < len(artists) - 1:
            _write_hidden_input(fd, f"artist_credit.names.{artist_ix}.join_phrase", join_phrase)

    # Release event: seed only the date parts VGMDB actually provides, so a
    # year-only date does not turn into January 1st.
    release_date = _parse_release_date(album_data.get("release_date"))
    if release_date:
        year, month, day = release_date
        _write_hidden_input(fd, "events.0.date.year", year)
        if month is not None:
            _write_hidden_input(fd, "events.0.date.month", month)
        if day is not None:
            _write_hidden_input(fd, "events.0.date.day", day)
    if has_japanese_name:
        _write_hidden_input(fd, "events.0.country", "JP")

    _write_hidden_input(fd, "labels.0.catalog_number", album_data["catalog"])

    vgmdb_link = album_data["vgmdb_link"]
    _write_hidden_input(fd, "urls.0.url", vgmdb_link)
    _write_hidden_input(fd, "urls.0.link_type", "86")  # VGMDB
    _write_hidden_input(fd, "edit_note", f"Imported from {vgmdb_link}")

    for disc_ix, disc_data in enumerate(album_data["discs"]):
        if album_data.get("media_format") == "CD":
            _write_hidden_input(fd, f"mediums.{disc_ix}.format", "CD")

        for track_ix, track_data in enumerate(disc_data["tracks"]):
            track_prefix = f"mediums.{disc_ix}.track.{track_ix}"
            # NOTE(review): track names are looked up under "Japanese" while
            # album/artist names use "ja"; kept as in the original code --
            # presumably the API keys track names by language name, confirm.
            track_title = _preferred_name(track_data["names"], "Japanese")
            track_length = _track_length_ms(track_data.get("track_length"))

            _write_hidden_input(fd, f"{track_prefix}.name", track_title)
            _write_hidden_input(fd, f"{track_prefix}.length", track_length)

            for artist_ix, artist_name in enumerate(track_artists):
                credit_prefix = f"{track_prefix}.artist_credit.names.{artist_ix}"
                _write_hidden_input(fd, f"{credit_prefix}.mbid", "")
                _write_hidden_input(fd, f"{credit_prefix}.name", artist_name)
                _write_hidden_input(fd, f"{credit_prefix}.artist.name", artist_name)
                # As for the release credit, no join phrase after the last
                # artist (otherwise the credit ends in a dangling ", ").
                if artist_ix < len(track_artists) - 1:
                    _write_hidden_input(fd, f"{credit_prefix}.join_phrase", join_phrase)

    fd.write("""<input type="submit" value="Add Cluster As Release...">""")
    fd.write("""</form>""")
    fd.write("""<script>document.forms[0].submit()</script>""")
# Parse arguments
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument("album_id_url", help="VGMDB album id or URL", type=str)
parser.add_argument("--show-api-page", "-s", help="Show API page instead of the MB form", action="store_true")
args = parser.parse_args()

# Parse album ID: accept either a vgmdb.net album URL (scheme and "www."
# optional) or a bare numeric ID.
album_id = None
m = re.match(r"(https?://)?(www\.)?vgmdb\.net/album/(?P<album_id>\d+)", args.album_id_url)
if m:
    album_id = int(m.group("album_id"))
elif args.album_id_url.isdecimal():
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
    album_id = int(args.album_id_url)
else:
    print("Invalid album ID or URL")
    sys.exit(1)

# With --show-api-page, just open the API's HTML view of the album and stop.
if args.show_api_page:
    album_url = vgmdb_get_album_url(album_id, format_="html")
    print("Opening %s" % album_url)
    webbrowser.open(album_url)
    sys.exit(0)

# Retrieve album data. URLError/HTTPError are OSError subclasses and JSON
# decoding errors are ValueError subclasses; report them without a traceback.
album_url = vgmdb_get_album_url(album_id)
try:
    album_data = vgmdb_get_album_data(album_url)
except (OSError, ValueError) as err:
    print("Could not retrieve album data from %s: %s" % (album_url, err), file=sys.stderr)
    sys.exit(1)

# Write the MusicBrainz form. delete=False keeps the file on disk after the
# "with" block so the browser can still read it.
with NamedTemporaryFile(suffix=".html", encoding="utf-8", mode="w+", delete=False) as fd:
    write_musicbrainz_html_form(fd, album_data)

# Open the form only once the file is closed (and therefore fully flushed),
# and pass a file:// URL because webbrowser.open() with a bare filename is
# not portable.
print("Opening %s" % fd.name)
webbrowser.open(Path(fd.name).as_uri())
Copy link
Copy Markdown

ghost commented Jul 10, 2021

I'm new to python and I need help on entering the arguments needed for this to work
image

@fxthomas
Copy link
Copy Markdown
Author

fxthomas commented Jul 10, 2021

@mitsufune You need to open a system command-line window, not a Python shell. I believe on Windows 10 you can use PowerShell, and the command line should look like this by default:

PS> python vgmdb2mb.py https://vgmdb.net/album/... <you can press ENTER after this>

Never tested it on Windows though, let me know if something's not compatible!

@Tenome
Copy link
Copy Markdown

Tenome commented Oct 6, 2021

The script breaks if the VGMDB page only has the year, just FYI, since it expects the full date format.
https://vgmdb.net/album/20652
Thanks for the script though, it's been very useful. The other MB VGMDB script doesn't work half the time.

@fxthomas
Copy link
Copy Markdown
Author

fxthomas commented Nov 6, 2021

@Tenome Thanks, never got this kind of album before. Updated so it works with year-only dates!

(That's a totally obscure release by the way, I was curious but did not find it anywhere online!)

@Tenome
Copy link
Copy Markdown

Tenome commented Apr 3, 2022

@fxthomas Might need to be updated again? I tried this URL and it gave me an internal server error, but that might just be a problem on VGMDB's end. The other VGMDB userscript also doesn't seem to work anymore, so it could be that VGMDB updated (again). Here's the album I tried: https://vgmdb.net/album/105445

Traceback (most recent call last):
\Scripts\vgmdb2mb.py", line 183, in <module>
album_data = vgmdb_get_album_data(album_url)
\Scripts\vgmdb2mb.py", line 48, in vgmdb_get_album_data
return json.load(urlopen(album_url))
\lib\urllib\request.py", line 222, in urlopen
return opener.open(url, data, timeout)
\lib\urllib\request.py", line 531, in open
response = meth(req, response)
\lib\urllib\request.py", line 640, in http_response
response = self.parent.error(
\lib\urllib\request.py", line 569, in error
return self._call_chain(*args)
\lib\urllib\request.py", line 502, in _call_chain
result = func(*args)
\lib\urllib\request.py", line 649, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 500: Internal Server Error

@fxthomas
Copy link
Copy Markdown
Author

The API endpoint this script uses is hosted at http://vgmdb.info which is separate from the VGMDB website. It's sometimes offline, but usually gets back up after a while.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment