|
import urllib.parse |
|
from urllib.request import urlretrieve |
|
import requests |
|
from bs4 import BeautifulSoup |
|
import urllib |
|
import os.path |
|
import zipfile |
|
import os |
|
import argparse |
|
|
|
def parse_args(*args):
    """Parse command-line arguments for the ZIV mirror tool.

    May be called with an argv-style list (e.g. ``parse_args(['-n'])``);
    with no arguments, argparse falls back to ``sys.argv``.

    Returns:
        argparse.Namespace with:
            categories (list[str]): ZIV listing pages to mirror; defaults to
                the "latest 20 official" page.
            dry_run (bool): True when only reporting what would be
                downloaded; defaults to False.
    """
    parser = argparse.ArgumentParser(description="Mirror simfiles from ZIV")

    parser.add_argument('categories', type=str, nargs='*',
                        help='ZIV category pages to mirror',
                        default=['https://zenius-i-vanisher.com/v5.2/simfiles.php?category=latest20official'])

    # --dry-run / --no-dry-run are an on/off pair sharing one destination.
    # Fix: the original help strings talked about "pings" (copy-paste from
    # an unrelated tool); this program downloads simfile zips.
    feature = parser.add_mutually_exclusive_group(required=False)

    feature.add_argument('--dry-run', '-n',
                         help="Only perform a dry run; don't download any files",
                         dest='dry_run', action='store_true')

    feature.add_argument('--no-dry-run',
                         help="Download files normally",
                         dest='dry_run', action='store_false')

    feature.set_defaults(dry_run=False)

    return parser.parse_args(*args)
|
|
|
def mirror(cat_url, args):
    """Mirror every simfile linked from a ZIV listing page.

    Each simfile zip is downloaded to ``zips/<simfileid>.zip`` (files already
    present are not re-downloaded) and extracted into ``songs/<group name>/``.

    Parameters:
        cat_url: URL of a ZIV simfile listing or category page.
        args: parsed CLI namespace; only ``args.dry_run`` is consulted.
    """
    request = requests.get(cat_url)
    page = BeautifulSoup(request.text, features="html.parser")

    if 'viewsimfilecategory.php' in cat_url:
        # Single-category page: the group name lives in the page header.
        simgroup = page.find('div', {'class': 'headertop'}).h1
    else:
        simgroup = None

    for row in page.find_all('tr'):
        simfile = row.find("a", href=lambda href: href and "viewsimfile.php" in href)
        # Fix: was `simgroup = simgroup or row.find(...)`, which locked onto
        # the FIRST category link forever. On multi-category listing pages,
        # header rows carrying a viewsimfilecategory link precede their
        # simfile rows, so the current group must be refreshed whenever a
        # new header row appears; rows without such a link keep the last one.
        simgroup = row.find("a", href=lambda href: href and "viewsimfilecategory.php" in href) or simgroup

        if not (simfile and simgroup):
            continue

        # Collapse internal whitespace and strip '/' so the names are safe
        # to use as path components.
        songname = ' '.join(simfile.get_text().replace('/', '-').split())
        groupname = ' '.join(simgroup.get_text().replace('/', '-').split())

        print(f"collection: '{groupname}' simfile: '{songname}'")

        simlink = simfile['href']
        try:
            sim_id = urllib.parse.parse_qs(urllib.parse.urlparse(simlink).query)['simfileid'][0]
        except KeyError:
            print(f"WARNING: no simfileid found on URL {simlink}")
            continue

        url = f'https://zenius-i-vanisher.com/v5.2/download.php?type=ddrsimfile&simfileid={sim_id}'

        if args.dry_run:
            print(f"Dry run requested, not downloading {url}")
            continue

        filename = f'zips/{sim_id}.zip'
        if not os.path.isfile(filename):
            # Fix: these messages previously printed a literal "(unknown)"
            # where the destination filename belongs.
            print(f'Downloading {url} -> {filename}')
            try:
                os.makedirs('zips', exist_ok=True)
                urlretrieve(url, filename)
            except KeyboardInterrupt:
                print('Download aborting...')
                if os.path.isfile(filename):
                    # A truncated zip would be mistaken for a completed
                    # download on the next run, so remove it.
                    print(f'Removing partial file {filename}')
                    os.unlink(filename)
                raise

        try:
            # `zf` rather than `zip`: don't shadow the builtin.
            with zipfile.ZipFile(filename, 'r') as zf:
                songdir = f'songs/{groupname}'
                print(f'Extracting into {songdir}')
                os.makedirs(songdir, exist_ok=True)
                zf.extractall(songdir)
        except zipfile.BadZipFile:
            print(f'Not a zip file: {filename}')
|
|
|
if __name__ == "__main__":
    # Mirror each requested listing page in turn.
    cli_args = parse_args()
    for category_url in cli_args.categories:
        mirror(category_url, cli_args)