Skip to content

Instantly share code, notes, and snippets.

@luckytyphlosion
Created October 11, 2021 01:13
Show Gist options
  • Save luckytyphlosion/701ad2b7f10d52a7d016e1b957537fcf to your computer and use it in GitHub Desktop.
Save luckytyphlosion/701ad2b7f10d52a7d016e1b957537fcf to your computer and use it in GitHub Desktop.
# =============================================================================
# MIT License
#
# Copyright (c) 2021 luckytyphlosion
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# =============================================================================
import bs4
import requests
import json
import re
# Matches row ids of the form "p1-<digits>-0". Only the commented-out
# alternate archive-info lookup inside main() (which scans the "p1-table"
# family table) refers to this pattern.
family_info_row_id_regex = re.compile(r"^p1-[0-9]+-0$")
def _value_cell(table_info, label):
    """Return the node that directly follows the ``<td>`` whose text is *label*.

    Raises:
        RuntimeError: if no ``<td>`` with exactly that text exists in
            *table_info*.
    """
    label_cell = table_info.find("td", string=label)
    if label_cell is None:
        raise RuntimeError(f"Could not find a <td> labelled {label!r} in the track info table!")
    return label_cell.next_sibling


def main(track_id=1293, mode=1):
    """Extract track metadata from a ct.wiimm.de info page into a JSON file.

    The page HTML is either downloaded (and cached on disk) or read back
    from a previously cached copy, then the "table-info" table is parsed
    and the result is written to ``track_info_{track_id}.json``.

    Args:
        track_id: Numeric id of the track; used in the page URL, in both
            file names, and checked against the id shown on the page.
        mode: 0 downloads ``https://ct.wiimm.de/i/{track_id}`` and saves it
            as ``ct_wiimm_de_{track_id}.html``; 1 reads that saved file
            instead of hitting the network.

    Raises:
        ValueError: if *mode* is neither 0 nor 1.
        RuntimeError: if the info table or one of its labelled rows is
            missing, or if the id on the page differs from *track_id*.
    """
    cache_filename = f"ct_wiimm_de_{track_id}.html"

    if mode == 0:
        r = requests.get(f"https://ct.wiimm.de/i/{track_id}", timeout=30)
        # Fail here rather than caching and parsing an HTTP error page.
        r.raise_for_status()
        html_doc = r.text
        # Explicit UTF-8 so the cache round-trips regardless of the
        # platform's default text encoding.
        with open(cache_filename, "w", encoding="utf-8") as f:
            f.write(html_doc)
    elif mode == 1:
        with open(cache_filename, "r", encoding="utf-8") as f:
            html_doc = f.read()
    else:
        raise ValueError(f"Unknown mode {mode!r}; expected 0 (download) or 1 (read cached copy)")

    # Turn non-breaking spaces into plain spaces before parsing
    # (presumably so the exact-text label lookups and the " / " split
    # below match -- confirm against a live page).
    html_doc = html_doc.replace(u"\u00A0", " ")

    track_info = {}
    soup = bs4.BeautifulSoup(html_doc, "html.parser")
    table_info = soup.find("table", class_="table-info")
    if table_info is None:
        raise RuntimeError("Could not find the \"table-info\" table in the page!")

    track_info["trackNameFull"] = table_info.find("th").string

    # ================================
    # alternate code which finds the type_class_id via tag position
    # don't do this because some tags may be missing depending on the track
    # see: https://ct.wiimm.de/i/1700 https://ct.wiimm.de/i/1293 https://ct.wiimm.de/i/1500
    # table_rows = table_info.find_all_next("tr")
    # type_class_id = table_rows[1].find_all_next("td")[1].string
    # ================================
    type_class_id = _value_cell(table_info, "Type, Class and Id:").string
    track_type, track_class, track_id_as_str = type_class_id.split(" / ")

    # Sanity check: the page we parsed must be the one we asked for.
    if track_id != int(track_id_as_str):
        raise RuntimeError("Expected track_id == int(track_id_as_str)!")

    archive_info = {
        "type": track_type,
        "class": track_class,
        "id": track_id
    }
    track_info["archiveInfo"] = archive_info

    track_name_and_version_tag = _value_cell(table_info, "Track name and version:")
    track_name = track_name_and_version_tag.find("b").string
    # The version is the cell's second child node (presumably the text
    # right after the <b> name element).
    track_version = track_name_and_version_tag.contents[1].strip()
    track_info["name"] = track_name
    track_info["version"] = track_version
    track_info["author"] = _value_cell(table_info, "Created by:").string

    # ==============================
    # alternate code to get archive info from the family box instead
    #family_info = soup.find("table", id="p1-table")
    #family_tracks = family_info.find_all("tr", id=family_info_row_id_regex)
    #
    #for family_track in family_tracks:
    #    family_track_tds = family_track.find_all_next('td')
    #    family_track_id = int(family_track_tds[1].string)
    #    if family_track_id == track_id:
    #        archive_info = {}
    #        archive_info["type"] = family_track.find("td", class_="ctype-1").string
    #        archive_info["class"] = family_track.find("td", class_="cclass").string
    #        archive_info["id"] = family_track_id
    #        track_info["archiveInfo"] = archive_info
    #        break
    #
    #    print(f"family_track_id: {family_track_id}")
    #    #print(f"track id: {family_track.find_all_next('td')[1]}")
    #    #print(f"family_track.contents: {family_track.contents}")
    # ==============================

    with open(f"track_info_{track_id}.json", "w", encoding="utf-8") as f:
        json.dump(track_info, f, indent=2)
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment