Last active
April 3, 2023 07:55
-
-
Save KelSolaar/6f0847adb6bfa08665287a18039c8e24 to your computer and use it in GitHub Desktop.
AideDD - Atlas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
AideDD - Atlas | |
============== | |
This Python module extracts the map data from | |
`AideDD Atlas <https://www.aidedd.org/public/atlas>`__ | |
The following packages are required: | |
- `js2py` | |
- `requests` | |
- `tqdm` | |
- `levenshtein` | |
- `pymediawiki` | |
- `wikitextparser` | |
""" | |
import csv | |
import js2py | |
import json | |
import mediawiki | |
import pickle | |
import re | |
import requests | |
import unicodedata | |
import urllib.parse | |
import wikitextparser | |
from Levenshtein import distance | |
from collections import defaultdict | |
from enum import IntEnum | |
from dataclasses import asdict, dataclass | |
from mediawiki import MediaWiki | |
from pathlib import Path | |
from tqdm import tqdm | |
from typing import Any | |
from warnings import warn | |
__copyright__ = "Copyright 2023 Thomas Mansencal"
__license__ = "MIT License - https://opensource.org/licenses/MIT"
__maintainer__ = "Thomas Mansencal"
__email__ = "[email protected]"
__status__ = "Production"

# Public API of the module.
__all__ = [
    "ROOT_AIDEDD_ATLAS",
    "ROOT_WIKI_FANDOM",
    "API_WIKI_FANDOM",
    "UnitDistance",
    "AideDDMap",
    "AIDEDD_ATLAS",
    "TYPES_AIDE_DDMAP_LOCATION",
    "TYPES_AIDE_DDMAP_LOCATION_DEFAULT",
    "DataAideDDMapZone",
    "DataAideDDMapLocation",
    "DataAideDDMap",
    "slugify",
    "NAME_SUBSTITUTIONS",
    "fandom_page_titles",
    "parse_aidedd_atlas_map_data",
    "export_aidedd_atlas_map_data_to_json",
    "export_aidedd_atlas_map_data_to_csv",
]

# Base URL under which the "AideDD Atlas" map images and javascript data
# files are hosted.
ROOT_AIDEDD_ATLAS = "https://www.aidedd.org/public/atlas"
# Base URL used to build links to "Forgotten Realms" Fandom wiki pages.
# NOTE(review): the "/Wiki" path segment capitalisation is unusual for
# Fandom URLs — confirm generated links resolve.
ROOT_WIKI_FANDOM = "https://forgottenrealms.fandom.com/Wiki"
# "MediaWiki" API endpoint of the "Forgotten Realms" Fandom wiki.
API_WIKI_FANDOM = "https://forgottenrealms.fandom.com/api.php"
class UnitDistance(IntEnum):
    """
    Distance unit used by the map scales.

    Being an :class:`IntEnum`, members take part in integer arithmetic:
    expressions such as ``UnitDistance.FEET * 1000`` (see ``AIDEDD_ATLAS``)
    produce a plain :class:`int`, not an enum member.
    """

    FEET = 1
    MILES = 2
@dataclass
class AideDDMap:
    """
    Describe an "AideDD Atlas" map and where to fetch its image and data.
    """

    # Human readable map name, also used as the key into the per-map lookup
    # tables, e.g. "TYPES_AIDE_DDMAP_LOCATION" and "NAME_SUBSTITUTIONS".
    name: str
    # URL of the map image.
    url_image: str
    # URL of the javascript file holding the map zone and location data.
    url_data: str
    # Distance unit of the map scale; in practice often a scaled int such as
    # "UnitDistance.FEET * 1000" (see "AIDEDD_ATLAS").
    unit: UnitDistance
    # Whether the map locations should be cross-referenced against the
    # "Forgotten Realms" Fandom wiki.
    wiki: bool
# Supported "AideDD Atlas" maps, keyed by a human readable title.
# The "unit" field mixes enum members and scaled ints: "UnitDistance.FEET
# * 1000" is presumably 1000 feet per map distance unit — TODO confirm
# against the AideDD data files.
AIDEDD_ATLAS = {
    "Baldur's Gate": AideDDMap(
        "Baldur's Gate",
        f"{ROOT_AIDEDD_ATLAS}/images/baldur-1676-1062.jpg",
        f"{ROOT_AIDEDD_ATLAS}/dataB.js",
        UnitDistance.FEET * 1000,
        True,
    ),
    "Forgotten Realms (Faerûn, Sword Coast)": AideDDMap(
        "Faerûn",
        f"{ROOT_AIDEDD_ATLAS}/images/RO-6420-4061.jpg",
        f"{ROOT_AIDEDD_ATLAS}/dataR.js",
        UnitDistance.MILES,
        True,
    ),
    "Icewind Dale": AideDDMap(
        "Icewind Dale",
        f"{ROOT_AIDEDD_ATLAS}/images/icewind-dale-5786-4008.jpg",
        f"{ROOT_AIDEDD_ATLAS}/dataI.js",
        UnitDistance.MILES,
        True,
    ),
    "Greyhawk": AideDDMap(
        "Greyhawk",
        f"{ROOT_AIDEDD_ATLAS}/images/greyhawk-12636-9909.jpg",
        f"{ROOT_AIDEDD_ATLAS}/dataG.js",
        UnitDistance.MILES,
        False,
    ),
    "Kara-Tur": AideDDMap(
        "Kara-Tur",
        f"{ROOT_AIDEDD_ATLAS}/images/karatur-2445-2625.jpg",
        f"{ROOT_AIDEDD_ATLAS}/dataK.js",
        UnitDistance.MILES,
        False,
    ),
    "Laelith": AideDDMap(
        "Laelith",
        f"{ROOT_AIDEDD_ATLAS}/images/laelith-4478-2691.jpg",
        f"{ROOT_AIDEDD_ATLAS}/dataL.js",
        UnitDistance.FEET * 1000,
        False,
    ),
    "Laelith Provinces": AideDDMap(
        "Laelith Provinces",
        f"{ROOT_AIDEDD_ATLAS}/images/provinces-6420-3859.jpg",
        f"{ROOT_AIDEDD_ATLAS}/dataP.js",
        UnitDistance.MILES,
        False,
    ),
    "Menzoberranzan": AideDDMap(
        "Menzoberranzan",
        f"{ROOT_AIDEDD_ATLAS}/images/menzoberranzan-2773-2000.jpg",
        f"{ROOT_AIDEDD_ATLAS}/dataM.js",
        UnitDistance.FEET * 100,
        False,
    ),
    "Neverwinter": AideDDMap(
        "Neverwinter",
        f"{ROOT_AIDEDD_ATLAS}/images/neverwinter-1600-1128.jpg",
        f"{ROOT_AIDEDD_ATLAS}/dataN.js",
        UnitDistance.FEET * 1000,
        True,
    ),
    "Waterdeep": AideDDMap(
        "Waterdeep",
        f"{ROOT_AIDEDD_ATLAS}/images/waterdeep-3560-7256.jpg",
        f"{ROOT_AIDEDD_ATLAS}/dataW.js",
        UnitDistance.FEET * 1000,
        True,
    ),
}
# Per-map mapping of marker colour (lowercase hex) to location type label,
# keyed by "AideDDMap.name"; used when classifying parsed locations.
TYPES_AIDE_DDMAP_LOCATION = {
    "Faerûn": {
        "#ffffff": "Area",
        "#33cc33": "Forest",
        "#ff8000": "Mountainous",
        "#ff0000": "Place",
        "#58acfa": "Water",
        "#f781f3": "Road",
    },
    "Waterdeep": {
        "#ffffff": "Area",
        "#ff00ff": "Business",
        "#58acfa": "City Building",
        "#2eb82e": "Guildhall",
        "#ff0000": "Inn, Tavern, Festhall",
        "#aaaaaa": "Miscellaneous",
        "#f4900a": "Noble Villa",
        "#ffff00": "Temple",
        "#000066": "Warehouse",
    },
}
# Fallback colour-to-type mapping used for maps without an entry in
# "TYPES_AIDE_DDMAP_LOCATION"; duplicates the "Waterdeep" mapping.
TYPES_AIDE_DDMAP_LOCATION_DEFAULT = {
    "#ffffff": "Area",
    "#ff00ff": "Business",
    "#58acfa": "City Building",
    "#2eb82e": "Guildhall",
    "#ff0000": "Inn, Tavern, Festhall",
    "#aaaaaa": "Miscellaneous",
    "#f4900a": "Noble Villa",
    "#ffff00": "Temple",
    "#000066": "Warehouse",
}
@dataclass
class DataAideDDMapZone:
    """
    Store the parsed data of a single map zone.
    """

    # Zone name, taken from the "name"/"name1"/"name2" keys of the source
    # javascript data.
    name: str
    # Zone colour, taken verbatim from the "couleur" key.
    colour: str
    # Description mapping; currently holds a single "text" key.
    description: dict
    # Outline taken verbatim from the "path" key — presumably an SVG-style
    # path string; TODO confirm against the AideDD data.
    path: str
@dataclass
class DataAideDDMapLocation:
    """
    Store the parsed data of a single map location marker.
    """

    # Location name, possibly replaced with the matching "Fandom Wiki" page
    # title during parsing.
    name: str
    # Location type label resolved from the marker colour (see
    # "TYPES_AIDE_DDMAP_LOCATION").
    type: str
    # Description mapping with "html" and, optionally, "text" keys.
    description: str
    # (x, y) marker coordinates from the source data.
    coordinates: tuple
    # External reference URLs keyed by source name, e.g. "Fandom Wiki".
    references: dict
@dataclass
class DataAideDDMap:
    """
    Store the complete parsed data of an "AideDD Atlas" map.
    """

    # Parsed "DataAideDDMapZone" instances.
    zones: list
    # Parsed "DataAideDDMapLocation" instances.
    locations: list
    # (width, height) of the map image in pixels.
    geometry: tuple
    # Distance scale factor ("factorDist" in the source javascript).
    scale: float
def slugify(object_: Any, allow_unicode: bool = False) -> str:
    """
    Convert given object to a URL / filename safe "slug".

    Parameters
    ----------
    object_
        Object to convert; its :class:`str` representation is used.
    allow_unicode
        Whether to keep unicode word characters in the slug; when *False*,
        the text is transliterated to ASCII, dropping unconvertible
        characters.

    Returns
    -------
    str
        Lowercase slug with runs of whitespace and hyphens collapsed to a
        single hyphen and leading/trailing hyphens or underscores removed.
    """

    text = str(object_)

    if allow_unicode:
        text = unicodedata.normalize("NFKC", text)
    else:
        decomposed = unicodedata.normalize("NFKD", text)
        text = decomposed.encode("ascii", "ignore").decode("ascii")

    # Keep only word characters, whitespace and hyphens, then collapse the
    # separators to single hyphens.
    text = re.sub(r"[^\w\s-]", "", text.lower())
    text = re.sub(r"[-\s]+", "-", text)

    return text.strip("-_")
# Per-map regex pattern to replacement mappings, keyed by "AideDDMap.name",
# applied in order to lowercased location names before matching them against
# the "Fandom Wiki" page titles; mostly spelling fixes and disambiguation
# suffixes. The "~~~" replacement presumably acts as a sentinel that cannot
# match any page title, effectively excluding the location — TODO confirm.
NAME_SUBSTITUTIONS = {
    "Faerûn": {
        r"^the\s": r"",
        r"all father": r"all-father",
        r"amphail": "amphail (village)",
        r"archenbrige": r"archenbridge",
        r"aurilsbarg": r"aurilssbarg",
        r"blackfeather bridge": "blackfeather bridge (village)",
        r"cain": r"cairn",
        r"dernal": r"dernall",
        r"desarin": r"dessarin",
        r"easting": "easting (town)",
        r"fendral": r"fendarl",
        r"high horn": "high horn (mountain)",
        r"icepeak": r"ice peak",
        r"mounts": r"mountains",
        r"prespur": r"presper",
        r"redwater": "redwater (settlement)",
        r"tiverton": r"tilverton",
        r"turnback mountains": r"turnback mountain",
        r"whale bones": r"whalebones",
        r"zundridge": r"zundbridge",
    },
    "Waterdeep": {
        r"tiger's eye": r"~~~",
        r"waymoot": r"~~~",
        r"^the\s": r"",
        r"^skull": r"skulls",
        r"\bselune": r"selûne",
        r"brahir": r"brahiir's",
        r"brondar's street": r"brondar's way",
        r"cobblers' and corvisers' house": r"cobblers' & corvisers' house",
        r"cambril's": r"cymbril's",
        r"crammer's": r"crommer's",
        r"cynosure": r"cynosure (building)",
        r"eilean's maztican": r"eilean's maztica",
        r"jester's": r"jesters'",
        r"kolat's": r"kolat",
        r"heroes's": r"heroes'",
        r"knife's edge": r"dretch lane",
        r"laran'": r"laran's",
        r"logans": r"logan's",
        r"melvar's chapbooks and folios": r"melvar’s chapbooks and folios",
        r"melshimber'": r"melshimber",
        r"muleskulls": r"muleskull",
        r"net street": r"net street (south)",
        r"crook street": r"net street (north)",
        r"phaulkonmere": r"phaulkonmere villa",
        r"revon": r"rivon",
        r"saltporkstreet": r"saltpork street",
        r"seaeyes": r"seaeye's",
        r"seaseyes tower": r"seaeyes tower",
        r"sharkroar - harth shalark's": r"sharkroar, horth shalark's",
        r"tesper villa - tespergates": r"tespergates",
        r"trollskulls": r"trollskull",
        r"underdark": r"underdark (tavern)",
        r"watennens": r"watermens",
    },
}
def fandom_page_titles(api: str = API_WIKI_FANDOM) -> list:
    """
    Return the sorted, de-duplicated page titles of given "MediaWiki" API,
    caching the result on disk next to this module.

    Parameters
    ----------
    api
        URL of the "MediaWiki" "api.php" endpoint to query.

    Returns
    -------
    list
        Sorted, de-duplicated page titles.
    """

    path_cache = Path(__file__).parent / ".fandom.cache"

    if path_cache.exists():
        with open(path_cache, "rb") as path_file:
            return pickle.load(path_file)

    wiki = MediaWiki(api)
    all_pages = []
    while True:
        print(f'Retrieving up to 500 pages from "{api}"...')
        if not all_pages:
            all_pages = wiki.allpages(results=500)
        # Continue the listing from the last title retrieved so far.
        pages = wiki.allpages(all_pages[-1], results=500)
        if pages[-1] in all_pages:
            # No new titles were returned: the listing is exhausted.
            break
        all_pages.extend(pages)

    # Sort and de-duplicate *before* caching so that cached and freshly
    # computed results are identical: previously the raw list was pickled
    # while the fresh call returned "sorted(set(all_pages))", making
    # subsequent cached calls return different (unsorted, duplicated) data.
    page_titles = sorted(set(all_pages))

    with open(path_cache, "wb") as path_file:
        pickle.dump(page_titles, path_file)

    return page_titles
def parse_aidedd_atlas_map_data(
    aidedd_map: AideDDMap,
    page_titles: list,
    use_media_wiki_summary=False,
    api: str = API_WIKI_FANDOM,
) -> DataAideDDMap:
    """
    Parse the zone and location data of given "AideDD Atlas" map.

    The map's javascript data file is downloaded once and cached on disk next
    to this module. The image geometry, distance scale factor, and the
    literal "zones" and "groupe" javascript arrays are extracted from it, the
    arrays being evaluated with *js2py*. When ``aidedd_map.wiki`` is *True*,
    each location name is matched against *page_titles* using the
    *Levenshtein* distance and, on an exact match, a "Fandom Wiki" reference
    URL (and, optionally, a summary converted from the page wikitext) is
    attached to the location.

    Parameters
    ----------
    aidedd_map
        Map to parse the data of.
    page_titles
        "Fandom Wiki" page titles to cross-reference the locations against.
    use_media_wiki_summary
        Whether to fetch each matched wiki page and convert the wikitext of
        its first section to plain text and HTML descriptions; slow,
        requires network access.
    api
        URL of the "MediaWiki" API used to fetch the page summaries.

    Returns
    -------
    DataAideDDMap
        Parsed map data.
    """

    # Cache the raw javascript data file on disk next to this module.
    path_cache = Path(__file__).parent / f".{aidedd_map.name.lower()}-aidedd-map.cache"
    if Path(path_cache).exists():
        with open(path_cache) as path_file:
            content = path_file.read()
    else:
        response = requests.get(aidedd_map.url_data)
        content = response.content.decode("utf-8")
        with open(path_cache, "w") as path_file:
            path_file.write(content)

    # Lowercased page titles with any leading "the " dropped, used for the
    # distance comparisons below; indexes stay aligned with *page_titles*.
    page_titles_lower = [re.sub(r"^the\s", r"", title.lower()) for title in page_titles]

    width, height, scale = None, None, None
    zones = []
    locations = []
    in_zones, in_locations = False, False

    # Scan the javascript source line by line, extracting the image
    # dimensions, the distance scale factor, and the literal source of the
    # "zones" and "groupe" array declarations for later evaluation.
    for line in content.splitlines():
        search = re.search(
            r"var\s?imageW\s?=\s?(\d+)\s?,\s?\s?imageH\s?=\s?(\d+)", line
        )
        if search:
            width, height = int(search.group(1)), int(search.group(2))
            continue

        search = re.search(r"\s?factorDist\s?=\s?([-+]?(?:\d*\.*\d+))", line)
        if search:
            scale = float(search.group(1))
            continue

        if re.search(r"var\s?zones\s?=\s?\[", line):
            in_zones = True
        if in_zones:
            zones.append(line)
        # A line ending with "];" closes the array declaration.
        if in_zones and re.search(r"];\s?$", line):
            in_zones = False

        if re.search(r"var\s?groupe\s?=\s?\[", line):
            in_locations = True
        if in_locations:
            locations.append(line)
        if in_locations and re.search(r"];\s?$", line):
            in_locations = False

    # Evaluate the collected "zones" array. Key names such as
    # "name"/"name1"/"name2" and "txt"/"txt1"/"txt0" vary between maps,
    # hence the chained fallbacks; "couleur" is French for "colour".
    data_zones = []
    for item in tqdm(js2py.eval_js("\n".join(zones))):
        data_zones.append(
            DataAideDDMapZone(
                item.get("name", item.get("name1", item.get("name2"))),
                item.get("couleur"),
                {"text": item.get("txt", item.get("txt1", item.get("txt0")))},
                item.get("path"),
            )
        )

    # Evaluate the collected "groupe" (locations) array.
    data_locations = []
    for item in tqdm(js2py.eval_js("\n".join(locations))):
        references = {}
        name = item.get("name", item.get("name1", item.get("name2")))
        # Resolve the location type from the marker colour, using the
        # per-map table when one exists.
        type_ = TYPES_AIDE_DDMAP_LOCATION.get(
            aidedd_map.name, TYPES_AIDE_DDMAP_LOCATION_DEFAULT
        ).get(item.get("color", "").lower())
        # Default description: the raw AideDD text with paragraph tags
        # stripped; may be replaced by the wiki summary below.
        description = {
            "html": item.get("txt", item.get("txt1", item.get("txt0")))
            .replace("<p>", "")
            .replace("</p>", "")
        }

        if aidedd_map.wiki and name is not None:
            # Normalise the name with the per-map substitutions before
            # comparing it against the wiki page titles.
            name_lower = name.lower()
            for pattern, subtitution in NAME_SUBSTITUTIONS.get(
                aidedd_map.name, {}
            ).items():
                name_lower = re.sub(pattern, subtitution, name_lower)

            # Try the disambiguated form, e.g. "foo (waterdeep)", first, and
            # fall back to the bare name.
            name_extended = f"{name_lower} ({aidedd_map.name.lower()})"
            distances = [distance(name_extended, title) for title in page_titles_lower]
            min_distance = min(distances)
            if min_distance != 0:
                distances = [distance(name_lower, title) for title in page_titles_lower]
                min_distance = min(distances)

            # Log near-misses to aid manual curation of
            # "NAME_SUBSTITUTIONS".
            if min_distance in (1, 2, 3, 4):
                print(
                    name_lower,
                    "<-->",
                    page_titles[distances.index(min_distance)].lower(),
                    "[",
                    min_distance,
                    "]",
                )

            if min_distance == 0:
                # Exact match: adopt the wiki page title as the location
                # name, re-attaching a leading "The" when the original name
                # had one but the title does not.
                title = page_titles[distances.index(min_distance)]
                if name.lower().startswith("the ") and not title.lower().startswith(
                    "the "
                ):
                    name = f"The {title}"
                else:
                    name = title

                url = urllib.parse.urljoin(
                    ROOT_WIKI_FANDOM, urllib.parse.quote(title.replace(" ", "_"))
                )
                references["Fandom Wiki"] = url

                if use_media_wiki_summary:
                    try:
                        wiki = MediaWiki(api)
                        page = wiki.page(title, auto_suggest=False)
                        # Strip <ref>...</ref> citations before parsing the
                        # wikitext.
                        parser = wikitextparser.parse(
                            re.sub(r"<ref.*?.(/>|</ref>)", "", page.wikitext)
                        )

                        # Template expander for the plain text rendition.
                        def untemplater(template):
                            template_name = template.normal_name()
                            if template_name == "Pronounce":
                                values = [
                                    argument.value
                                    for argument in template.arguments
                                    if argument.value
                                ]
                                return f'Pronounced {"-".join(values)}'
                            elif template_name == "YearlinkName":
                                value = template.arguments[0].value
                                return f"{value} DR"
                            return ""

                        # Plain text summary: first section, with remaining
                        # table-like lines (containing "|") dropped.
                        text = "\n".join(
                            [
                                paragraph
                                for paragraph in parser.sections[0]
                                .plain_text(replace_templates=untemplater)
                                .strip()
                                .replace("\n\n", "\n")
                                .splitlines()
                                if not "|" in paragraph
                            ]
                        )

                        # Intentional redefinition: the HTML rendition needs
                        # markup-producing template expansion.
                        def untemplater(template):
                            template_name = template.normal_name()
                            if template_name == "Pronounce":
                                values = [
                                    argument.value
                                    for argument in template.arguments
                                    if argument.value
                                ]
                                return f'Pronounced <em>{"-".join(values)}</em>'
                            if template_name == "YearlinkName":
                                value = template.arguments[0].value
                                url = urllib.parse.urljoin(
                                    ROOT_WIKI_FANDOM, f"{value}_DR"
                                )
                                return f'<a href="{url}">{value} DR</a>'
                            return ""

                        # HTML rendition: keep wikilinks and emphasis for
                        # conversion below.
                        content = (
                            parser.sections[0]
                            .plain_text(
                                replace_templates=untemplater,
                                replace_wikilinks=False,
                                replace_bolds_and_italics=False,
                            )
                            .strip()
                        )
                        content = "\n".join(
                            [
                                paragraph
                                for paragraph in content.splitlines()
                                if not paragraph.startswith("[[File:")
                            ]
                        )

                        # Protect the wiki emphasis markers with sentinels
                        # (longest first) so that nested quotes convert
                        # unambiguously to HTML tags.
                        content = (
                            content.replace("'''''", "~@~@~")
                            .replace("'''", "~@~")
                            .replace("''", "~@")
                        )
                        html = re.sub(
                            r"~@~@~(.*?)~@~@~",
                            r"<strong><em>\1</em></strong>",
                            content,
                        )
                        html = re.sub(
                            r"~@~(.*?)~@~",
                            r"<strong>\1</strong>",
                            html,
                        )
                        html = re.sub(r"~@(.*?)~@", r"<em>\1</em>", html)

                        # Convert "[[Title|Label]]" wikilinks to anchors,
                        # dropping the link when the target page is unknown.
                        def urliser(match):
                            page_title = match.group(1).split("|")[0]
                            page_name = match.group(1).split("|")[-1]
                            url = urllib.parse.urljoin(
                                ROOT_WIKI_FANDOM,
                                urllib.parse.quote(page_title.replace(" ", "_")),
                            )
                            if page_title.lower() not in page_titles_lower:
                                # warn(
                                #     f'"{match.group(1)}" linked from "{title}" page '
                                #     'was not found on "Fandom Wiki" page titles!'
                                # )
                                return page_name
                            else:
                                return f'<a href="{url}">{page_title}</a>'

                        html = re.sub(r"\[\[(.*?)\]\]", urliser, html)
                        html = "".join(
                            [
                                f"<p>{paragraph}</p>"
                                for paragraph in html.replace("\n\n", "\n").splitlines()
                            ]
                        )
                        description = {
                            "html": html,
                            "text": text,
                        }
                    except (
                        mediawiki.exceptions.DisambiguationError,
                        mediawiki.exceptions.PageError,
                    ) as error:
                        # Best effort: keep the AideDD description when the
                        # wiki page is ambiguous or missing.
                        warn(str(error))

                # Checks that URL is correct, slow...
                # response = requests.get(url)
                # if response.status_code < 400:
                #     references["Fandom Wiki"] = url
                # else:
                #     warn(f'"{name}" was not found on "Fandom Wiki"!')
            else:
                pass
                # warn(f'"{name}" was not found on "Fandom Wiki"!')

        data_locations.append(
            DataAideDDMapLocation(
                name,
                type_,
                description,
                (item.get("x"), item.get("y")),
                references,
            )
        )

    return DataAideDDMap(data_zones, data_locations, (width, height), scale)
def export_aidedd_atlas_map_data_to_json(
    aidedd_map: AideDDMap,
    path: str,
    page_titles: list,
    use_media_wiki_summary=False,
    api: str = API_WIKI_FANDOM,
):
    """
    Parse given "AideDD Atlas" map and export its data as a JSON file at
    given path.

    Parameters
    ----------
    aidedd_map
        Map to parse and export.
    path
        Path of the JSON file to write.
    page_titles
        "Fandom Wiki" page titles to cross-reference the locations against.
    use_media_wiki_summary
        Whether to fetch the wiki page summaries; slow, requires network
        access.
    api
        URL of the "MediaWiki" API used to fetch the page summaries.
    """

    data = parse_aidedd_atlas_map_data(
        aidedd_map, page_titles, use_media_wiki_summary, api
    )

    # Key order is preserved by "json.dump", so the schema fields come
    # first, followed by the parsed data.
    content = {
        "schema": "0.1.0",
        "url_image": aidedd_map.url_image,
        "url_data": aidedd_map.url_data,
        "unit": aidedd_map.unit,
        "geometry": data.geometry,
        "scale": data.scale,
        "zones": [asdict(zone) for zone in data.zones],
        "locations": [asdict(location) for location in data.locations],
    }

    with open(path, "w") as json_file:
        json.dump(content, json_file, indent=4)
def export_aidedd_atlas_map_data_to_csv(map_data_path):
    """
    Export the JSON map data at given path to a sibling CSV file, grouping
    the locations by type.

    Parameters
    ----------
    map_data_path
        Path of the JSON map data file; the CSV file is written next to it
        with a ".csv" extension.
    """

    with open(map_data_path) as map_data_file:
        content = json.load(map_data_file)

    # Group the locations by type, moving a leading "The" to a trailing
    # ", The" so that locations sort on their significant name.
    grouped = defaultdict(list)
    for location in content["locations"]:
        location["name"] = re.sub(r"The\s(.*)", r"\1, The", location["name"])
        grouped[location["type"]].append(location)

    csv_path = str(map_data_path).replace(".json", ".csv")
    with open(csv_path, "w", newline="") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=["name", "description"])
        writer.writeheader()
        for type_, type_locations in grouped.items():
            # A row holding only the type acts as a section header.
            writer.writerow({"name": "", "description": type_})
            for location in sorted(type_locations, key=lambda entry: entry["name"]):
                # "GROUP" is a placeholder marker, not a real location.
                if location["name"] == "GROUP":
                    continue
                writer.writerow(
                    {
                        "name": location["name"],
                        "description": location["description"].get("text", ""),
                    }
                )
            # Blank separator row between the type sections.
            writer.writerow({"name": "", "description": ""})
if __name__ == "__main__":
    page_titles = fandom_page_titles()

    for aidedd_map in AIDEDD_ATLAS.values():
        # Only the "Waterdeep" map is currently exported; remove this guard
        # to process the entire atlas.
        if aidedd_map.name != "Waterdeep":
            continue

        print(aidedd_map)

        # NOTE(review): hard-coded, user-specific FoundryVTT data directory;
        # adjust for other machines.
        path = Path(
            f"/Users/kelsolaar/Library/Application Support/FoundryVTT/Data/atlas/{slugify(aidedd_map.name)}.jpg"
        )
        # Download the map image once, skipping when it already exists.
        if not path.exists():
            response = requests.get(aidedd_map.url_image)
            with open(path, "wb") as image_file:
                image_file.write(response.content)

        path = Path(
            f"/Users/kelsolaar/Library/Application Support/FoundryVTT/Data/atlas/{slugify(aidedd_map.name)}_data.json"
        )
        export_aidedd_atlas_map_data_to_json(
            aidedd_map,
            path,
            page_titles,
            use_media_wiki_summary=True,
        )
        # The CSV export reads back the JSON file written above.
        export_aidedd_atlas_map_data_to_csv(path)
        print("*" * 79)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment