Last active
November 8, 2023 12:11
-
-
Save cbeddow/c5014f06456413a74348640b60f4fb7f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
from urllib.parse import urlparse

import pandas as pd
# Lookup table from Overture category name to the matching OSM tags, loaded
# once at import time. The path is relative to the current working directory.
# JSON is UTF-8 by specification, so decode it explicitly instead of relying
# on the platform's locale-dependent default encoding.
with open('./overture2osm.json', encoding='utf-8') as file:
    tags_dict = json.load(file)
def jsonize_tags(tags):
    """Convert OSM tags written as ``"key=value"`` strings into a dict.

    Args:
        tags: Iterable of strings, each of the form ``"key=value"``.

    Returns:
        A dict mapping each key to its value. When a key occurs more than
        once, the last occurrence wins.

    Raises:
        IndexError: If a tag contains no ``=`` separator (same exception type
            the previous implementation raised for such input).
    """
    parsed = {}
    for tag in tags:
        # Split on the FIRST '=' only, so values that themselves contain '='
        # (for example URLs with query strings) are kept intact.
        key, separator, value = tag.partition('=')
        if not separator:
            raise IndexError(f"tag {tag!r} is not in 'key=value' form")
        parsed[key] = value
    return parsed
def _local_name(common):
    """Return the default ('local') name from a ``names.common`` value, or None."""
    # Tolerate a plain {'local': ...} mapping as well as the list-of-entries
    # form ({'language': ..., 'value': ...}) used for place names.
    if isinstance(common, dict):
        return common.get('local')
    for entry in common or ():
        if entry.get('language') == 'local':
            return entry.get('value')
    return None


def _social_key(url):
    """Return the site name of a social-media URL (e.g. 'facebook'), or None."""
    # urlparse only recognises the host when a scheme or leading '//' exists.
    if '://' not in url and not url.startswith('//'):
        url = f'//{url}'
    try:
        host = urlparse(url).hostname or ''
    except ValueError:
        return None
    labels = [label for label in host.split('.') if label]
    if not labels:
        return None
    # The label just before the top-level domain names the site, whether or
    # not the host carries a 'www.' / 'm.' style prefix.
    return labels[-2] if len(labels) > 1 else labels[0]


def _join_values(values):
    """Join a list of contact values into one ';'-separated string."""
    return ';'.join(str(value) for value in values if value is not None)


def retag(obj):
    """Convert one Overture place object into a dict of OpenStreetMap tags.

    Overture is the "left table" and has categories; the result is the OSM
    "right table", which has tags.

    NOTE(review): the original indentation of this function was lost. This
    implementation follows the stated intent that nothing can be converted
    without a category, so an object without a usable main category yields an
    empty dict -- confirm against the intended behaviour.

    Args:
        obj: Mapping describing one Overture place. Keys whose value is None
            are treated as absent. Keys read: 'categories', 'names',
            'addresses' (not converted yet), 'brand', 'phones', 'websites',
            'emails', 'socials', 'level', 'numFloors', 'sources' and
            'updateTime'.

    Returns:
        A new dict of OSM tags. It is empty when the object has no main
        category or the category is one that cannot be converted.

    Raises:
        KeyError: If the main category is missing from ``tags_dict``.
    """
    # Categories too vague to convert without context; they are deliberately
    # not present in tags_dict.
    skip_cats = ('structure_and_geography',)

    # Drop nulls so "key present" always means "key has a value".
    present = {key: value for key, value in obj.items() if value is not None}

    # Only the main category is used; any secondary category is ignored.
    categories = present.get('categories')
    main_category = categories.get('main') if categories else None
    if main_category is None or main_category in skip_cats:
        return {}

    # Start from the category's OSM tags. Always build a fresh dict so the
    # shared lookup table is never mutated by the additions below. The entry
    # may be a ready-made mapping or a list of "key=value" strings.
    cat_tags = tags_dict[main_category]
    if isinstance(cat_tags, dict):
        osm_tags = dict(cat_tags)
    else:
        osm_tags = jsonize_tags(cat_tags)

    # Place names: the 'local' entry is the default name, every other
    # language becomes a name:<language> tag.
    names = present.get('names')
    if names:
        common = names.get('common') or []
        primary_name = _local_name(common)
        if primary_name is not None:
            osm_tags['name'] = primary_name
        for entry in common:
            if entry['language'] != 'local':
                osm_tags[f'name:{entry["language"]}'] = entry['value']

    # TODO: convert 'addresses' to OSM addr:* tags (JavaScript example:
    # https://is.gd/addrjs). Prefer structured region/country/locality fields,
    # falling back to freeform text for addr:street. Not implemented yet; the
    # former placeholder raised TypeError whenever an address was present.

    brand = present.get('brand')
    if brand:
        brand_name = _local_name((brand.get('names') or {}).get('common'))
        if brand_name is not None:
            osm_tags['brand'] = brand_name

    # Contact information is always a list; store it as one string joined by
    # ';' with no space. Empty lists produce no tag.
    contact_fields = (
        ('phones', 'contact:phone'),
        ('websites', 'contact:website'),
        ('emails', 'contact:email'),
    )
    for field, tag in contact_fields:
        joined = _join_values(present.get(field) or ())
        if joined:
            osm_tags[tag] = joined

    # Social media gets one tag per site, keyed by the site name.
    for url in present.get('socials') or ():
        social = _social_key(url)
        if social:
            osm_tags[f'contact:{social}'] = url

    # Floor of the place within its building (rarely present).
    if 'level' in present:
        osm_tags['level'] = present['level']

    # Number of floors of the building (almost never present).
    if 'numFloors' in present:
        osm_tags['building:levels'] = present['numFloors']

    # Finally, cite the source: dataset name and record ID, then the date.
    # Every source is kept (joined by ';') instead of only the last one.
    references = []
    for source in present.get('sources') or ():
        reference = f'Overture/{source["dataset"]}'
        if source.get('record_id') is not None:
            reference = f'{reference}/{source["record_id"]}'
        if reference not in references:
            references.append(reference)
    if references:
        osm_tags['source'] = ';'.join(references)

    if 'updateTime' in present:
        # Keep only the first ten characters (the YYYY-MM-DD part of an ISO
        # timestamp).
        osm_tags['source:date'] = present['updateTime'][0:10]

    return osm_tags
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment