cbeddow · November 8, 2023 12:11
diff --git a/overture2osm.py b/overture2osm.py
 import json
 import pandas as pd

 # Overture category -> OSM tags lookup table, read once when this module is
 # imported. The path is relative, so it resolves against the current working
 # directory. The file is decoded explicitly as UTF-8 rather than with the
 # platform's locale encoding, which is not UTF-8 on every system.
 # NOTE(review): the comments in retag() describe each value as a list of
 # "key=value" strings -- confirm against the JSON file.
 tags_dict = None
 with open('./overture2osm.json', encoding='utf-8') as file:
     tags_dict = json.load(file)

 def jsonize_tags(tags):
     """Convert OSM tags written as ``"key=value"`` strings into a dict.

     Args:
         tags: Iterable of strings, each holding one tag as ``"key=value"``.

     Returns:
         A dict mapping each key to its value. When a key occurs more than
         once, the last occurrence wins.

     Raises:
         ValueError: If an entry contains no ``'='`` separator.
     """
     parsed = {}
     for tag in tags:
         # Split on the FIRST '=' only, so values that themselves contain '='
         # (a URL with a query string, for example) are kept whole.
         key, separator, value = tag.partition('=')
         if not separator:
             raise ValueError(f"OSM tag {tag!r} is not in 'key=value' form")
         parsed[key] = value
     return parsed

 # Overture categories that are too vague to convert without more context.
 _SKIP_CATEGORIES = frozenset({'structure_and_geography'})

 # Overture address field -> OSM key.
 # NOTE(review): the field names follow the comments in this file plus the
 # Overture places schema as understood here -- confirm. 'region' ->
 # 'addr:state' in particular should be checked (some countries use
 # addr:province instead).
 _ADDRESS_FIELDS = (
     ('freeform', 'addr:full'),
     ('locality', 'addr:city'),
     ('postcode', 'addr:postcode'),
     ('region', 'addr:state'),
     ('country', 'addr:country'),
 )

 # List-valued contact fields -> OSM key; values are joined with ';'.
 _CONTACT_FIELDS = (
     ('phones', 'contact:phone'),
     ('websites', 'contact:website'),
     ('emails', 'contact:email'),
 )


 def _category_tags(cat_tags):
     """Return a fresh dict of OSM tags for one entry of the category table.

     Accepts a dict, a single ``"key=value"`` string, or an iterable of such
     strings. A copy is always returned so the caller can add keys without
     modifying the shared lookup table.
     """
     if isinstance(cat_tags, dict):
         return dict(cat_tags)
     if isinstance(cat_tags, str):
         cat_tags = [cat_tags]
     tags = {}
     for tag in cat_tags:
         # Split on the first '=' only so values containing '=' stay whole.
         key, separator, value = tag.partition('=')
         if not separator:
             raise ValueError(f"OSM tag {tag!r} is not in 'key=value' form")
         tags[key] = value
     return tags


 def _localized_names(common):
     """Split a ``common`` names value into ``(primary, {language: name})``.

     ``common`` is normally a list of ``{'value': ..., 'language': ...}``
     dicts; a plain ``{language: value}`` dict is tolerated as well. The
     primary name is the first entry whose language is ``'local'`` (``None``
     when there is none).
     """
     if common is None:
         return None, {}
     if isinstance(common, dict):
         entries = [{'language': lang, 'value': val} for lang, val in common.items()]
     else:
         entries = common
     primary = None
     translations = {}
     for entry in entries:
         if not isinstance(entry, dict):
             continue
         language = entry.get('language')
         value = entry.get('value')
         if value is None or language is None:
             continue
         if language == 'local':
             if primary is None:
                 primary = value
         else:
             translations[language] = value
     return primary, translations


 def _address_tags(addresses):
     """Build ``addr:*`` tags from the first usable Overture address entry."""
     if isinstance(addresses, dict):
         addresses = [addresses]
     for address in addresses:
         if not isinstance(address, dict):
             continue
         tags = {}
         for field, osm_key in _ADDRESS_FIELDS:
             value = address.get(field)
             if value is None or value == '':
                 continue
             tags[osm_key] = str(value)
         if tags:
             return tags
     return {}


 def _social_network(url):
     """Return the network name for a profile URL, or ``None`` if unknown.

     The name is the second-to-last label of the host, so both
     ``https://www.facebook.com/x`` and ``https://facebook.com/x`` give
     ``'facebook'``.
     """
     from urllib.parse import urlparse

     if not isinstance(url, str) or not url:
         return None
     # urlparse only recognises the host when a scheme or leading '//' exists.
     candidate = url if '//' in url else f'//{url}'
     try:
         host = urlparse(candidate).hostname or ''
     except ValueError:
         return None
     labels = [label for label in host.split('.') if label]
     if len(labels) >= 2:
         return labels[-2]
     return labels[0] if labels else None


 def retag(obj, category_tags=None):
     """Convert one Overture place record into a dict of OpenStreetMap tags.

     Args:
         obj: Dict describing a single Overture place. Fields whose value is
             ``None`` are treated as absent.
         category_tags: Optional mapping from Overture main category to OSM
             tags (a dict, or a list of ``"key=value"`` strings). Defaults to
             the module-level ``tags_dict`` table.

     Returns:
         A new dict of OSM tags. It is empty when the record has no main
         category, when the category is in the skip list, or when the
         category is missing from the lookup table.

     Raises:
         ValueError: If a lookup-table entry is not in ``key=value`` form.
     """
     # Fields that are present and not null.
     present = {key for key, value in obj.items() if value is not None}

     # Without a usable category there is nothing to convert.
     if 'categories' not in present:
         return {}
     # Only the main category is used; secondary categories are ignored.
     main_category = obj['categories'].get('main')
     if main_category is None or main_category in _SKIP_CATEGORIES:
         return {}
     lookup = tags_dict if category_tags is None else category_tags
     if main_category not in lookup:
         return {}

     # Start from a COPY of the category tags; the per-record keys added below
     # must not leak into the shared table.
     osm_tags = _category_tags(lookup[main_category])

     # Place names: the 'local' name is the default, others are per-language.
     if 'names' in present:
         primary, translations = _localized_names(obj['names'].get('common'))
         if primary is not None:
             osm_tags['name'] = primary
         for language, value in translations.items():
             osm_tags[f'name:{language}'] = value

     # Addresses: structured parts map to addr:* keys, and the unparsed
     # freeform line goes to addr:full.
     # TODO: split freeform into addr:street / addr:housenumber.
     if 'addresses' in present:
         osm_tags.update(_address_tags(obj['addresses']))

     if 'brand' in present:
         brand_names = obj['brand'].get('names') or {}
         brand, _ = _localized_names(brand_names.get('common'))
         if brand is not None:
             osm_tags['brand'] = brand

     # Contact information is always a list; join by ';' with no space.
     for field, osm_key in _CONTACT_FIELDS:
         if field in present:
             joined = ';'.join(obj[field])
             if joined:
                 osm_tags[osm_key] = joined

     # Social media: the tag key is derived from the URL's host.
     if 'socials' in present:
         for url in obj['socials']:
             network = _social_network(url)
             if network is None:
                 continue
             key = f'contact:{network}'
             osm_tags[key] = f'{osm_tags[key]};{url}' if key in osm_tags else url

     # Floor of the place within its building (rarely present).
     if 'level' in present:
         osm_tags['level'] = obj['level']
     # Number of floors in the building (almost never present).
     if 'numFloors' in present:
         osm_tags['building:levels'] = obj['numFloors']

     # Finally cite the source: dataset and record ID, then the date.
     # All distinct sources are kept, joined by ';'.
     if 'sources' in present:
         citations = []
         for source in obj['sources']:
             dataset = source.get('dataset')
             if dataset is None:
                 continue
             record_id = source.get('record_id')
             if record_id is None:
                 citation = f'Overture/{dataset}'
             else:
                 citation = f'Overture/{dataset}/{record_id}'
             if citation not in citations:
                 citations.append(citation)
         if citations:
             osm_tags['source'] = ';'.join(citations)
     if 'updateTime' in present:
         # The first 10 characters are the YYYY-MM-DD date; str() also accepts
         # datetime-like values.
         osm_tags['source:date'] = str(obj['updateTime'])[:10]

     return osm_tags
	import json
	import pandas as pd

	# Overture category -> OSM tags lookup table, read once when this module is
	# imported. The path is relative, so it resolves against the current working
	# directory. The file is decoded explicitly as UTF-8 rather than with the
	# platform's locale encoding, which is not UTF-8 on every system.
	# NOTE(review): the comments in retag() describe each value as a list of
	# "key=value" strings -- confirm against the JSON file.
	tags_dict = None
	with open('./overture2osm.json', encoding='utf-8') as file:
	    tags_dict = json.load(file)

	def jsonize_tags(tags):
	    """Convert OSM tags written as ``"key=value"`` strings into a dict.

	    Args:
	        tags: Iterable of strings, each holding one tag as ``"key=value"``.

	    Returns:
	        A dict mapping each key to its value. When a key occurs more than
	        once, the last occurrence wins.

	    Raises:
	        ValueError: If an entry contains no ``'='`` separator.
	    """
	    parsed = {}
	    for tag in tags:
	        # Split on the FIRST '=' only, so values that themselves contain '='
	        # (a URL with a query string, for example) are kept whole.
	        key, separator, value = tag.partition('=')
	        if not separator:
	            raise ValueError(f"OSM tag {tag!r} is not in 'key=value' form")
	        parsed[key] = value
	    return parsed

	# Overture categories that are too vague to convert without more context.
	_SKIP_CATEGORIES = frozenset({'structure_and_geography'})

	# Overture address field -> OSM key.
	# NOTE(review): the field names follow the comments in this file plus the
	# Overture places schema as understood here -- confirm. 'region' ->
	# 'addr:state' in particular should be checked (some countries use
	# addr:province instead).
	_ADDRESS_FIELDS = (
	    ('freeform', 'addr:full'),
	    ('locality', 'addr:city'),
	    ('postcode', 'addr:postcode'),
	    ('region', 'addr:state'),
	    ('country', 'addr:country'),
	)

	# List-valued contact fields -> OSM key; values are joined with ';'.
	_CONTACT_FIELDS = (
	    ('phones', 'contact:phone'),
	    ('websites', 'contact:website'),
	    ('emails', 'contact:email'),
	)


	def _category_tags(cat_tags):
	    """Return a fresh dict of OSM tags for one entry of the category table.

	    Accepts a dict, a single ``"key=value"`` string, or an iterable of such
	    strings. A copy is always returned so the caller can add keys without
	    modifying the shared lookup table.
	    """
	    if isinstance(cat_tags, dict):
	        return dict(cat_tags)
	    if isinstance(cat_tags, str):
	        cat_tags = [cat_tags]
	    tags = {}
	    for tag in cat_tags:
	        # Split on the first '=' only so values containing '=' stay whole.
	        key, separator, value = tag.partition('=')
	        if not separator:
	            raise ValueError(f"OSM tag {tag!r} is not in 'key=value' form")
	        tags[key] = value
	    return tags


	def _localized_names(common):
	    """Split a ``common`` names value into ``(primary, {language: name})``.

	    ``common`` is normally a list of ``{'value': ..., 'language': ...}``
	    dicts; a plain ``{language: value}`` dict is tolerated as well. The
	    primary name is the first entry whose language is ``'local'`` (``None``
	    when there is none).
	    """
	    if common is None:
	        return None, {}
	    if isinstance(common, dict):
	        entries = [{'language': lang, 'value': val} for lang, val in common.items()]
	    else:
	        entries = common
	    primary = None
	    translations = {}
	    for entry in entries:
	        if not isinstance(entry, dict):
	            continue
	        language = entry.get('language')
	        value = entry.get('value')
	        if value is None or language is None:
	            continue
	        if language == 'local':
	            if primary is None:
	                primary = value
	        else:
	            translations[language] = value
	    return primary, translations


	def _address_tags(addresses):
	    """Build ``addr:*`` tags from the first usable Overture address entry."""
	    if isinstance(addresses, dict):
	        addresses = [addresses]
	    for address in addresses:
	        if not isinstance(address, dict):
	            continue
	        tags = {}
	        for field, osm_key in _ADDRESS_FIELDS:
	            value = address.get(field)
	            if value is None or value == '':
	                continue
	            tags[osm_key] = str(value)
	        if tags:
	            return tags
	    return {}


	def _social_network(url):
	    """Return the network name for a profile URL, or ``None`` if unknown.

	    The name is the second-to-last label of the host, so both
	    ``https://www.facebook.com/x`` and ``https://facebook.com/x`` give
	    ``'facebook'``.
	    """
	    from urllib.parse import urlparse

	    if not isinstance(url, str) or not url:
	        return None
	    # urlparse only recognises the host when a scheme or leading '//' exists.
	    candidate = url if '//' in url else f'//{url}'
	    try:
	        host = urlparse(candidate).hostname or ''
	    except ValueError:
	        return None
	    labels = [label for label in host.split('.') if label]
	    if len(labels) >= 2:
	        return labels[-2]
	    return labels[0] if labels else None


	def retag(obj, category_tags=None):
	    """Convert one Overture place record into a dict of OpenStreetMap tags.

	    Args:
	        obj: Dict describing a single Overture place. Fields whose value is
	            ``None`` are treated as absent.
	        category_tags: Optional mapping from Overture main category to OSM
	            tags (a dict, or a list of ``"key=value"`` strings). Defaults to
	            the module-level ``tags_dict`` table.

	    Returns:
	        A new dict of OSM tags. It is empty when the record has no main
	        category, when the category is in the skip list, or when the
	        category is missing from the lookup table.

	    Raises:
	        ValueError: If a lookup-table entry is not in ``key=value`` form.
	    """
	    # Fields that are present and not null.
	    present = {key for key, value in obj.items() if value is not None}

	    # Without a usable category there is nothing to convert.
	    if 'categories' not in present:
	        return {}
	    # Only the main category is used; secondary categories are ignored.
	    main_category = obj['categories'].get('main')
	    if main_category is None or main_category in _SKIP_CATEGORIES:
	        return {}
	    lookup = tags_dict if category_tags is None else category_tags
	    if main_category not in lookup:
	        return {}

	    # Start from a COPY of the category tags; the per-record keys added below
	    # must not leak into the shared table.
	    osm_tags = _category_tags(lookup[main_category])

	    # Place names: the 'local' name is the default, others are per-language.
	    if 'names' in present:
	        primary, translations = _localized_names(obj['names'].get('common'))
	        if primary is not None:
	            osm_tags['name'] = primary
	        for language, value in translations.items():
	            osm_tags[f'name:{language}'] = value

	    # Addresses: structured parts map to addr:* keys, and the unparsed
	    # freeform line goes to addr:full.
	    # TODO: split freeform into addr:street / addr:housenumber.
	    if 'addresses' in present:
	        osm_tags.update(_address_tags(obj['addresses']))

	    if 'brand' in present:
	        brand_names = obj['brand'].get('names') or {}
	        brand, _ = _localized_names(brand_names.get('common'))
	        if brand is not None:
	            osm_tags['brand'] = brand

	    # Contact information is always a list; join by ';' with no space.
	    for field, osm_key in _CONTACT_FIELDS:
	        if field in present:
	            joined = ';'.join(obj[field])
	            if joined:
	                osm_tags[osm_key] = joined

	    # Social media: the tag key is derived from the URL's host.
	    if 'socials' in present:
	        for url in obj['socials']:
	            network = _social_network(url)
	            if network is None:
	                continue
	            key = f'contact:{network}'
	            osm_tags[key] = f'{osm_tags[key]};{url}' if key in osm_tags else url

	    # Floor of the place within its building (rarely present).
	    if 'level' in present:
	        osm_tags['level'] = obj['level']
	    # Number of floors in the building (almost never present).
	    if 'numFloors' in present:
	        osm_tags['building:levels'] = obj['numFloors']

	    # Finally cite the source: dataset and record ID, then the date.
	    # All distinct sources are kept, joined by ';'.
	    if 'sources' in present:
	        citations = []
	        for source in obj['sources']:
	            dataset = source.get('dataset')
	            if dataset is None:
	                continue
	            record_id = source.get('record_id')
	            if record_id is None:
	                citation = f'Overture/{dataset}'
	            else:
	                citation = f'Overture/{dataset}/{record_id}'
	            if citation not in citations:
	                citations.append(citation)
	        if citations:
	            osm_tags['source'] = ';'.join(citations)
	    if 'updateTime' in present:
	        # The first 10 characters are the YYYY-MM-DD date; str() also accepts
	        # datetime-like values.
	        osm_tags['source:date'] = str(obj['updateTime'])[:10]

	    return osm_tags