Created
May 13, 2020 17:11
-
-
Save fayak/9268c3a44d7d17efabcf00ec65e06672 to your computer and use it in GitHub Desktop.
Get the most frequent French city-name tokens as data that can be visualized on mapcustomizer.com
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Based on https://public.opendatasoft.com/explore/dataset/correspondance-code-insee-code-postal/download/?format=json&timezone=Europe/Berlin&lang=fr | |
# The JSON file must be argv[1] | |
import json | |
import sys | |
class City:
    """A city record: its name, its coordinates and the tokens of its name.

    Building an instance splits the name into tokens, drops common filler
    words, strips a leading elided article and records every remaining token
    in the class-wide ``token_stats`` counter.
    """

    # Shared by every instance: maps a token to how many times it was kept.
    token_stats = {}

    # Filler words that carry no information about a city name.
    _TRASH = frozenset({
        "ARRONDISSEMENT",
        "AU",
        "AUX",
        "DE",
        "DES",
        "DU",
        "EN",
        "ET",
        "LA",
        "LE",
        "LES",
        "SAINT",
        "SAINTE",
        "SAINTES",
        "SOUS",
        "SUR",
    })

    @staticmethod
    def _sanitize_token(token):
        """Return *token* without a leading ``D'`` or ``L'`` prefix.

        Only the prefix is removed; any later occurrence of ``D'`` or ``L'``
        inside the token is left untouched.
        """
        if token[:2] in ("D'", "L'"):
            return token[2:]
        return token

    @staticmethod
    def _trash_bad_token(token):
        """Return ``None`` when *token* is a filler word, else *token*."""
        if token in City._TRASH:
            return None
        return token

    @staticmethod
    def _account_token(token):
        """Increment the occurrence count of *token* and return it unchanged."""
        City.token_stats[token] = City.token_stats.get(token, 0) + 1
        return token

    @staticmethod
    def _name_to_token(name, hooks):
        """Split *name* on hyphens and spaces and run each piece through *hooks*.

        Each hook receives the current token and returns either a (possibly
        modified) token or ``None`` to discard it. Hooks run in order and the
        chain stops as soon as one of them returns ``None``.

        Returns the list of tokens that survived every hook.
        """
        tokens = []
        for part in name.split("-"):
            for word in part.split(" "):
                for hook in hooks:
                    if word is None:
                        # A previous hook rejected the token: skip the rest.
                        break
                    word = hook(word)
                if word is not None:
                    tokens.append(word)
        return tokens

    def __init__(self, name, coords):
        """Store *name* and *coords* and tokenize the name.

        *coords* must be indexable; only its first two items are used by
        ``__repr__``.
        """
        self.name = name
        # Order matters: filler words are dropped before the article prefix
        # is stripped, and only surviving tokens are counted.
        self.token = City._name_to_token(
            name,
            [City._trash_bad_token, City._sanitize_token, City._account_token],
        )
        self.coords = coords

    def __repr__(self):
        """Return ``"<coords[0]>,<coords[1]> {<name>}"``."""
        return f"{self.coords[0]},{self.coords[1]} {{{self.name}}}"
# Script body: load the dataset given as argv[1], build one City per record,
# then write one file per most frequent name token.
if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} <dataset.json>")

# JSON interchange text is UTF-8; do not depend on the locale's default codec.
with open(sys.argv[1], encoding="utf-8") as f:
    data = json.load(f)

# Constructing a City also updates City.token_stats as a side effect.
cities = [City(ville["nom_comm"], ville["geo_point_2d"]) for ville in data]

# Tokens ordered from most to least frequent (ties keep first-seen order).
SORT = dict(sorted(City.token_stats.items(), key=lambda item: item[1], reverse=True))

# For each of the 15 most frequent tokens, write a "<count>-<token>" file in
# the current directory with one line per city whose name has that token.
for k, v in list(SORT.items())[:15]:
    with open(f"{v}-{k}", "w", encoding="utf-8") as out:
        out.writelines(f"{city!r}\n" for city in cities if k in city.token)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment