fayak · May 13, 2020 17:11
diff --git a/stats.py b/stats.py
 #!/usr/bin/env python3

 # Based on https://public.opendatasoft.com/explore/dataset/correspondance-code-insee-code-postal/download/?format=json&timezone=Europe/Berlin&lang=fr
 # The JSON file must be argv[1]

 import json
 import sys

 class City:
    """A city name split into significant tokens, plus its coordinates.

    Every instance created feeds the class-wide ``token_stats`` counter, so
    after all cities are built it holds how often each token was seen.
    """

    # Shared by all instances: token -> number of times it was accounted.
    token_stats = {}

    # Filler words that are dropped from city names. A frozenset gives O(1)
    # membership tests and is built once instead of on every call.
    _TRASH = frozenset((
        "ARRONDISSEMENT",
        "AU",
        "AUX",
        "DE",
        "DES",
        "DU",
        "EN",
        "ET",
        "LA",
        "LE",
        "LES",
        "SAINT",
        "SAINTE",
        "SAINTES",
        "SOUS",
        "SUR",
    ))

    # Elided articles that may be glued to the front of a token.
    _ELIDED_PREFIXES = ("D'", "L'")

    @staticmethod
    def _sanitize_token(token):
        """Return ``token`` without a leading ``D'`` or ``L'``.

        Only the leading prefix is removed; the rest of the token is left
        untouched even if it contains ``D'`` or ``L'`` again.
        """
        for prefix in City._ELIDED_PREFIXES:
            if token.startswith(prefix):
                return token[len(prefix):]
        return token

    @staticmethod
    def _trash_bad_token(token):
        """Return ``None`` for a filler word, otherwise ``token`` unchanged."""
        if token in City._TRASH:
            return None
        return token

    @staticmethod
    def _account_token(token):
        """Count one more occurrence of ``token`` in ``token_stats``; return it."""
        City.token_stats[token] = City.token_stats.get(token, 0) + 1
        return token

    @staticmethod
    def _name_to_token(name, hooks):
        """Split ``name`` on hyphens and spaces and run each piece through ``hooks``.

        Args:
            name: the raw city name.
            hooks: callables applied in order to every piece; each receives
                the current token and returns the new one, or ``None`` to
                drop it.

        Returns:
            The list of surviving tokens, in order of appearance.
        """
        tokens = []
        for hyphen_part in name.split("-"):
            for tok in hyphen_part.split(" "):
                for hook in hooks:
                    # Stop as soon as the token is dropped (None) or empty:
                    # empty pieces come from "--", repeated spaces or a bare
                    # elided article and must not be counted as tokens.
                    if not tok:
                        break
                    tok = hook(tok)
                if tok:
                    tokens.append(tok)
        return tokens

    def __init__(self, name, coords):
        """Store the name and coordinates and tokenize the name.

        Args:
            name: the city name.
            coords: indexable with at least two items (used by ``__repr__``);
                presumably latitude and longitude - confirm against the
                dataset.
        """
        self.name = name
        # Order matters: filler words are discarded first, then elided
        # articles are stripped, and only what is left is counted.
        self.token = City._name_to_token(
            name,
            [City._trash_bad_token, City._sanitize_token, City._account_token],
        )
        self.coords = coords

    def __repr__(self):
        """Return ``"<coords[0]>,<coords[1]> {<name>}"``."""
        return f"{self.coords[0]},{self.coords[1]} {{{self.name}}}"

 # Fail with a readable message instead of an IndexError traceback.
 if len(sys.argv) < 2:
    sys.exit(f"usage: {sys.argv[0]} <dataset.json>")

 # JSON is UTF-8 encoded; do not depend on the platform's locale default.
 with open(sys.argv[1], encoding="utf-8") as f:
    data = json.load(f)

 # Building each City also feeds the shared City.token_stats counter.
 # Each record is expected to expose "nom_comm" and "geo_point_2d" directly.
 cities = [City(ville["nom_comm"], ville["geo_point_2d"]) for ville in data]

 # Tokens ordered from most to least frequent (dicts keep insertion order).
 SORT = dict(sorted(City.token_stats.items(), key=lambda item: item[1], reverse=True))

 # For each of the 15 most frequent tokens, write a file named
 # "<count>-<token>" listing every city whose name contains that token.
 for k, v in list(SORT.items())[:15]:
    with open(f"{v}-{k}", "w", encoding="utf-8") as f:
        f.writelines(f"{city!r}\n" for city in cities if k in city.token)
	#!/usr/bin/env python3

	# Based on https://public.opendatasoft.com/explore/dataset/correspondance-code-insee-code-postal/download/?format=json&timezone=Europe/Berlin&lang=fr
	# The JSON file must be argv[1]

	import json
	import sys

	class City:
	    """A city name split into significant tokens, plus its coordinates.

	    Every instance created feeds the class-wide ``token_stats`` counter, so
	    after all cities are built it holds how often each token was seen.
	    """

	    # Shared by all instances: token -> number of times it was accounted.
	    token_stats = {}

	    # Filler words that are dropped from city names. A frozenset gives O(1)
	    # membership tests and is built once instead of on every call.
	    _TRASH = frozenset((
	        "ARRONDISSEMENT",
	        "AU",
	        "AUX",
	        "DE",
	        "DES",
	        "DU",
	        "EN",
	        "ET",
	        "LA",
	        "LE",
	        "LES",
	        "SAINT",
	        "SAINTE",
	        "SAINTES",
	        "SOUS",
	        "SUR",
	    ))

	    # Elided articles that may be glued to the front of a token.
	    _ELIDED_PREFIXES = ("D'", "L'")

	    @staticmethod
	    def _sanitize_token(token):
	        """Return ``token`` without a leading ``D'`` or ``L'``.

	        Only the leading prefix is removed; the rest of the token is left
	        untouched even if it contains ``D'`` or ``L'`` again.
	        """
	        for prefix in City._ELIDED_PREFIXES:
	            if token.startswith(prefix):
	                return token[len(prefix):]
	        return token

	    @staticmethod
	    def _trash_bad_token(token):
	        """Return ``None`` for a filler word, otherwise ``token`` unchanged."""
	        if token in City._TRASH:
	            return None
	        return token

	    @staticmethod
	    def _account_token(token):
	        """Count one more occurrence of ``token`` in ``token_stats``; return it."""
	        City.token_stats[token] = City.token_stats.get(token, 0) + 1
	        return token

	    @staticmethod
	    def _name_to_token(name, hooks):
	        """Split ``name`` on hyphens and spaces and run each piece through ``hooks``.

	        Args:
	            name: the raw city name.
	            hooks: callables applied in order to every piece; each receives
	                the current token and returns the new one, or ``None`` to
	                drop it.

	        Returns:
	            The list of surviving tokens, in order of appearance.
	        """
	        tokens = []
	        for hyphen_part in name.split("-"):
	            for tok in hyphen_part.split(" "):
	                for hook in hooks:
	                    # Stop as soon as the token is dropped (None) or empty:
	                    # empty pieces come from "--", repeated spaces or a bare
	                    # elided article and must not be counted as tokens.
	                    if not tok:
	                        break
	                    tok = hook(tok)
	                if tok:
	                    tokens.append(tok)
	        return tokens

	    def __init__(self, name, coords):
	        """Store the name and coordinates and tokenize the name.

	        Args:
	            name: the city name.
	            coords: indexable with at least two items (used by ``__repr__``);
	                presumably latitude and longitude - confirm against the
	                dataset.
	        """
	        self.name = name
	        # Order matters: filler words are discarded first, then elided
	        # articles are stripped, and only what is left is counted.
	        self.token = City._name_to_token(
	            name,
	            [City._trash_bad_token, City._sanitize_token, City._account_token],
	        )
	        self.coords = coords

	    def __repr__(self):
	        """Return ``"<coords[0]>,<coords[1]> {<name>}"``."""
	        return f"{self.coords[0]},{self.coords[1]} {{{self.name}}}"

	# Fail with a readable message instead of an IndexError traceback.
	if len(sys.argv) < 2:
	    sys.exit(f"usage: {sys.argv[0]} <dataset.json>")

	# JSON is UTF-8 encoded; do not depend on the platform's locale default.
	with open(sys.argv[1], encoding="utf-8") as f:
	    data = json.load(f)

	# Building each City also feeds the shared City.token_stats counter.
	# Each record is expected to expose "nom_comm" and "geo_point_2d" directly.
	cities = [City(ville["nom_comm"], ville["geo_point_2d"]) for ville in data]

	# Tokens ordered from most to least frequent (dicts keep insertion order).
	SORT = dict(sorted(City.token_stats.items(), key=lambda item: item[1], reverse=True))

	# For each of the 15 most frequent tokens, write a file named
	# "<count>-<token>" listing every city whose name contains that token.
	for k, v in list(SORT.items())[:15]:
	    with open(f"{v}-{k}", "w", encoding="utf-8") as f:
	        f.writelines(f"{city!r}\n" for city in cities if k in city.token)