Last active
February 22, 2023 20:59
-
-
Save TimSC/7be7cb2efefaf908e6e9c23c55b0fe80 to your computer and use it in GitHub Desktop.
A script to group delivery areas into routes of roughly similar size.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A script to group delivery areas into routes of roughly similar size. | |
import csv | |
from sklearn import cluster | |
from pyproj import Proj, transform | |
import numpy as np | |
from matplotlib import pyplot as plt | |
class EqualWeightClustering(object): | |
# A greedy clustering approach | |
def __init__(self, n_clusters): | |
self.n_clusters = int(n_clusters) | |
self.uneven_weight_cost = 1.5 | |
self.n_restarts = 10 | |
def fit(self, samples, sample_weight=None): | |
samples = np.array(samples) | |
if sample_weight is None: | |
sample_weight = np.ones(sample.shape[0]) | |
else: | |
sample_weight = np.array(sample_weight) | |
final_assignment = None | |
final_cost = None | |
for restart_num in range(self.n_restarts): | |
print ("restart", restart_num, "of", self.n_restarts) | |
assignment = np.array(np.random.rand(samples.shape[0]) * self.n_clusters, dtype=np.uint16) | |
self.best_fit = assignment | |
self.best_cost = self.cost_func(samples, sample_weight, assignment) | |
#print ("initial cost", self.best_cost) | |
prev_score = None | |
prev_best = None | |
iter_score = None | |
iter_best = None | |
while prev_score is None or iter_score < prev_score: | |
prev_score = iter_score | |
prev_best = iter_best | |
iter_best = None | |
iter_score = None | |
#Try various changes to assignments and see what is best | |
for i in range(assignment.shape[0]): | |
candidate_assignment = assignment.copy() | |
for j in range(self.n_clusters): | |
if j == candidate_assignment[i]: continue | |
candidate_assignment[i] = j | |
iter_cost = self.cost_func(samples, sample_weight, candidate_assignment) | |
#print (i, j, candidate_assignment, iter_cost) | |
if iter_score is None or iter_cost < iter_score: | |
iter_score = iter_cost | |
iter_best = candidate_assignment.copy() | |
print ("iter_score", iter_score) | |
assignment = iter_best | |
if final_cost is None or iter_score < final_cost: | |
final_cost = iter_score | |
final_assignment = iter_best | |
if prev_score < final_cost: | |
final_cost = prev_score | |
final_assignment = prev_best | |
print ("final_cost", final_cost) | |
return final_assignment | |
def cost_func(self, samples, sample_weight, assignment): | |
total_cost = 0.0 | |
for c in range(self.n_clusters): | |
cmask = assignment == c | |
csamples = samples[cmask] | |
cweights = sample_weight[cmask] | |
#Calc cluster centre | |
totalWeight = np.sum(cweights) | |
if csamples.shape[0] > 0 and totalWeight > 0.0: | |
r = np.average(csamples, axis=0, weights=cweights) | |
else: | |
r = np.zeros(csamples.shape[1]) | |
#Calc distances from centre | |
if csamples.shape[0] > 0: | |
cost = np.sum(np.power(np.sum(np.power(csamples - r, 2.0), axis=1), 0.5)) | |
else: | |
cost = 0.0 | |
total_cost += cost | |
average_weight = np.sum(sample_weight) / self.n_clusters | |
for c in range(self.n_clusters): | |
cmask = assignment == c | |
cweights = sample_weight[cmask] | |
cost = np.abs(self.uneven_weight_cost * (np.sum(cweights) - average_weight)) | |
total_cost += cost | |
return total_cost | |
def ClusterZone(data, areaCode="IA", n_clusters=4): | |
#kmeans = cluster.KMeans(n_clusters=n_clusters, algorithm='lloyd') | |
kmeans = EqualWeightClustering(n_clusters=n_clusters) | |
deliveriesCollect = [] | |
posCollect = [] | |
inProj = Proj(init='epsg:4326') | |
outProj = Proj(init='epsg:27700') | |
processedRows = [] | |
for li in data: | |
if li['Area'] != areaCode: continue | |
try: | |
deliveries = int(li['Deliveries (estimate)']) | |
except ValueError: | |
print (0, pos, None) | |
deliveriesCollect.append(0) | |
posCollect.append((0, 0)) | |
processedRows.append(li) | |
continue | |
pos = list(map(float, li['Lat/Lon'].split(","))) | |
x2,y2 = transform(inProj,outProj,pos[1],pos[0]) | |
print (deliveries, pos, (x2, y2)) | |
deliveriesCollect.append(deliveries) | |
posCollect.append((x2,y2)) | |
processedRows.append(li) | |
print (deliveriesCollect) | |
model = kmeans.fit(posCollect, sample_weight = deliveriesCollect) | |
predicted = model | |
#predicted = model.predict(posCollect, sample_weight = deliveriesCollect) | |
#predicted = model.predict(posCollect) | |
if 0: | |
for i in range(n_clusters): | |
filteredXY = np.array(posCollect)[predicted == i] | |
filteredWeights = np.array(deliveriesCollect)[predicted == i] | |
plt.scatter(filteredXY[:,0] , filteredXY[:,1], label="Route {}".format(i)) | |
print ("route", i, filteredWeights.sum()) | |
out = csv.writer(open("out.csv", "wt")) | |
out.writerow(predicted) | |
del out | |
for pred, row in zip(predicted, data): | |
print ("{}\t{}".format(pred, row['Address Range'], row['Deliveries (estimate)'])) | |
plt.legend() | |
plt.show() | |
return predicted, processedRows | |
if __name__=="__main__": | |
data = list(csv.DictReader(open("/home/tim/Desktop/NelsonRoads.csv", "rt"))) | |
overall_n_clusters=34 | |
areaTotals = {} | |
for li in data: | |
areaCode = li['Area'] | |
try: | |
numDeliveries = int(li['Deliveries (estimate)']) | |
except ValueError: | |
numDeliveries = None | |
if numDeliveries is not None: | |
if areaCode in areaTotals: | |
areaTotals[areaCode] += numDeliveries | |
else: | |
areaTotals[areaCode] = numDeliveries | |
#Scale area deliveies to number of routes | |
out = csv.writer(open("out.csv", "wt")) | |
out.writerow(list(data[0].keys())+["route"]) | |
totalDeliveries = sum(areaTotals.values()) | |
totalRoutesGen = 0 | |
for areaCode in areaTotals: | |
#if areaCode != "ID": continue | |
numRoutes = int(round(areaTotals[areaCode] * overall_n_clusters / totalDeliveries)) | |
predicted, processedRows = ClusterZone(data, areaCode=areaCode, n_clusters=numRoutes) | |
for pred, row in zip(predicted, processedRows): | |
rowMod = list(row.values()) + [pred+totalRoutesGen+1] | |
out.writerow(rowMod) | |
totalRoutesGen += numRoutes | |
print (areaCode, numRoutes, totalRoutesGen) | |
del out |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment