Skip to content

Instantly share code, notes, and snippets.

@stevenhao
Created November 30, 2017 02:27
Show Gist options
  • Save stevenhao/07e9180934dffe4fec25316ae2659993 to your computer and use it in GitHub Desktop.
Save stevenhao/07e9180934dffe4fec25316ae2659993 to your computer and use it in GitHub Desktop.
WIP Distribution sort
# 6.851 Coding Problem: Distribution Sort
""" Prompt:
Implement external-memory distribution sort and/or cache-oblivious distribution sort, possibly with a simpler pivot rule (e.g. random).
How do they compare to quicksort?
"""
class Array:
""" Array class for fixed-size arrays that supports constant-time slicing, which returns an "iterator"-like handle that points to the same memory location.
This is in contrast with standard python slices, which do copy the array elements
"""
def __init__(self, array, offset=0, length=None):
self.array = array
self.offset = offset
self.length = length if length != None else len(array) - offset
self.nextIndex = 0
def __getitem__(self, i):
return self.array[i + self.offset]
def __setitem__(self, i, val):
self.array[i + self.offset] = val
def check_inbounds(self, i):
if not(i < self.length and i + self.offset < len(self.array)):
raise Exception('IndexError', i)
def slice(self, i, j):
self.check_inbounds(i)
self.check_inbounds(j - 1)
return Array(self.array, self.offset + i, j - i)
def hasNext(self):
return self.nextIndex < self.length
def peek(self):
return self.__getitem__(self.nextIndex)
def __len__(self):
return self.length
def __str__(self):
return str(self.array[self.offset:self.offset+self.length])
def next(self):
res = self.peek()
self.nextIndex += 1
return res
import math
class DistributionSort():
def __init__(self, array):
self.array = array
self.buckets = []
self.bnums = []
self.n = len(array)
self.subarrays = []
def copy_elems(self, i, j):
while self.subarrays[i].hasNext() and self.subarrays[i].peek() < self.buckets[j].pivot:
self.buckets[j].append(self.subarrays[i].next())
def distribute(self, i, j, m):
if m == 1:
self.copy_elems (i, j)
else:
self.distribute(i, j, m/2)
self.distribute(i + m/2, j, m/2)
self.distribute(i, j + m/2, m/2)
self.distribute(i + m/2, j + m/2, m/2)
def distribution_sort(self):
self.distribute(0, self.n, 0)
def partition(self):
sqrtn = math.ceil(math.sqrt(self.n))
cur = []
for i in self.array:
cur.append(i)
if len(cur) == sqrtn:
self.subarrays.append(cur)
cur = []
if len(cur) > 0:
self.subarrays.append(cur)
def sort(self):
self.partition()
# recursively sort subarrays
new_subarrays = []
for subarray in self.subarrays:
ds = DistributionSort(subarray)
new_subarrays.append(ds.sort())
self.subarrays = new_subarrays
self.distribution_sort()
# recursively sort buckets
array_index = 0
for bucket in self.buckets:
ds = DistributionSort(bucket)
sorted_bucket = ds.sort()
for i in sorted_bucket:
self.array[array_index] = i
array_index += 1
return self.array
if __name__ == '__main__':
from random import shuffle
ar = Array(range(100))
shuffle(ar)
print 'input', ar
# ar_sorted = DistributionSort(ar).sort()
# print 'output', ar_sorted
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment