Created
November 30, 2017 02:27
-
-
Save stevenhao/07e9180934dffe4fec25316ae2659993 to your computer and use it in GitHub Desktop.
WIP Distribution sort
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # 6.851 Coding Problem: Distribution Sort | |
| """ Prompt: | |
| Implement external-memory distribution sort and/or cache-oblivious distribution sort, possibly with a simpler pivot rule (e.g. random). | |
| How do they compare to quicksort? | |
| """ | |
| class Array: | |
| """ Array class for fixed-size arrays that supports constant-time slicing, which returns an "iterator"-like handle that points to the same memory location. | |
| This is in contrast with standard python slices, which do copy the array elements | |
| """ | |
| def __init__(self, array, offset=0, length=None): | |
| self.array = array | |
| self.offset = offset | |
| self.length = length if length != None else len(array) - offset | |
| self.nextIndex = 0 | |
| def __getitem__(self, i): | |
| return self.array[i + self.offset] | |
| def __setitem__(self, i, val): | |
| self.array[i + self.offset] = val | |
| def check_inbounds(self, i): | |
| if not(i < self.length and i + self.offset < len(self.array)): | |
| raise Exception('IndexError', i) | |
| def slice(self, i, j): | |
| self.check_inbounds(i) | |
| self.check_inbounds(j - 1) | |
| return Array(self.array, self.offset + i, j - i) | |
| def hasNext(self): | |
| return self.nextIndex < self.length | |
| def peek(self): | |
| return self.__getitem__(self.nextIndex) | |
| def __len__(self): | |
| return self.length | |
| def __str__(self): | |
| return str(self.array[self.offset:self.offset+self.length]) | |
| def next(self): | |
| res = self.peek() | |
| self.nextIndex += 1 | |
| return res | |
| import math | |
| class DistributionSort(): | |
| def __init__(self, array): | |
| self.array = array | |
| self.buckets = [] | |
| self.bnums = [] | |
| self.n = len(array) | |
| self.subarrays = [] | |
| def copy_elems(self, i, j): | |
| while self.subarrays[i].hasNext() and self.subarrays[i].peek() < self.buckets[j].pivot: | |
| self.buckets[j].append(self.subarrays[i].next()) | |
| def distribute(self, i, j, m): | |
| if m == 1: | |
| self.copy_elems (i, j) | |
| else: | |
| self.distribute(i, j, m/2) | |
| self.distribute(i + m/2, j, m/2) | |
| self.distribute(i, j + m/2, m/2) | |
| self.distribute(i + m/2, j + m/2, m/2) | |
| def distribution_sort(self): | |
| self.distribute(0, self.n, 0) | |
| def partition(self): | |
| sqrtn = math.ceil(math.sqrt(self.n)) | |
| cur = [] | |
| for i in self.array: | |
| cur.append(i) | |
| if len(cur) == sqrtn: | |
| self.subarrays.append(cur) | |
| cur = [] | |
| if len(cur) > 0: | |
| self.subarrays.append(cur) | |
| def sort(self): | |
| self.partition() | |
| # recursively sort subarrays | |
| new_subarrays = [] | |
| for subarray in self.subarrays: | |
| ds = DistributionSort(subarray) | |
| new_subarrays.append(ds.sort()) | |
| self.subarrays = new_subarrays | |
| self.distribution_sort() | |
| # recursively sort buckets | |
| array_index = 0 | |
| for bucket in self.buckets: | |
| ds = DistributionSort(bucket) | |
| sorted_bucket = ds.sort() | |
| for i in sorted_bucket: | |
| self.array[array_index] = i | |
| array_index += 1 | |
| return self.array | |
| if __name__ == '__main__': | |
| from random import shuffle | |
| ar = Array(range(100)) | |
| shuffle(ar) | |
| print 'input', ar | |
| # ar_sorted = DistributionSort(ar).sort() | |
| # print 'output', ar_sorted |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment