-
-
Save bee-san/f6ccc000ab6527769999fd0a9ebf59de to your computer and use it in GitHub Desktop.
# based off of this code https://gist.github.com/nandajavarma/a3a6b62f34e74ec4c31674934327bbd3 | |
# Brandon Skerritt | |
# https://skerritt.tech | |
def binary_search(the_array, item, start, end): | |
if start == end: | |
if the_array[start] > item: | |
return start | |
else: | |
return start + 1 | |
if start > end: | |
return start | |
mid = round((start + end)/ 2) | |
if the_array[mid] < item: | |
return binary_search(the_array, item, mid + 1, end) | |
elif the_array[mid] > item: | |
return binary_search(the_array, item, start, mid - 1) | |
else: | |
return mid | |
""" | |
Insertion sort that timsort uses if the array size is small or if | |
the size of the "run" is small | |
""" | |
def insertion_sort(the_array): | |
l = len(the_array) | |
for index in range(1, l): | |
value = the_array[index] | |
pos = binary_search(the_array, value, 0, index - 1) | |
the_array = the_array[:pos] + [value] + the_array[pos:index] + the_array[index+1:] | |
return the_array | |
def merge(left, right): | |
"""Takes two sorted lists and returns a single sorted list by comparing the | |
elements one at a time. | |
[1, 2, 3, 4, 5, 6] | |
""" | |
if not left: | |
return right | |
if not right: | |
return left | |
if left[0] < right[0]: | |
return [left[0]] + merge(left[1:], right) | |
return [right[0]] + merge(left, right[1:]) | |
def timsort(the_array): | |
runs, sorted_runs = [], [] | |
length = len(the_array) | |
new_run = [the_array[0]] | |
# for every i in the range of 1 to length of array | |
for i in range(1, length): | |
# if i is at the end of the list | |
if i == length - 1: | |
new_run.append(the_array[i]) | |
runs.append(new_run) | |
break | |
# if the i'th element of the array is less than the one before it | |
if the_array[i] < the_array[i-1]: | |
# if new_run is set to None (NULL) | |
if not new_run: | |
runs.append([the_array[i]]) | |
new_run.append(the_array[i]) | |
else: | |
runs.append(new_run) | |
new_run = [] | |
# else if its equal to or more than | |
else: | |
new_run.append(the_array[i]) | |
# for every item in runs, append it using insertion sort | |
for item in runs: | |
sorted_runs.append(insertion_sort(item)) | |
# for every run in sorted_runs, merge them | |
sorted_array = [] | |
for run in sorted_runs: | |
sorted_array = merge(sorted_array, run) | |
print(sorted_array) | |
timsort([2, 3, 1, 5, 6, 7]) |
I caculate the time and it's slower than insertion_sort and merge_sort?
I was having some issues when sorting where integers were being duplicated, based off of the above code you wrote.
Looking things over, I noticed that the issue was potentially coming from insertion_sort and therefore went ahead and rewrote it. Here's the code snippet for anyone interested or having similar issues:
def insertion_sort(the_array):
l = len(the_array)
for index in range(1, l):
value = the_array[index]
while index>0 and the_array[index-1]>value:
the_array[index] = the_array[index-1]
index = index-1
the_array[index] = value
return the_array
Hi. I have made all the changes listed above, but when I run this on an array of random number of length 5000+ I get a recursion error.
It identifies line 47 as the problem.
Anybody got any ideas how to fix it?
It looks like the runs are not being allowed to grow to a large enough size so the merge element has way too much work to do.
For example, I had 2461 runs for an array of 5000 elements.
@ConorHogan, use this in your timsort method:: sys.setrecursionlimit(max(sys.getrecursionlimit(), len(the_array)))
@avmarchenko, I think you are right. new_run =[] (in line 70) would result in missing elements in the final sorted array. For example, timsort([2, 3, 1, 5, 6, 7]) would output [2, 3, 5, 6, 7].