Last active
August 15, 2021 10:28
-
-
Save bee-san/f6ccc000ab6527769999fd0a9ebf59de to your computer and use it in GitHub Desktop.
An Python implementation of Timsort
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# based off of this code https://gist.github.com/nandajavarma/a3a6b62f34e74ec4c31674934327bbd3 | |
# Brandon Skerritt | |
# https://skerritt.tech | |
def binary_search(the_array, item, start, end): | |
if start == end: | |
if the_array[start] > item: | |
return start | |
else: | |
return start + 1 | |
if start > end: | |
return start | |
mid = round((start + end)/ 2) | |
if the_array[mid] < item: | |
return binary_search(the_array, item, mid + 1, end) | |
elif the_array[mid] > item: | |
return binary_search(the_array, item, start, mid - 1) | |
else: | |
return mid | |
""" | |
Insertion sort that timsort uses if the array size is small or if | |
the size of the "run" is small | |
""" | |
def insertion_sort(the_array): | |
l = len(the_array) | |
for index in range(1, l): | |
value = the_array[index] | |
pos = binary_search(the_array, value, 0, index - 1) | |
the_array = the_array[:pos] + [value] + the_array[pos:index] + the_array[index+1:] | |
return the_array | |
def merge(left, right): | |
"""Takes two sorted lists and returns a single sorted list by comparing the | |
elements one at a time. | |
[1, 2, 3, 4, 5, 6] | |
""" | |
if not left: | |
return right | |
if not right: | |
return left | |
if left[0] < right[0]: | |
return [left[0]] + merge(left[1:], right) | |
return [right[0]] + merge(left, right[1:]) | |
def timsort(the_array): | |
runs, sorted_runs = [], [] | |
length = len(the_array) | |
new_run = [the_array[0]] | |
# for every i in the range of 1 to length of array | |
for i in range(1, length): | |
# if i is at the end of the list | |
if i == length - 1: | |
new_run.append(the_array[i]) | |
runs.append(new_run) | |
break | |
# if the i'th element of the array is less than the one before it | |
if the_array[i] < the_array[i-1]: | |
# if new_run is set to None (NULL) | |
if not new_run: | |
runs.append([the_array[i]]) | |
new_run.append(the_array[i]) | |
else: | |
runs.append(new_run) | |
new_run = [] | |
# else if its equal to or more than | |
else: | |
new_run.append(the_array[i]) | |
# for every item in runs, append it using insertion sort | |
for item in runs: | |
sorted_runs.append(insertion_sort(item)) | |
# for every run in sorted_runs, merge them | |
sorted_array = [] | |
for run in sorted_runs: | |
sorted_array = merge(sorted_array, run) | |
print(sorted_array) | |
timsort([2, 3, 1, 5, 6, 7]) |
Hi. I have made all the changes listed above, but when I run this on an array of random number of length 5000+ I get a recursion error.
It identifies line 47 as the problem.
Anybody got any ideas how to fix it?
It looks like the runs are not being allowed to grow to a large enough size so the merge element has way too much work to do.
For example, I had 2461 runs for an array of 5000 elements.
@ConorHogan, use this in your timsort method:: sys.setrecursionlimit(max(sys.getrecursionlimit(), len(the_array)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I was having some issues when sorting where integers were being duplicated, based off of the above code you wrote.
Looking things over, I noticed that the issue was potentially coming from insertion_sort and therefore went ahead and rewrote it. Here's the code snippet for anyone interested or having similar issues: