-
-
Save bee-san/f6ccc000ab6527769999fd0a9ebf59de to your computer and use it in GitHub Desktop.
| # based off of this code https://gist.github.com/nandajavarma/a3a6b62f34e74ec4c31674934327bbd3 | |
| # Brandon Skerritt | |
| # https://skerritt.tech | |
| def binary_search(the_array, item, start, end): | |
| if start == end: | |
| if the_array[start] > item: | |
| return start | |
| else: | |
| return start + 1 | |
| if start > end: | |
| return start | |
| mid = round((start + end)/ 2) | |
| if the_array[mid] < item: | |
| return binary_search(the_array, item, mid + 1, end) | |
| elif the_array[mid] > item: | |
| return binary_search(the_array, item, start, mid - 1) | |
| else: | |
| return mid | |
| """ | |
| Insertion sort that timsort uses if the array size is small or if | |
| the size of the "run" is small | |
| """ | |
| def insertion_sort(the_array): | |
| l = len(the_array) | |
| for index in range(1, l): | |
| value = the_array[index] | |
| pos = binary_search(the_array, value, 0, index - 1) | |
| the_array = the_array[:pos] + [value] + the_array[pos:index] + the_array[index+1:] | |
| return the_array | |
| def merge(left, right): | |
| """Takes two sorted lists and returns a single sorted list by comparing the | |
| elements one at a time. | |
| [1, 2, 3, 4, 5, 6] | |
| """ | |
| if not left: | |
| return right | |
| if not right: | |
| return left | |
| if left[0] < right[0]: | |
| return [left[0]] + merge(left[1:], right) | |
| return [right[0]] + merge(left, right[1:]) | |
| def timsort(the_array): | |
| runs, sorted_runs = [], [] | |
| length = len(the_array) | |
| new_run = [the_array[0]] | |
| # for every i in the range of 1 to length of array | |
| for i in range(1, length): | |
| # if i is at the end of the list | |
| if i == length - 1: | |
| new_run.append(the_array[i]) | |
| runs.append(new_run) | |
| break | |
| # if the i'th element of the array is less than the one before it | |
| if the_array[i] < the_array[i-1]: | |
| # if new_run is set to None (NULL) | |
| if not new_run: | |
| runs.append([the_array[i]]) | |
| new_run.append(the_array[i]) | |
| else: | |
| runs.append(new_run) | |
| new_run = [] | |
| # else if its equal to or more than | |
| else: | |
| new_run.append(the_array[i]) | |
| # for every item in runs, append it using insertion sort | |
| for item in runs: | |
| sorted_runs.append(insertion_sort(item)) | |
| # for every run in sorted_runs, merge them | |
| sorted_array = [] | |
| for run in sorted_runs: | |
| sorted_array = merge(sorted_array, run) | |
| print(sorted_array) | |
| timsort([2, 3, 1, 5, 6, 7]) |
I caculate the time and it's slower than insertion_sort and merge_sort?
I was having some issues when sorting where integers were being duplicated, based off of the above code you wrote.
Looking things over, I noticed that the issue was potentially coming from insertion_sort and therefore went ahead and rewrote it. Here's the code snippet for anyone interested or having similar issues:
def insertion_sort(the_array):
l = len(the_array)
for index in range(1, l):
value = the_array[index]
while index>0 and the_array[index-1]>value:
the_array[index] = the_array[index-1]
index = index-1
the_array[index] = value
return the_array
Hi. I have made all the changes listed above, but when I run this on an array of random number of length 5000+ I get a recursion error.
It identifies line 47 as the problem.
Anybody got any ideas how to fix it?
It looks like the runs are not being allowed to grow to a large enough size so the merge element has way too much work to do.
For example, I had 2461 runs for an array of 5000 elements.
@ConorHogan, use this in your timsort method:: sys.setrecursionlimit(max(sys.getrecursionlimit(), len(the_array)))
@avmarchenko, I think you are right. new_run =[] (in line 70) would result in missing elements in the final sorted array. For example, timsort([2, 3, 1, 5, 6, 7]) would output [2, 3, 5, 6, 7].