-
-
Save nandajavarma/a3a6b62f34e74ec4c31674934327bbd3 to your computer and use it in GitHub Desktop.
| #!/usr/lib/python | |
| # -*- coding: utf-8 -*- | |
| # | |
| # This is a re-implementation of Python's timsort in Python | |
| # itself. This is purely for learning purposes. :) | |
| # References: [ | |
| # https://en.wikipedia.org/wiki/Timsort, | |
| # http://wiki.c2.com/?TimSort | |
| # ] | |
| # | |
| # Copyright 2017 Nandaja Varma <[email protected]> | |
| # | |
| # This program is free software; you can redistribute it and/or modify | |
| # it under the terms of the GNU General Public License as published by | |
| # the Free Software Foundation; either version 3 of the License, or | |
| # (at your option) any later version. | |
| # | |
| # This program is distributed in the hope that it will be useful, | |
| # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
| # GNU Library General Public License for more details. | |
| # | |
| # You should have received a copy of the GNU General Public License | |
| # along with this program; if not, write to the Free Software | |
| # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | |
| # 02110-1301, | |
| # USA. | |
| def binary_search(the_array, item, start, end): | |
| if start == end: | |
| if the_array[start] > item: | |
| return start | |
| else: | |
| return start + 1 | |
| if start > end: | |
| return start | |
| mid = (start + end)/ 2 | |
| if the_array[mid] < item: | |
| return binary_search(the_array, item, mid + 1, end) | |
| elif the_array[mid] > item: | |
| return binary_search(the_array, item, start, mid - 1) | |
| else: | |
| return mid | |
| """ | |
| Insertion sort that the heap sort uses if the array size is small or if | |
| the size of the "run" is small | |
| """ | |
| def insertion_sort(the_array): | |
| l = len(the_array) | |
| for index in range(1, l): | |
| value = the_array[index] | |
| pos = binary_search(the_array, value, 0, index - 1) | |
| the_array = the_array[:pos] + [value] + the_array[pos:index] + the_array[index+1:] | |
| return the_array | |
| def merge(left, right): | |
| """Takes two sorted lists and returns a single sorted list by comparing the | |
| elements one at a time. | |
| [1, 2, 3, 4, 5, 6] | |
| """ | |
| if not left: | |
| return right | |
| if not right: | |
| return left | |
| if left[0] < right[0]: | |
| return [left[0]] + merge(left[1:], right) | |
| return [right[0]] + merge(left, right[1:]) | |
| def timsort(the_array): | |
| runs, sorted_runs = [], [] | |
| l = len(the_array) | |
| new_run = [the_array[0]] | |
| for i in range(1, l): | |
| if i == l-1: | |
| new_run.append(the_array[i]) | |
| runs.append(new_run) | |
| break | |
| if the_array[i] < the_array[i-1]: | |
| if not new_run: | |
| runs.append([the_array[i-1]]) | |
| new_run.append(the_array[i]) | |
| else: | |
| runs.append(new_run) | |
| new_run = [] | |
| else: | |
| new_run.append(the_array[i]) | |
| for each in runs: | |
| sorted_runs.append(insertion_sort(each)) | |
| sorted_array = [] | |
| for run in sorted_runs: | |
| sorted_array = merge(sorted_array, run) | |
| print sorted_array |
I wrote up this Gist, which is the code as OP posted with PenguinLiam's suggestions
https://gist.github.com/bee-san/f6ccc000ab6527769999fd0a9ebf59de
@PenguinLiam Actually to make the code work on both python2 and python3, don't use round method, using // instead:
mid = (start + end) // 2
This is not Timsort! Timsort uses a stack with specific constraints on the top entries to decide which runs to merge and when.
@6r1d: I haven't worked through that code entirely, but it does not find existing runs or reverse them before performing insertion sort. I believe this is required for Timsort.
I think that line 90 should be new_run = [the_array[i]], this will not remove any values when divide the array into runs I guess.
This code work on both python2 and python3. Please use mid = int((start + end)/ 2) or mid = (start + end) // 2 instead of mid = (start + end)/ 2
Using your algorithm I found a few flaws. First off, when identifying which is the mid value in binary_search, a round function should be used as mid can be a float otherwise and lists don't use floats for indexes. Second which is little confusing, it removed on average about 20% of the values from the list when sorting it. I found the problem to be in the section placing items into arrays. The solution I found was to remove this line:
runs.append([the_array[i-1]])because it added duplicate items to the runs array, and add this line:
runs.append([the_array[i]])to add an item to the runs array if new_run isnt empty