Created
February 17, 2017 19:47
-
-
Save nandajavarma/a3a6b62f34e74ec4c31674934327bbd3 to your computer and use it in GitHub Desktop.
Timsort implementation using Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/lib/python | |
# -*- coding: utf-8 -*- | |
# | |
# This is a re-implementation of Python's timsort in Python | |
# itself. This is purely for learning purposes. :) | |
# References: [ | |
# https://en.wikipedia.org/wiki/Timsort, | |
# http://wiki.c2.com/?TimSort | |
# ] | |
# | |
# Copyright 2017 Nandaja Varma <[email protected]> | |
# | |
# This program is free software; you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation; either version 3 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU Library General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License | |
# along with this program; if not, write to the Free Software | |
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | |
# 02110-1301, | |
# USA. | |
def binary_search(the_array, item, start, end): | |
if start == end: | |
if the_array[start] > item: | |
return start | |
else: | |
return start + 1 | |
if start > end: | |
return start | |
mid = (start + end)/ 2 | |
if the_array[mid] < item: | |
return binary_search(the_array, item, mid + 1, end) | |
elif the_array[mid] > item: | |
return binary_search(the_array, item, start, mid - 1) | |
else: | |
return mid | |
""" | |
Insertion sort that the heap sort uses if the array size is small or if | |
the size of the "run" is small | |
""" | |
def insertion_sort(the_array): | |
l = len(the_array) | |
for index in range(1, l): | |
value = the_array[index] | |
pos = binary_search(the_array, value, 0, index - 1) | |
the_array = the_array[:pos] + [value] + the_array[pos:index] + the_array[index+1:] | |
return the_array | |
def merge(left, right): | |
"""Takes two sorted lists and returns a single sorted list by comparing the | |
elements one at a time. | |
[1, 2, 3, 4, 5, 6] | |
""" | |
if not left: | |
return right | |
if not right: | |
return left | |
if left[0] < right[0]: | |
return [left[0]] + merge(left[1:], right) | |
return [right[0]] + merge(left, right[1:]) | |
def timsort(the_array): | |
runs, sorted_runs = [], [] | |
l = len(the_array) | |
new_run = [the_array[0]] | |
for i in range(1, l): | |
if i == l-1: | |
new_run.append(the_array[i]) | |
runs.append(new_run) | |
break | |
if the_array[i] < the_array[i-1]: | |
if not new_run: | |
runs.append([the_array[i-1]]) | |
new_run.append(the_array[i]) | |
else: | |
runs.append(new_run) | |
new_run = [] | |
else: | |
new_run.append(the_array[i]) | |
for each in runs: | |
sorted_runs.append(insertion_sort(each)) | |
sorted_array = [] | |
for run in sorted_runs: | |
sorted_array = merge(sorted_array, run) | |
print sorted_array |
I think that line 90 should be new_run = [the_array[i]]
, this will not remove any values when divide the array into runs I guess.
This code work on both python2 and python3. Please use mid = int((start + end)/ 2)
or mid = (start + end) // 2
instead of mid = (start + end)/ 2
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@6r1d: I haven't worked through that code entirely, but it does not find existing runs or reverse them before performing insertion sort. I believe this is required for Timsort.