Skip to content

Instantly share code, notes, and snippets.

@viniciusmss
Created January 25, 2018 09:28
Show Gist options
  • Save viniciusmss/d3687a0886305c0746565414e23945f7 to your computer and use it in GitHub Desktop.
Save viniciusmss/d3687a0886305c0746565414e23945f7 to your computer and use it in GitHub Desktop.
CS110 - Assignment 1
def merge(arr, l, mids, r):
'''
Merge is a subprocedure of mergeSort, declared below. It
merges k sorted subarrays into a sorted array. It does so by
iterating continuously over all subarrays, finding the smallest key,
and relocating it to the right position of the outcome array.
Inputs:
arr (list) The entire array to be sorted.
l, r (int) The left and right indexes that define the specific
section of the array that should be sorted in the current
routine of merge().
mids (list) The mid-points in the array arr[l, r] that define
the sorted subarrays that need to be merged. The length of
the list + 1 represents the "k" of the merge sort. For example,
if there are two mid points, the procedure is a 3-way merge.
'''
# Create a list of the size of the subarrays.
sizes = [mids[0] - l + 1]
for mid_index in range(1, len(mids)):
sizes.append(mids[mid_index] - mids[mid_index - 1])
sizes.append(r - mids[-1])
# Create temporary arrays with the appropriate sizes.
temp_arrays = [[0]*sizes[i] for i in range(len(sizes))]
# Copy the keys into the temporary arrays.
for i in range(0, sizes[0]):
temp_arrays[0][i] = arr[l + i]
for index_array in range(1, len(temp_arrays)):
for index_key in range(0, sizes[index_array]):
temp_arrays[index_array][index_key] = arr[mids[index_array - 1] + index_key + 1]
# Append a sentinel to the end of the temporary arrays.
for array in temp_arrays:
array.append(float("inf"))
# Create an array of indices to store the index of the
# next key of each subarray to be evaluated.
temp_indices = [0 for i in range(len(temp_arrays))]
for key in range(l, r+1):
# Start with a guess: the first subarray contains the next sorted key
this_min = temp_arrays[0][temp_indices[0]]
arr[key] = this_min
temp_indices[0] += 1
index_last_array = 0
# Check the guess, adding or subtracing from array indices as needed
for array_index in range(1, len(temp_arrays)):
this_key = temp_arrays[array_index][temp_indices[array_index]]
if this_key < this_min:
this_min = this_key
temp_indices[array_index] += 1
temp_indices[index_last_array] -= 1
index_last_array = array_index
# Put the current smallest element at the right part of the array
arr[key] = this_min
def mergeSort(arr, l, r, k = 2):
'''
k-way merge sort solves the sorting problem recursively by calling itself
on k subproblems of smaller size by an order of k, approximately. Its base
case is a unitary list, which is trivially sorted, when the merging procedure
returns. The merge procedure is then called to merge the sorted subarrays.
Inputs:
arr (list) The entire array to be sorted.
l, r (int) The left and right indexes that define the specific
section of the array that should be sorted in the current
routine of mergeSort().
k (int) The number of subproblems that the procedure should create.
As k increases, the recursion tree gains breadth and becomes shorter.
Default: 2.
'''
# Calculate the largest well-defined k-merge for the input array and indices
largest_k = r - l + 1
if largest_k <= 1:
return
else:
# If the user-defined k is larger than the size of the array,
# set k to the default.
if largest_k < k:
k = 2
# Calculate the relevant mid points.
mid_points = [((_*(r - l)) // k) + l for _ in range(1, k)]
# Call mergeSort on the k subproblems.
mergeSort(arr, l, mid_points[0], k)
mergeSort(arr, mid_points[-1] + 1, r, k)
for mid_index in range(1, len(mid_points)):
mergeSort(arr, mid_points[mid_index - 1] + 1, mid_points[mid_index], k)
merge(arr, l, mid_points, r)
def mixedSort(arr, l, r, k = 2):
'''
The algorithm calls insertion sort for input sizes smaller than 100.
For larger inputs, it calls te mergeSort procedure declared above.
'''
# Calculate the largest well-defined k-merge for the input array and indices.
# largest_k is equal to the length of the subarray.
largest_k = r - l + 1
if largest_k <= 1:
return
# If the subarray is smaller than 100, run insertion sort.
elif largest_k <= 100:
# The algorith sorts from left to right, starting with the second number of the list.
for j in range(l+1, largest_k):
key = arr[j]
# The key is now compared to the existing sorted way (initialized as A[0])
# And inserted into the proper position. The loop iterates the sorted array from right to left.
i = j - 1
while i >= 0 and arr[i] > key:
arr[i + 1] = arr[i]
i = i - 1
arr[i + 1] = key
else:
# If the user-defined k is larger than the size of the array,
# set k to the default.
if largest_k < k:
k = 2
# Calculate the relevant mid points.
mid_points = [((_*(r - l)) // k) + l for _ in range(1, k)]
# Call mergeSort on the k subproblems.
mergeSort(arr, l, mid_points[0], k)
mergeSort(arr, mid_points[-1] + 1, r, k)
for mid_index in range(1, len(mid_points)):
mergeSort(arr, mid_points[mid_index - 1] + 1, mid_points[mid_index], k)
merge(arr, l, mid_points, r)
# Prepare testing function and inputs
import time
def testIt(fun, extraArgs, iterations = 10):
history = [] # Save running times
for i in range(iterations):
start_time = time.time()
fun(*extraArgs)
history.append(time.time() - start_time)
# If the solution is correct
if extraArgs[0] == sorted(extraArgs[0]):
return sum(history)/iterations # Returning average running time
else:
print "Sorting algorithm is incorrect."
order3 = range(10**3, 0, -1)
order4 = range(10**4, 0, -1)
order5 = range(10**5, 0, -1)
order6 = range(10**6, 0, -1)
order7 = range(10**7, 0, -1)
# Run tests
print "Tests for binary merge sort."
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order3, 0, len(order3) - 1, 2)), len(order3))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order4, 0, len(order4) - 1, 2)), len(order4))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order5, 0, len(order5) - 1, 2)), len(order5))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order6, 0, len(order6) - 1, 2)), len(order6))
print "Test for three-way merge sort."
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order3, 0, len(order3) - 1, 3)), len(order3))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order4, 0, len(order4) - 1, 3)), len(order4))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order5, 0, len(order5) - 1, 3)), len(order5))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order6, 0, len(order6) - 1, 3)), len(order6))
print "Test for five-way merge sort."
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order3, 0, len(order3) - 1, 5)), len(order3))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order4, 0, len(order4) - 1, 5)), len(order4))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order5, 0, len(order5) - 1, 5)), len(order5))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order6, 0, len(order6) - 1, 5)), len(order6))
print "Test for ten-way merge sort."
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order3, 0, len(order3) - 1, 10)), len(order3))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order4, 0, len(order4) - 1, 10)), len(order4))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order5, 0, len(order5) - 1, 10)), len(order5))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order6, 0, len(order6) - 1, 10)), len(order6))
print "Test for n-way merge sort."
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order3, 0, len(order3) - 1, len(order3))), len(order3))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mergeSort, (order4, 0, len(order4) - 1, len(order4))), len(order4))
# These take too long.
#print "Average running time: {:.3f} seconds for input size of {:d}".format(
# testIt(mergeSort, (order5, 0, len(order5) - 1, len(order5))), len(order5))
#print "Average running time: {:.3f} seconds for input size of {:d}".format(
# testIt(mergeSort, (order6, 0, len(order6) - 1, len(order6))), len(order6))
print "Test for augmented (mixed) merge sort."
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mixedSort, (order3, 0, len(order3) - 1, 2)), len(order3))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mixedSort, (order4, 0, len(order4) - 1, 2)), len(order4))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mixedSort, (order5, 0, len(order5) - 1, 2)), len(order5))
print "Average running time: {:.3f} seconds for input size of {:d}".format(
testIt(mixedSort, (order6, 0, len(order6) - 1, 2)), len(order6))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment