Created
November 28, 2020 00:47
-
-
Save rosalogia/6ad3a70fdb4983f8236f1c2497f95902 to your computer and use it in GitHub Desktop.
Heavily Annotated Implementation of MergeSort in C++
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> // For input/output | |
using namespace std; | |
// The merge function is where the sorting actually happens | |
// int arr[] is the array we're passing in | |
// int l is the index of the array that the left "chunk" begins at | |
// int m is the index of the array that the left "chunk" ends at, whereas the right "chunk" ends at m + 1 | |
// int r is the index of the array that the right "chunk" /ends/ at | |
void merge(int arr[], int l, int m, int r) { | |
// Here we compute the size of the two "chunks" | |
// The size of the left chunk is m - l + 1 because m is where the left chunk ends, and l is where it begins. | |
// We add 1 to m - l to make sure the size is exclusive of the last element. E.g. if we have m = 5 and | |
// l = 0, we actually have a chunk of 6 elements since 5 is a valid index of the chunk. | |
int nl = m - l + 1; | |
// The size of the right chunk is r - m because r is the last valid index of the right chunk, and m is | |
// exactly one less than the first valid index of the right chunk. Be aware that these "chunks" do not | |
// exist completely on their own, and we keep track of their index positions in the original array because | |
// we will eventually be modifying the original array based on their location in that array. | |
int nr = r - m; | |
// To clarify and demonstrate the above, let's assume we have an array {5, 3, 4, 1, 2} | |
// We have the following "chunks" already prepared and sorted for us: {3, 4, 5} and {1, 2} | |
// Now we want to merge them. | |
// arr[] is still {5, 3, 4, 1, 2}, the original array | |
// int l is 0 since that's where the left chunk begins | |
// int m is 2 since that's where the left chunk ends | |
// int r is 4 since that's where the right chunk ends | |
// | |
// The size of the left chunk is m - l + 1, i.e. 2 - 0 + 1, which is 3 | |
// The size of the right chunk is r - m, i.e. 4 - 2, which is 2 | |
// Now we're going to create two temporary arrays to represent and store the values in our two chunks | |
int L[nl], R[nr]; | |
// Here we're filling up our temporary arrays with the values from the two chunks | |
// For L we have to remember that when we access the values in arr, we have to begin at l and move up from there | |
// For R we have to remember to begin at m + 1 and move up from there | |
// As a reminder, this is because l is where the left chunk begins in the original array, and m + 1 is where the | |
// right chunk begins in the original array | |
for (int i = 0; i < nl; i++) L[i] = arr[l + i]; | |
for (int j = 0; j < nr; j++) R[j] = arr[m + 1 + j]; | |
// Now we're going to use a while loop to run through both L and R at once, both of which are SORTED already! | |
// When we loop through them, we will compare each element of L to the corresponding element of R. Whichever one | |
// is least will be added first! Here we create some index variables. We will use i to represent a place in L, | |
// j to represent a place in R, and k to represent the place we're at in the original array. | |
// | |
// We set k equal to l because that is the place in the original array that we're starting at! | |
int i = 0, j = 0, k = l; | |
// We only want to loop until we're out of values for either chunk. If one chunk is larger or smaller than | |
// the other, we'll handle that by adding all the leftover values /after/ this loop. For now, we only want | |
// to loop while the current index of L is less than the size of the left chunk, AND while the current index | |
// of R is less than the size of the right chunk. | |
while (i < nl && j < nr) { | |
// Here is where the sorting begins to happen! | |
// If the current index of the Left chunk is less than or equal to the current index of the Right chunk | |
// then we want to set the current value in the original array to the current value in L, NOT the current | |
// value in R. On the other hand, if the current value of the right chunk is less than the current value of | |
// the left chunk, then we want to set the current value in the original array to the current value in R, as | |
// opposed to the current value in L. | |
// | |
// Below we'll see how that works in practice, and go over an example afterwards | |
if (L[i] <= R[j]) { | |
// Set the current value of the array to the current value of the left chunk | |
arr[k] = L[i]; | |
// ONLY increment i, NOT j, to indicate that we have already added L[i] to the array, | |
// but haven't added R[j] to the array yet | |
i++; | |
} else { | |
// If we've reached the else, then the current value of R must be less than the current value of L, | |
// which means it's going into the original array first. | |
arr[k] = R[j]; | |
// ONLY increment j, NOT i, since we have now added R[j] to the array, but haven't added L[i] yet | |
j++; | |
} | |
// At the end of every iteration of the loop, whether or not we used up L[i] or R[j], the current value | |
// of the original array has been updated, so we need to increment k to make sure that next time we're | |
// updating the next value in the array, and not updating the same value twice. | |
k++; | |
} | |
// To demonstrate how this works, let's bring back our example chunks of {3, 4, 5} and {1, 2} | |
// In this case, L = {3, 4, 5} and R = {1, 2}, and we're going through both of them at once. | |
// L[0] is 3, and R[0] is 1. R[0] is LESS than L[0], so we have entered the "else" part of the | |
// code above. This means that we're going to be setting the current value of the array (which, | |
// since l = 0 and k = l, is the 0th value of the array) to R[0] which is 1. | |
// | |
// We increment j so that j = 1, but i is still equal to 0 since we haven't added L[0], i.e. 3 | |
// to the array yet. | |
// | |
// On the next iteration of the loop, we will compare L[0] to R[1] since we have incremented j but | |
// not i. L[0] is equal to 3, and R[1] is equal to 2. | |
// | |
// R[1] is also less than L[0], so we are in the "else" part of our code once again. We will now set | |
// arr[1] (since k was incremented from 0 to 1) equal to R[1] since j = 1, meaning the 2nd element of | |
// arr is now equal to 2 (R[1] = 2). j is incremented again so that it becomes 2, but now that j is no | |
// longer less than the size of the right chunk, the while loop exits. Our original array has only had | |
// 2 values from the right chunk placed in it. How about all the values in the left chunk that we haven't | |
// used yet? We'll handle those next. | |
// We will simply add all the remaining values of L, the left chunk, to the original array in order. | |
// After all, we know they're sorted. It just so happened that none of these values were less than their | |
// corresponding values in the right chunk, so we need to add them in after the comparison stage. | |
while (i < nl) { | |
arr[k] = L[i]; | |
i++; | |
k++; | |
} | |
// In the occassion that what occurred in our example were to happen the other way around, i.e. all | |
// the values in the left chunk were left than all the values in the right chunk, or something along | |
// those lines, we would need to do the same thing as above but with the right chunk. | |
while (j < nr) { | |
arr[k] = R[j]; | |
j++; | |
k++; | |
} | |
// Success! All the values in each sorted chunk of the original array have been placed in their correct order | |
// in the original array because we compared them /while/ we were inserting them and made sure to insert them | |
// in the correct order. | |
} | |
// The function we just wrote handles the process of merging/sorting individual chunks, but we still need to write | |
// a mergeSort function that will split the array into chunks and hand them over to the merge function after the | |
// chunks are sufficiently small. | |
// When we initially call this function, | |
// int arr[] will be the original input array | |
// int l will be 0, the beginning of the array | |
// int r will be the final index of the array | |
void mergeSort(int arr[], int l, int r) { | |
// This is a recursive function, and in our recursive calls, we make sure that either | |
// 1. The new input values for l are always increasing, or ... | |
// 2. The new input values for r are always decreasing | |
// | |
// This means that eventually, l will be greater than or equal to r, and that means | |
// we've gone through as much of the array as we can, so we can return. | |
if (l >= r) { | |
return; | |
} | |
// Here we compute m, which is the final index of the left chunk of our array. | |
// It can also be thought of or referred to as the midpoint of our array, or at | |
// least the array we're currently working with. We can easily compute this value | |
// by adding the initial and final indices together and dividing their sum by 2. | |
int m = (l + r) / 2; | |
// Now that we have m computed, we want to recursively run our mergeSort function on | |
// a chunk from l to m, and a chunk from m + 1 to r. After these two chunks are merged | |
// and sorted, we want to merge them together. | |
mergeSort(arr, l, m); | |
mergeSort(arr, m+1, r); | |
merge(arr, l, m, r); | |
} | |
int main() { | |
int arr[] = {5, 4, 3, 1, 2}; | |
int n = 5; | |
for (int i = 0; i < n; i++) { | |
// cout is like print for C++ | |
// Here I'm printing out every value | |
// in the array with a space in between | |
cout << arr[i] << " "; | |
} | |
// Print a newline after that | |
cout << "\n"; | |
// Sort the array | |
mergeSort(arr, 0, n - 1); | |
for (int i = 0; i < n; i++) { | |
cout << arr[i] << " "; | |
} | |
cout << "\n"; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment