Created
March 20, 2020 22:23
-
-
Save mrdmnd/734b76f7c73ce8ee2abf824348fb412f to your computer and use it in GitHub Desktop.
Solutions to the second problem set
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "stdio.h" | |
#include "stdlib.h" | |
#include "string.h" | |
#define MAX_DICTIONARY_SIZE 300000 | |
#define MAX_WORD_LENGTH 15 | |
// We should guarantee room for a newline and null byte in our buffer. | |
#define BUFFER_SIZE (MAX_WORD_LENGTH + 2) | |
int populate_dictionary(char *filepath, char dictionary[][BUFFER_SIZE]) { | |
FILE *fp = fopen(filepath, "r"); | |
if (fp == NULL) { | |
puts("Error: could not open file.\n"); | |
exit(1); | |
} | |
int num_words = 0; | |
while (fgets(dictionary[num_words], BUFFER_SIZE, fp)) { | |
// Remove trailing newline: fgets() leaves the newline on, and we don't want | |
// that. | |
dictionary[num_words][strcspn(dictionary[num_words], "\n")] = 0; | |
num_words++; | |
} | |
// Remember to close the file! | |
fclose(fp); | |
return num_words; | |
} | |
// This function takes a "needle" as `input` and a `dictionary` containing | |
// `num_words` elements, and prints all elements in the dictionary that | |
// containing the "needle" as a substring. It returns the number of matches | |
// found. | |
int count_matches(char *input, char dictionary[][BUFFER_SIZE], int num_words) { | |
int num_matches = 0; | |
for (int i = 0; i < num_words; i++) { | |
if (strstr(dictionary[i], input)) { | |
num_matches++; | |
printf("%s\n", dictionary[i]); | |
} | |
} | |
return num_matches; | |
} | |
// This function returns 1 (true) if a and b are anagrams of each other (contain | |
// the exact same characters) and 0 (false) otherwise. | |
int is_anagram_pair(char *a, char *b) { | |
// Build letter frequency counters. | |
// a_freq[0] counts the number of A characters in a | |
// a_freq[1] counts the number of B characters in a | |
// ... | |
// b_freq[0] counts the number of A characters in b | |
// ... | |
int a_freq[26] = {0}; | |
int b_freq[26] = {0}; | |
// Walk through the strings, and increment the appropriate frequency counter. | |
// Note: doing "character subtraction" here is well defined! These get | |
// promoted to integers (between 0 and 255), and the ordering lets us define | |
// things like 'C' - 'A' = 2. This ONLY WORKS if all of the characters 'ch' in | |
// our input strings are defined such that 'ch' - 'A' is in [0, 25] | |
// (otherwise, you'll attempt to access an invalid index in the array and get | |
// a segfault) - as it turns out, this works out fine because all of the | |
// characters in the input are in the capital letter set. Just a neat trick | |
// here to build a map from character to count! | |
for (int i = 0; i < strlen(a); i++) { | |
a_freq[a[i] - 'A']++; | |
} | |
for (int i = 0; i < strlen(b); i++) { | |
b_freq[b[i] - 'A']++; | |
} | |
// Short circuit if frequencies ever mismatch: | |
// as soon as we have a character in one word that's not in another, | |
// we know these words are not anagrams. | |
for (int i = 0; i < 26; i++) { | |
if (a_freq[i] != b_freq[i]) { | |
return 0; | |
} | |
} | |
return 1; | |
} | |
// This function loops over the dictionary and prints all words that are | |
// anagrams of the input. | |
void anagrams(char *input, char dictionary[][BUFFER_SIZE], int num_words) { | |
for (int i = 0; i < num_words; i++) { | |
// Compare the letter frequencies of each word to validate anagram-hood. | |
if (is_anagram_pair(dictionary[i], input)) { | |
printf("%s\n", dictionary[i]); | |
} | |
} | |
} | |
int main(int argc, char **argv) { | |
if (argc != 2) { | |
puts("Error: pass the dictionary as a command line argument.\n"); | |
exit(1); | |
} | |
char dictionary[MAX_DICTIONARY_SIZE][BUFFER_SIZE]; | |
int num_words = populate_dictionary(argv[1], dictionary); | |
printf("Counted %d words.\n", num_words); | |
printf("Tenth element: %s\n", dictionary[9]); | |
printf("Last element: %s\n", dictionary[num_words - 1]); | |
printf("Number of matches for 'STRUCT': %d\n", | |
count_matches("STRUCT", dictionary, num_words)); | |
anagrams("RESPECT", dictionary, num_words); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment