Skip to content

Instantly share code, notes, and snippets.

@iamarkdev
Last active December 11, 2016 08:35
Show Gist options
  • Save iamarkdev/07b126d0ee5d8e3b97128a8752c96862 to your computer and use it in GitHub Desktop.
Save iamarkdev/07b126d0ee5d8e3b97128a8752c96862 to your computer and use it in GitHub Desktop.
Being run with: cc main.c -ldictgen_c; ./a.out ~/Desktop
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <dirent.h>
#include <dictionary_c.h>
/*
 * Tuning values handed to SInGeInit(), in argument order.
 *
 * The size constants are computed in size_t and fully parenthesized:
 *  - a bare `2 << 30` is evaluated as a signed int and overflows, so the
 *    library ends up seeing a huge sign-extended value instead of 2 GiB;
 *  - an unparenthesized `1024 * 1024 * 32` mis-expands when the macro is used
 *    inside a larger expression (e.g. as a divisor).
 */
#define DICT_MAX_SIZE_BYTES ((size_t)1024 * 1024 * 32)  /* 32 MiB */
#define DICT_MIN_PATTERN_SIZE_BYTES 2
#define DICT_STOP_SYMBOL '\0'
#define DICT_MAX_AUTOMATON_SIZE_BYTES ((size_t)2 << 30) /* 2 GiB */
#define DICT_AUTOMATON_COEF 1.0
/*
 * Reads the whole file at `path` into a newly malloc'd buffer.
 *
 * The file is opened in binary mode so the bytes are not translated on
 * platforms that distinguish text from binary streams.
 *
 * path     - file to read.
 * sizeOut  - receives the number of bytes read; set to 0 on failure.
 *
 * Returns the buffer (owned by the caller) on success. Returns NULL when the
 * file cannot be opened, sized, allocated for, or fully read, and also when
 * the file is empty.
 */
static char *readFileContents(const char *path, size_t *sizeOut) {
    char *contents = NULL;
    long endOffset = 0;
    FILE *fileP;

    *sizeOut = 0;
    fileP = fopen(path, "rb");
    if (fileP == NULL) {
        return NULL;
    }

    /* ftell() reports -1 on error, so only a strictly positive offset is usable. */
    if (fseek(fileP, 0, SEEK_END) == 0
            && (endOffset = ftell(fileP)) > 0
            && fseek(fileP, 0, SEEK_SET) == 0) {
        size_t fileSize = (size_t)endOffset;
        contents = malloc(fileSize);
        if (contents != NULL) {
            if (fread(contents, 1, fileSize, fileP) == fileSize) {
                *sizeOut = fileSize;
            } else {
                /* Short read: do not hand a partially filled buffer to the caller. */
                free(contents);
                contents = NULL;
            }
        }
    }

    fclose(fileP);
    return contents;
}

/*
 * Builds a dictionary from every regular, non-hidden file directly inside the
 * directory given as argv[1] and writes it to "dictionary.sdch" in the current
 * working directory.
 *
 * Each file is read fully into memory and passed to
 * SInGeAddDocumentViaStopSymbol(); files that cannot be read (or are empty)
 * are reported on stderr and skipped instead of crashing the run.
 *
 * Returns 0 on success, 1 if the argument is missing, the directory cannot be
 * opened, a path allocation fails, or the dictionary file cannot be written.
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "Directory path not provided as argument.\n");
        return 1;
    }

    char *directoryPathP = argv[1];
    DIR *directory = opendir(directoryPathP);
    if (!directory) {
        fprintf(stderr, "\"%s\" is not a valid directory.\n", argv[1]);
        return 1;
    }

    SInGe generator = SInGeInit(
        DICT_MAX_SIZE_BYTES,
        DICT_MIN_PATTERN_SIZE_BYTES,
        DICT_STOP_SYMBOL,
        DICT_MAX_AUTOMATON_SIZE_BYTES,
        DICT_AUTOMATON_COEF
    );

    int exitCode = 1;
    struct dirent *directoryEntryP;
    while ((directoryEntryP = readdir(directory))) {
        /* NOTE(review): d_type can be DT_UNKNOWN on some filesystems, in which
         * case regular files are silently skipped here. */
        if (directoryEntryP->d_type != DT_REG || directoryEntryP->d_name[0] == '.') {
            continue;
        }

        /* +2 for the '/' separator and the terminating '\0'. */
        size_t filePathSize = strlen(directoryPathP) + strlen(directoryEntryP->d_name) + 2;
        char *filePathP = malloc(filePathSize);
        if (filePathP == NULL) {
            fprintf(stderr, "Could not allocate memory for file path.");
            goto cleanup;
        }
        snprintf(filePathP, filePathSize, "%s/%s", directoryPathP, directoryEntryP->d_name);

        size_t fileSize = 0;
        char *fileContents = readFileContents(filePathP, &fileSize);
        if (fileContents == NULL) {
            fprintf(stderr, "SKIPPING (unreadable or empty): %s\n", filePathP);
        } else {
            printf("ADDING: %s\n", filePathP);
            SInGeAddDocumentViaStopSymbol(generator, fileContents, fileSize);
            /* NOTE(review): fileContents is intentionally not freed here, as in
             * the original code. If the library copies the document during the
             * call above, free(fileContents) should be added - confirm against
             * the dictionary_c.h contract. */
        }
        free(filePathP);
    }

    printf("BUILDING DICTIONARY... \n");
    struct SInGeDict dictionary = SInGeGetDict(generator);
    /* The dictionary is binary data that may contain '\0' bytes, so its size
     * comes from .length; strlen() would stop at the first embedded NUL. */
    printf("DICTIONARY SIZE: %zu bytes\n", dictionary.length);

    FILE *dictionaryFileP = fopen("dictionary.sdch", "wb");
    if (dictionaryFileP == NULL) {
        fprintf(stderr, "Could not create or open dictionary.sdch for writing.");
        goto cleanup;
    }

    /* fwrite() with an explicit length writes the full dictionary; fputs()
     * would truncate the output at the first '\0' byte. */
    size_t bytesWritten = fwrite(dictionary.data, 1, dictionary.length, dictionaryFileP);
    if (fclose(dictionaryFileP) != 0 || bytesWritten != dictionary.length) {
        fprintf(stderr, "Could not write dictionary.sdch completely.\n");
        goto cleanup;
    }

    exitCode = 0;

cleanup:
    closedir(directory);
    /* Released only after the dictionary has been written out. */
    SInGeFree(generator);
    if (exitCode == 0) {
        printf("SUCCESS\n");
    }
    return exitCode;
}
@iamarkdev
Copy link
Author

Output:

ADDING: /Users/braydonbatungbacal/Desktop/paystub.pdf
ADDING: /Users/braydonbatungbacal/Desktop/Screen Shot 2016-12-06 at 7.29.14 PM.png
ADDING: /Users/braydonbatungbacal/Desktop/Screen Shot 2016-12-06 at 7.30.26 PM.png
ADDING: /Users/braydonbatungbacal/Desktop/Screen Shot 2016-12-10 at 10.26.07 PM.png
ADDING: /Users/braydonbatungbacal/Desktop/Screen Shot 2016-12-10 at 8.49.57 PM.png
ADDING: /Users/braydonbatungbacal/Desktop/Screen Shot 2016-12-10 at 9.11.19 PM.png
ADDING: /Users/braydonbatungbacal/Desktop/Screen Shot 2016-12-11 at 12.32.29 AM.png
BUILDING DICTIONARY...
533 | 2493113
kMaxSize=18446744071562067968 kCoef=1
SUCCESS

@iamarkdev
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment