Last active
December 11, 2016 08:35
-
-
Save iamarkdev/07b126d0ee5d8e3b97128a8752c96862 to your computer and use it in GitHub Desktop.
Compiled and run with: cc main.c -ldictgen_c; ./a.out ~/Desktop
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <memory.h> | |
#include <dirent.h> | |
#include <dictionary_c.h> | |
/*
 * Tuning parameters handed to SInGeInit() in main(), in this order.
 *
 * Every arithmetic expansion is parenthesized so the macro stays a single
 * operand wherever it is used.
 */
#define DICT_MAX_SIZE_BYTES (1024 * 1024 * 32)
#define DICT_MIN_PATTERN_SIZE_BYTES 2
#define DICT_STOP_SYMBOL '\0'
/*
 * 2 GiB. The literal must be wider than int: a plain `2 << 30` overflows a
 * 32-bit signed int (undefined behaviour) and in practice arrived in the
 * library as a huge sign-extended value instead of 2147483648.
 */
#define DICT_MAX_AUTOMATON_SIZE_BYTES (2ULL << 30)
#define DICT_AUTOMATON_COEF 1.0
int main(int argc, char *argv[]) { | |
if (argc < 2) { | |
fprintf(stderr, "Directory path not provided as argument.\n"); | |
return 1; | |
} | |
char *directoryPathP = argv[1]; | |
DIR* directory = opendir(directoryPathP); | |
if (!directory) { | |
fprintf(stderr, "\"%s\" is not a valid directory.\n", argv[1]); | |
return 1; | |
} | |
SInGe generator = SInGeInit( | |
DICT_MAX_SIZE_BYTES, | |
DICT_MIN_PATTERN_SIZE_BYTES, | |
DICT_STOP_SYMBOL, | |
DICT_MAX_AUTOMATON_SIZE_BYTES, | |
DICT_AUTOMATON_COEF | |
); | |
struct dirent *directoryEntryP; | |
while ((directoryEntryP = readdir(directory))) { | |
if (directoryEntryP->d_type == DT_REG && directoryEntryP->d_name[0] != '.') { | |
char *filePathP = malloc(strlen(directoryPathP) + strlen(directoryEntryP->d_name) + 2); // +2 for / and \0 | |
if (filePathP == NULL) { | |
fprintf(stderr, "Could not allocate memory for file path."); | |
return 1; | |
} | |
sprintf(filePathP, "%s/%s", directoryPathP, directoryEntryP->d_name); | |
FILE *fileP = fopen(filePathP, "r"); | |
fseek(fileP, 0, SEEK_END); | |
size_t fileSize = ftell(fileP); | |
fseek(fileP, 0, SEEK_SET); | |
char *fileContents = malloc(fileSize); | |
fread(fileContents, fileSize, 1, fileP); | |
fclose(fileP); | |
printf("ADDING: %s\n", filePathP); | |
SInGeAddDocumentViaStopSymbol(generator, fileContents, fileSize); | |
free(filePathP); | |
} | |
} | |
printf("BUILDING DICTIONARY... \n"); | |
struct SInGeDict dictionary = SInGeGetDict(generator); | |
printf("%lu | %zu\n", strlen(dictionary.data), dictionary.length); | |
FILE *dictionaryFileP = fopen("dictionary.sdch", "w"); | |
if (dictionaryFileP == NULL) { | |
fprintf(stderr, "Could not create or open dictionary.sdch for writing."); | |
return 1; | |
} | |
fputs(dictionary.data, dictionaryFileP); | |
fclose(dictionaryFileP); | |
// :) | |
SInGeFree(generator); | |
printf("SUCCESS\n"); | |
return 0; | |
} |
Generated dictionary.sdch output:
http://www.w3.org/1999/02/22-rdf-syntax-ns#http://www.w3.org/1999/02/22-rdf-syntax-nshttp://www.w3.org/1999/02/22-rdf-syntaxhttp://www.w3.org/1999/02/22-rdf-synhttp://www.w3.org/1999/02/22-rdf-syhttp://www.w3.org/1999/02/22-rdf-shttp://www.w3.org/1999/02/22-rdf-http://www.w3.org/1999/02/22-rdf<rdf:Description rdf:about=http://www.w3.org/1999/02/2<rdf:Description rdf:about<rdf:Description rdf:abohttp://www.w3.org/1999/0http://www.w3.org/1999/<rdf:Description rdf:ab<rdf:Description rdf:ahttp://www.w3.org/1999<rdf:Description rdf:
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output:
ADDING: /Users/braydonbatungbacal/Desktop/paystub.pdf
ADDING: /Users/braydonbatungbacal/Desktop/Screen Shot 2016-12-06 at 7.29.14 PM.png
ADDING: /Users/braydonbatungbacal/Desktop/Screen Shot 2016-12-06 at 7.30.26 PM.png
ADDING: /Users/braydonbatungbacal/Desktop/Screen Shot 2016-12-10 at 10.26.07 PM.png
ADDING: /Users/braydonbatungbacal/Desktop/Screen Shot 2016-12-10 at 8.49.57 PM.png
ADDING: /Users/braydonbatungbacal/Desktop/Screen Shot 2016-12-10 at 9.11.19 PM.png
ADDING: /Users/braydonbatungbacal/Desktop/Screen Shot 2016-12-11 at 12.32.29 AM.png
BUILDING DICTIONARY...
533 | 2493113
kMaxSize=18446744071562067968 kCoef=1
SUCCESS