Created
January 23, 2014 18:10
-
-
Save netshade/8583749 to your computer and use it in GitHub Desktop.
level0
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "bloom.h"

#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifndef DEBUG
#include "filter.h"
#include "filter_bytes.h"
#endif
/* Size in bytes of the chunk buffer used when reading the word-list file. */
#define BUFFER_SIZE 8192
/* Size in bytes of the buffer main() fills from stdin. */
#define LINE_BUFFER_SIZE 1024
/* Capacity in bytes of each per-word buffer in main(); should not exceed
 * LINE_BUFFER_SIZE. */
#define WORD_BUFFER_SIZE 1024
struct bloom * constructBloomFilter(char * file){ | |
FILE * f = fopen(file, "r"); | |
char buffer[BUFFER_SIZE]; | |
size_t amt; | |
int last_position = 0; | |
int i = 0; | |
struct bloom * bfilter = malloc(sizeof(struct bloom)); | |
// we'll just assume this is acceptable chance of error | |
if(bloom_init(bfilter, 234937, 0.00001) != 0){ | |
printf("Couldn't initialize filter\n"); | |
exit(1); | |
} | |
unsigned int count = 0; | |
while(!feof(f)){ | |
fseek(f, last_position, SEEK_SET); | |
amt = fread(buffer, 1, BUFFER_SIZE, f); | |
if(amt > 0){ | |
int last_word = 0; | |
for(i = 0; i < amt; i++){ | |
if(buffer[i] == '\n'){ | |
count ++; | |
if(bloom_add(bfilter, &buffer[last_word], i - last_word) == -1){ | |
printf("Couldn't create bloom filter\n"); | |
exit(1); | |
} | |
last_position += (i - last_word) + 1; | |
last_word = i + 1; | |
} | |
} | |
} | |
} | |
fclose(f); | |
return bfilter; | |
} | |
int main(int argc, const char ** argv){ | |
struct bloom * filter = NULL; | |
#ifdef DEBUG | |
if(argc == 2){ // file specified | |
filter = constructBloomFilter(argv[1]); | |
} else if(argc == 3){ // file and bloom filter output specified | |
filter = constructBloomFilter(argv[1]); | |
} else { // use default | |
printf("Default\n"); | |
} | |
#else | |
filter = (struct bloom *) __struct_dat; | |
filter->bf = __struct_bytes_dat; | |
if(filter->bytes != __struct_bytes_dat_len){ | |
printf("Precompiled filter not same size"); | |
exit(1); | |
} | |
#endif | |
char buffer[LINE_BUFFER_SIZE]; | |
char wordBuffer[WORD_BUFFER_SIZE]; | |
char lowerWordBuffer[WORD_BUFFER_SIZE]; | |
size_t readAmt = 0; | |
int wordLen = 0; | |
int i = 0; | |
int res = 0; | |
while( (readAmt = fread(buffer, 1, WORD_BUFFER_SIZE, stdin)) > 0){ | |
for(i = 0; i < readAmt; i++){ | |
if(buffer[i] == '\n' || buffer[i] == ' '){ | |
if(wordLen > 0){ | |
res = bloom_check(filter, lowerWordBuffer, wordLen); | |
if(res == 0){ | |
fwrite("<", 1, 1, stdout); | |
fwrite(wordBuffer, 1, wordLen, stdout); | |
fwrite(">", 1, 1, stdout); | |
} else if(res == 1){ | |
fwrite(wordBuffer, 1, wordLen, stdout); | |
} else { | |
printf("Bloom not initialized\n"); | |
exit(1); | |
} | |
} | |
fwrite(&buffer[i], 1, 1, stdout); | |
wordLen = 0; | |
} else { | |
wordBuffer[wordLen] = buffer[i]; | |
lowerWordBuffer[wordLen] = tolower(buffer[i]); | |
wordLen ++; | |
} | |
} | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
filter.h and filter_bytes.h are header files with the struct's memory inlined.