Skip to content

Instantly share code, notes, and snippets.

@netshade
Created January 23, 2014 18:10
Show Gist options
  • Save netshade/8583749 to your computer and use it in GitHub Desktop.
Save netshade/8583749 to your computer and use it in GitHub Desktop.
level0
#include "bloom.h"
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef DEBUG
#include "filter.h"
#include "filter_bytes.h"
#endif
#define BUFFER_SIZE 8192
#define LINE_BUFFER_SIZE 1024
#define WORD_BUFFER_SIZE 1024
/*
 * Add one word (len bytes, no terminator required) to the filter.
 * Prints a message and exits with status 1 if bloom_add reports -1.
 */
static void addWord(struct bloom * bfilter, const char * word, size_t len){
    if(bloom_add(bfilter, word, (int) len) == -1){
        printf("Couldn't create bloom filter\n");
        exit(1);
    }
}

/*
 * Build a bloom filter from a newline-delimited word list.
 *
 * file: path of the word list. Every '\n'-terminated line is added to the
 *       filter without its newline (empty lines included). A non-empty
 *       final line that lacks a trailing newline is added as well.
 *
 * Returns a heap-allocated, initialized filter; nothing in this function
 * releases it, so the caller is responsible for it.
 *
 * Any failure (file cannot be opened, allocation or bloom_init fails,
 * bloom_add reports -1, a read error occurs, or a single line does not fit
 * in BUFFER_SIZE bytes) prints a message and exits with status 1.
 */
struct bloom * constructBloomFilter(const char * file){
    FILE * f = fopen(file, "r");
    if(f == NULL){
        printf("Couldn't open %s\n", file);
        exit(1);
    }

    struct bloom * bfilter = malloc(sizeof *bfilter);
    if(bfilter == NULL){
        printf("Couldn't allocate filter\n");
        exit(1);
    }
    // we'll just assume this is acceptable chance of error
    // (234937 is presumably the expected number of entries -- TODO confirm)
    if(bloom_init(bfilter, 234937, 0.00001) != 0){
        printf("Couldn't initialize filter\n");
        exit(1);
    }

    char buffer[BUFFER_SIZE];
    size_t pending = 0; // bytes of an unfinished line kept at the buffer start
    size_t amt;

    // Read the file strictly forward. The tail of a chunk that is not yet
    // terminated by '\n' is moved to the front of the buffer and completed
    // by the next read, so no seeking or re-reading is required.
    while((amt = fread(buffer + pending, 1, sizeof buffer - pending, f)) > 0){
        size_t filled = pending + amt;
        size_t word_start = 0;

        // The first `pending` bytes were scanned on the previous pass and
        // contain no newline, so scanning resumes after them.
        for(size_t i = pending; i < filled; i++){
            if(buffer[i] == '\n'){
                addWord(bfilter, &buffer[word_start], i - word_start);
                word_start = i + 1;
            }
        }

        pending = filled - word_start;
        if(pending == sizeof buffer){
            // A full buffer without a newline can never make progress.
            printf("Line longer than %d bytes in %s\n", BUFFER_SIZE, file);
            exit(1);
        }
        memmove(buffer, buffer + word_start, pending);
    }

    if(ferror(f)){
        printf("Couldn't read %s\n", file);
        exit(1);
    }

    // Final word when the file does not end with a newline.
    if(pending > 0){
        addWord(bfilter, buffer, pending);
    }

    fclose(f);
    return bfilter;
}
/*
 * Write one complete word to stdout, wrapped in '<' and '>' when
 * bloom_check reports it absent (0) and unwrapped when it reports it
 * present (1).
 *
 * word:      the word exactly as read from the input.
 * lowerWord: the lower-cased copy that is looked up in the filter.
 * len:       number of bytes in both buffers.
 *
 * Any other bloom_check result prints a message and exits with status 1.
 */
static void emitWord(struct bloom * filter, const char * word, const char * lowerWord, int len){
    int res = bloom_check(filter, lowerWord, len);
    if(res == 0){
        fwrite("<", 1, 1, stdout);
        fwrite(word, 1, (size_t) len, stdout);
        fwrite(">", 1, 1, stdout);
    } else if(res == 1){
        fwrite(word, 1, (size_t) len, stdout);
    } else {
        printf("Bloom not initialized\n");
        exit(1);
    }
}

/*
 * Copy stdin to stdout, wrapping every word the bloom filter does not
 * recognise in '<' and '>'. Words are delimited by ' ' and '\n' and are
 * looked up in lower case; the delimiters themselves are copied through.
 *
 * DEBUG builds construct the filter from the word list named by argv[1];
 * other builds use the filter data compiled in via filter.h and
 * filter_bytes.h.
 *
 * Returns 0 on success; exits with status 1 when no usable filter is
 * available.
 */
int main(int argc, const char ** argv){
    struct bloom * filter = NULL;
#ifdef DEBUG
    if(argc == 2 || argc == 3){
        // argv[1] is the word list. A second argument (bloom filter output)
        // is accepted, but nothing is written to it here.
        filter = constructBloomFilter(argv[1]);
    } else {
        // There is no default filter in DEBUG builds; continuing would hand
        // a NULL filter to bloom_check.
        printf("usage: %s <wordlist> [<filter-output>]\n", argc > 0 ? argv[0] : "level0");
        exit(1);
    }
#else
    // NOTE(review): presumably __struct_dat holds the raw bytes of a
    // struct bloom and __struct_bytes_dat its bit array -- confirm against
    // filter.h / filter_bytes.h (including alignment of __struct_dat).
    filter = (struct bloom *) __struct_dat;
    filter->bf = __struct_bytes_dat;
    if(filter->bytes != __struct_bytes_dat_len){
        printf("Precompiled filter not same size");
        exit(1);
    }
#endif
    char buffer[LINE_BUFFER_SIZE];
    char wordBuffer[WORD_BUFFER_SIZE];
    char lowerWordBuffer[WORD_BUFFER_SIZE];
    size_t readAmt;
    int wordLen = 0;
    // Set once the current word has outgrown wordBuffer. Such a word is
    // reported as unknown and streamed straight through instead of being
    // buffered, so the word buffers can never overflow.
    int overlong = 0;

    while((readAmt = fread(buffer, 1, sizeof buffer, stdin)) > 0){
        for(size_t i = 0; i < readAmt; i++){
            char c = buffer[i];
            if(c == '\n' || c == ' '){
                if(overlong){
                    fwrite(">", 1, 1, stdout);
                    overlong = 0;
                } else if(wordLen > 0){
                    emitWord(filter, wordBuffer, lowerWordBuffer, wordLen);
                }
                fwrite(&c, 1, 1, stdout);
                wordLen = 0;
            } else if(overlong){
                fwrite(&c, 1, 1, stdout);
            } else if(wordLen == WORD_BUFFER_SIZE){
                // Buffer full: open the "unknown" marker, flush what has
                // been collected and stream the rest of the word.
                fwrite("<", 1, 1, stdout);
                fwrite(wordBuffer, 1, (size_t) wordLen, stdout);
                fwrite(&c, 1, 1, stdout);
                overlong = 1;
                wordLen = 0;
            } else {
                wordBuffer[wordLen] = c;
                // tolower requires a value representable as unsigned char.
                lowerWordBuffer[wordLen] = (char) tolower((unsigned char) c);
                wordLen++;
            }
        }
    }

    // Input may end without a trailing delimiter; flush the last word.
    if(overlong){
        fwrite(">", 1, 1, stdout);
    } else if(wordLen > 0){
        emitWord(filter, wordBuffer, lowerWordBuffer, wordLen);
    }
    return 0;
}
@netshade
Copy link
Author

`filter.h` and `filter_bytes.h` are header files that embed the precompiled filter's struct and bit-array memory inline.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment