Created
January 23, 2012 15:04
-
-
Save jrosskopf/1663558 to your computer and use it in GitHub Desktop.
14_dups
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "dups.h" | |
void add_single_file_to_ht(hash_table *ht, char *absolute_file_name, struct stat *stat) { | |
assert(ht); | |
assert(absolute_file_name); | |
size_t file_size = (size_t)stat->st_size; | |
ht_insert(ht, file_size, (hash_value *)absolute_file_name); | |
} | |
char *build_absolute_name(char *directory_name, char *file_name) { | |
unsigned int len = strlen(directory_name) + strlen("/") + strlen(file_name) + 1; | |
char *abs_file_name = (char *)malloc(len * sizeof(char)); | |
memset(abs_file_name,'\0', len); | |
strncat(abs_file_name, directory_name, len); | |
strncat(abs_file_name, "/", len); | |
strncat(abs_file_name, file_name, len); | |
return abs_file_name; | |
} | |
void add_files_to_ht(hash_table *ht, char *directory_name) { | |
DIR *d_handle = opendir(directory_name); | |
if (d_handle == NULL) { | |
fprintf(stderr, "**Warning** Kann Verzeichnis '%s' nicht öffnen\n", directory_name); | |
return; | |
} | |
struct dirent *d_entry = NULL; | |
for(d_entry = readdir(d_handle); d_entry != NULL; d_entry = readdir(d_handle)) | |
{ | |
if (strcmp(d_entry->d_name, ".") == 0 || strcmp(d_entry->d_name, "..") == 0) | |
continue; | |
char *absolute_file_name = build_absolute_name(directory_name, d_entry->d_name); | |
struct stat stat; | |
if (lstat(absolute_file_name, &stat) == -1) { | |
fprintf(stderr, "**Warning** Stat für '%s' fehlgeschlagen\n", absolute_file_name); | |
free(absolute_file_name); | |
continue; | |
} | |
//printf("-> %s\n", absolute_file_name); | |
if (S_ISDIR(stat.st_mode)) | |
add_files_to_ht(ht, absolute_file_name); | |
if (S_ISREG(stat.st_mode)) | |
add_single_file_to_ht(ht, absolute_file_name, &stat); | |
free(absolute_file_name); | |
} | |
closedir(d_handle); | |
} | |
void begin_add_files_to_ht(hash_table *ht, char *rel_dir_name) { | |
assert(ht); | |
assert(rel_dir_name); | |
if (chdir(rel_dir_name) != 0) { | |
fprintf(stderr, "**Warning** Kann nicht ins Verzeichnis '%s' wechseln\n", rel_dir_name); | |
return; | |
} | |
char abs_dir_name[MAXPATHLEN]; | |
if (getcwd(abs_dir_name, MAXPATHLEN) == NULL) { | |
fprintf(stderr, "**Warning** Kann aktuelles Arbeitsverzeichnis nicht öffnen\n"); | |
return; | |
} | |
add_files_to_ht(ht, abs_dir_name); | |
} | |
void show_duplicates_if_neccessary(hash_table *ht, size_t unique_key) { | |
assert(ht); | |
hash_query_result *res = ht_query(ht, unique_key); | |
if (res->n_results < 2) { | |
ht_query_free(ht, res); | |
return; | |
} | |
int i = 0; | |
for (hash_entry *cur_entry = res->first_entry; cur_entry != NULL; cur_entry = cur_entry->next) { | |
if (i == 0) | |
printf("+ %10lu %s\n", cur_entry->key, (char *)cur_entry->value); | |
else | |
printf("+->%10s %s\n", " ", (char *)cur_entry->value); | |
i++; | |
} | |
printf("\n"); | |
ht_query_free(ht, res); | |
} | |
int main(int argc, char **argv) { | |
hash_table *ht = ht_new(1024); | |
for (int i = 1; i < argc; i++) { | |
begin_add_files_to_ht(ht, argv[i]); | |
} | |
for (unsigned int i = 0; i < ht->n_unique_keys; i++) { | |
show_duplicates_if_neccessary(ht, ht->key_buffer[i]); | |
} | |
//ht_dump(ht); | |
ht_free(ht); | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef _DUPS_H_ | |
#define _DUPS_H_ | |
#include <dirent.h> | |
#include <sys/param.h> | |
#include <sys/stat.h> | |
#include <sys/types.h> | |
#include <time.h> | |
#include <unistd.h> | |
#include "hashtable.h" | |
#endif // _DUPS_H_ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "hashtable.h" | |
unsigned int _ht_size_t_hash_key(size_t key) { | |
return key; | |
} | |
bool _ht_size_t_key_eq(size_t k1, size_t k2) { | |
return k1 == k2 ? true : false; | |
} | |
hash_entry *_ht_dup_entry(hash_entry *orig_entry) { | |
hash_entry *new_entry = calloc(1, sizeof(hash_entry)); | |
new_entry->key = orig_entry->key; | |
new_entry->value = (hash_value *)strdup((char *)(orig_entry->value)); | |
return new_entry; | |
} | |
void _ht_free_entry(hash_entry *entry) { | |
if (entry == NULL) | |
return; | |
if (entry->value != NULL) | |
free(entry->value); | |
} | |
hash_table *ht_new(unsigned int capacity) { | |
hash_table *ht = calloc(1, sizeof(hash_table)); | |
assert(ht); | |
ht->n_buckets = capacity; | |
ht->buckets = calloc(capacity, sizeof(hash_bucket)); | |
assert(ht->buckets); | |
ht->key_buffer_size = KEY_BUFFER_INCREMENT; | |
ht->n_unique_keys = 0; | |
ht->key_buffer = malloc(ht->key_buffer_size * sizeof(size_t)); | |
ht->hash = _ht_size_t_hash_key; | |
ht->are_keys_eq = _ht_size_t_key_eq; | |
ht->dup_entry = _ht_dup_entry; | |
ht->free_entry = _ht_free_entry; | |
return ht; | |
} | |
void _ht_dump_bucket(hash_bucket bucket) { | |
for (hash_entry *cur_entry = bucket.first_entry; cur_entry != NULL; cur_entry = cur_entry->next) { | |
printf("{%lu => %s} ", cur_entry->key, (char *)cur_entry->value); | |
} | |
} | |
void ht_dump(hash_table *ht) { | |
assert(ht != NULL); | |
for (int i = 0; i < ht->n_buckets; i++) { | |
printf("[%04d] -> { ", i); | |
_ht_dump_bucket(ht->buckets[i]); | |
printf("}\n"); | |
} | |
printf("Unique Keys in Hashtable: %lu\n", ht->n_unique_keys); | |
printf("Total Keys in Hashtable: %lu\n", ht->n_keys); | |
//for (size_t i = 0; i < ht->n_unique_keys; i++) { | |
// printf("%lu, " (ht->key_buffer[i])); | |
//} | |
printf("\n\n"); | |
} | |
void _ht_free_bucket(hash_table *ht, hash_bucket bucket) { | |
for (hash_entry *cur_entry = bucket.first_entry; cur_entry != NULL;) { | |
ht->free_entry(cur_entry); | |
hash_entry *prev_entry = cur_entry; | |
cur_entry = cur_entry->next; | |
free(prev_entry); | |
} | |
} | |
void ht_free(hash_table *ht) { | |
if (ht == NULL) | |
return; | |
for (unsigned int i = 0; i < ht->n_buckets; i++) { | |
_ht_free_bucket(ht, ht->buckets[i]); | |
} | |
free(ht->buckets); | |
free(ht->key_buffer); | |
free(ht); | |
} | |
hash_entry *ht_get_entry_first_or_default(hash_table *ht, size_t key) { | |
assert(ht); | |
unsigned int hash_value = ht->hash(key) % ht->n_buckets; | |
hash_bucket bucket = ht->buckets[hash_value]; | |
for (hash_entry *cur_entry = bucket.first_entry; cur_entry != NULL; cur_entry = cur_entry->next) { | |
if (ht->are_keys_eq(cur_entry->key, key) == true) | |
return cur_entry; | |
} | |
return NULL; | |
} | |
bool ht_contains_key(hash_table *ht, size_t key) { | |
assert(ht); | |
return ht_get_entry_first_or_default(ht, key) != NULL ? true : false; | |
} | |
void _ht_add_key_to_unique_buffer(hash_table *ht, size_t key) { | |
assert(ht); | |
ht->n_keys++; | |
if (ht_contains_key(ht, key) == true) | |
return; | |
if (ht->key_buffer_size <= ht->n_unique_keys) { | |
ht->key_buffer_size += KEY_BUFFER_INCREMENT; | |
ht->key_buffer = realloc(ht->key_buffer, ht->key_buffer_size * sizeof(size_t)); | |
assert(ht->key_buffer); | |
} | |
ht->key_buffer[ht->n_unique_keys++] = key; | |
} | |
void ht_insert(hash_table *ht, size_t key, hash_value *value) { | |
assert(ht); | |
_ht_add_key_to_unique_buffer(ht, key); | |
unsigned int hash_value = ht->hash(key) % ht->n_buckets; | |
hash_bucket *bucket = &(ht->buckets[hash_value]); | |
hash_entry *entry = calloc(1, sizeof(hash_entry)); | |
entry->key = key; | |
entry->value = strdup((char *)(value)); | |
entry->next = bucket->first_entry; | |
bucket->first_entry = entry; | |
} | |
void ht_put(hash_table *ht, size_t key, hash_value *value) { | |
assert(ht); | |
hash_entry *entry = ht_get_entry_first_or_default(ht, key); | |
if (entry == NULL) { | |
ht_insert(ht, key, value); | |
} | |
else { | |
ht->free_entry(entry); | |
entry->key = key; | |
entry->value = value; | |
} | |
} | |
hash_value *ht_get_first_or_default(hash_table *ht, size_t key) { | |
assert(ht); | |
hash_entry *entry = ht_get_entry_first_or_default(ht, key); | |
if (entry == NULL) | |
return NULL; | |
return entry->value; | |
} | |
hash_query_result *ht_query(hash_table *ht, size_t key) { | |
assert(ht); | |
hash_query_result *ret = calloc(1, sizeof(hash_query_result)); | |
ret->key = key; | |
unsigned int hash_value = ht->hash(key) % ht->n_buckets; | |
hash_bucket bucket = ht->buckets[hash_value]; | |
for (hash_entry *cur_entry = bucket.first_entry; cur_entry != NULL; cur_entry = cur_entry->next) { | |
if (! ht->are_keys_eq(cur_entry->key, key)) | |
continue; | |
hash_entry *entry_copy = ht->dup_entry(cur_entry); | |
entry_copy->next = ret->first_entry; | |
ret->first_entry = entry_copy; | |
ret->n_results++; | |
} | |
return ret; | |
} | |
void ht_query_free(hash_table *ht, hash_query_result *res) { | |
assert(ht); | |
if (res == NULL) | |
return; | |
for (hash_entry *cur_entry = res->first_entry; cur_entry != NULL;) { | |
ht->free_entry(cur_entry); | |
hash_entry *prev_entry = cur_entry; | |
cur_entry = cur_entry->next; | |
free(prev_entry); | |
} | |
free(res); | |
} | |
void ht_query_dump(hash_query_result *res) { | |
printf("[%04lu] { ", res->key); | |
for (hash_entry *cur_entry = res->first_entry; cur_entry != NULL; cur_entry = cur_entry->next) { | |
printf("{%s} ", (char *)cur_entry->value); | |
} | |
printf("}\n"); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef _HASHTABLE_H_ | |
#define _HASHTABLE_H_ | |
#include <assert.h> | |
#include <dirent.h> | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <sys/stat.h> | |
#include <sys/types.h> | |
#define KEY_BUFFER_INCREMENT 1024 | |
typedef char * hash_value; | |
struct _hash_entry; | |
typedef unsigned int (*func_hash)(size_t key); | |
typedef struct _hash_entry *(*func_dup_entry)(struct _hash_entry *orig_entry); | |
typedef void (*func_free_entry)(struct _hash_entry *entry); | |
typedef bool (*func_are_keys_eq)(size_t k1, size_t k2); | |
typedef struct _hash_entry { | |
size_t key; | |
hash_value *value; | |
struct _hash_entry *next; | |
} hash_entry; | |
typedef struct _hash_bucket { | |
hash_entry *first_entry; | |
} hash_bucket; | |
typedef struct _hash_table { | |
unsigned int n_buckets; | |
hash_bucket *buckets; | |
size_t key_buffer_size; | |
size_t n_unique_keys; | |
size_t n_keys; | |
size_t *key_buffer; | |
func_hash hash; | |
func_are_keys_eq are_keys_eq; | |
func_dup_entry dup_entry; | |
func_free_entry free_entry; | |
} hash_table; | |
typedef struct _hash_query_result { | |
unsigned int n_results; | |
size_t key; | |
hash_entry *first_entry; | |
} hash_query_result; | |
hash_table *ht_new(unsigned int capacity); | |
void ht_free(hash_table *ht); | |
void ht_dump(hash_table *ht); | |
hash_entry *ht_get_entry_first_or_default(hash_table *ht, size_t key); | |
bool ht_contains_key(hash_table *ht, size_t key); | |
void ht_insert(hash_table *ht, size_t key, hash_value *value); | |
void ht_put(hash_table *ht, size_t key, hash_value *value); | |
hash_value *ht_get_first_or_default(hash_table *ht, size_t key); | |
hash_query_result *ht_query(hash_table *ht, size_t key); | |
void ht_query_free(hash_table *ht, hash_query_result *res); | |
void ht_query_dump(hash_query_result *res); | |
#endif // _HASHTABLE_H_ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CFLAGS = -Wall -g -std=gnu99 | |
LFLAGS = -R/usr/local/lib -lm | |
SHARED = -fPIC -shared | |
CC = gcc | |
DEPENDFILE = .dependencies | |
SRC = dups.c hashtable.c | |
SHSRC = | |
OBJ = $(SRC:%.c=%.o) | |
SHOBJ = $(SHSRC:%.c=%.so) | |
BIN = dups | |
.PHONY: dep clean | |
all: $(OBJ) $(SHOBJ) | |
$(CC) $(CFLAGS) -o $(BIN) $(OBJ) $(LFLAGS) | |
dep: | |
$(CC) -MM $(SRC) $(SHSRC) > $(DEPENDFILE) | |
-include $(DEPENDFILE) | |
$(SHOBJ): %.so: %.c | |
$(CC) -o $@ $(SHARED) $(CFLAGS) $< $(LFLAGS) | |
%.o: %.c | |
$(CC) $(CFLAGS) -c $< | |
clean: | |
rm -f $(OBJ) $(SHOBJ) $(BIN) $(DEPENDFILE) | |
rm -rf *.dSYMS |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment