Created
April 15, 2019 02:48
-
-
Save vsrinivas/1287d576369bacdcdd2fcc3726d5dcde to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _XOPEN_SOURCE 500 | |
#include <sys/types.h> | |
#include <sys/stat.h> | |
#include <errno.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <strings.h> | |
#include <string.h> | |
#include <unordered_set> | |
#include <leveldb/db.h> | |
/* dbtransactor [ref] [src] */ | |
int main(int argc, char* argv[]) | |
{ | |
std::unordered_set<std::string> ref_hashes; | |
int i; | |
int nRefFiles = 0; | |
int nDuplicates = 0; | |
int nMisses = 0; | |
leveldb::DB* ref; | |
leveldb::Options ref_options; | |
ref_options.create_if_missing = false; | |
leveldb::Status status = leveldb::DB::Open(ref_options, std::string(argv[1]), &ref); | |
if (!status.ok()) { | |
printf("Error opening ref\n"); | |
return -1; | |
} | |
leveldb::DB* candidate; | |
status = leveldb::DB::Open(ref_options, std::string(argv[2]), &candidate); | |
if (!status.ok()) { | |
printf("Error opening candidate\n"); | |
return -1; | |
} | |
leveldb::ReadOptions r_options; | |
r_options.verify_checksums = true; | |
r_options.fill_cache = false; | |
leveldb::Iterator* it = ref->NewIterator(r_options); | |
for (it->SeekToFirst(); it->Valid(); it->Next()) { | |
if (!it->status().ok()) { | |
printf("Iteration error!\n"); | |
break; | |
} | |
auto unique = ref_hashes.insert(it->value().ToString()); | |
if (!unique.second) | |
++nDuplicates; | |
++nRefFiles; | |
} | |
delete it; | |
delete ref; | |
it = candidate->NewIterator(r_options); | |
for (it->SeekToFirst(); it->Valid(); it->Next()) { | |
if (!it->status().ok()) { | |
printf("Iteration error!\n"); | |
break; | |
} | |
bool found = ref_hashes.find(it->value().ToString()) != ref_hashes.end(); | |
if (!found) { | |
printf("Not found in ref: %s, %s\n", | |
it->key().ToString().c_str(), | |
it->value().ToString().c_str()); | |
++nMisses; | |
} | |
} | |
delete it; | |
delete candidate; | |
printf("nRefFiles %d\n", nRefFiles); | |
printf("nDuplicates %d\n", nDuplicates); | |
printf("nRefUnique %d\n", ref_hashes.size()); // nDuplicates + this == nRefFiles | |
printf("nMisses %d\n", nMisses); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _XOPEN_SOURCE 500 | |
#include <sys/types.h> | |
#include <sys/stat.h> | |
#include <errno.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <strings.h> | |
#include <fcntl.h> | |
#include <ftw.h> | |
#include <string.h> | |
#include <leveldb/db.h> | |
#include "md5.h" | |
static leveldb::DB* db; | |
static int nFiles; | |
int cb(const char *path, const struct stat *sb, int typeflag, struct FTW *) { | |
char* val; | |
int rc = 0; | |
switch (typeflag) { | |
case FTW_F: { | |
if ((sb->st_mode & S_IFMT) != S_IFREG) | |
break; | |
leveldb::Slice key(path, strlen(path) + 1); | |
val = md5sum(path); | |
if (!val) { | |
printf("md5sum error: %s\n", path); | |
rc = -1; | |
break; | |
} | |
printf("==> %s, %s\n", path, val); | |
leveldb::Slice value(val, 32 + 1); | |
++nFiles; | |
auto s = db->Put(leveldb::WriteOptions(), key, value); | |
if (!s.ok()) { | |
printf("leveldb error: %s\n", path); | |
rc = -1; | |
} | |
free(val); | |
break; | |
} | |
default: | |
break; | |
} | |
return rc; | |
} | |
/* ftwdb2_leveldb <path> <dbfile> */ | |
/* Make a LevelDB mapping every file to its MD5SUM (in 32-byte ascii string) */ | |
int main(int argc, char *argv[]) | |
{ | |
int i; | |
leveldb::Options options; | |
options.create_if_missing = true; | |
leveldb::Status status = leveldb::DB::Open(options, argv[2], &db); | |
if (!status.ok()) { | |
printf("Error creating db\n"); | |
return -1; | |
} | |
i = nftw(argv[1], cb, 64, FTW_PHYS); | |
if (i) printf("%d err %d\n", i, errno); | |
delete db; | |
printf("%d files\n", nFiles); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <strings.h> | |
#include <limits.h> | |
#include <openssl/md5.h> | |
/* MD5 a file */ | |
char *md5sum(const char *path) { | |
FILE* fp; | |
unsigned char hash_buf[MD5_DIGEST_LENGTH]; | |
MD5_CTX hash_context; | |
size_t bytes; | |
char data[1]; | |
unsigned char sum[2 * MD5_DIGEST_LENGTH + 1]; | |
int i, j; | |
int error = 0; | |
fp = fopen(path, "rb"); | |
if (!fp) | |
return NULL; | |
j = 0; | |
bytes = 0; | |
bzero(data, sizeof(data)); | |
bzero(sum, sizeof(sum)); | |
MD5_Init(&hash_context); | |
for (;;) { | |
bytes = fread(data, sizeof(data), 1, fp); | |
if (bytes < sizeof(data)) { | |
if (ferror(fp)) { | |
// Error! | |
error = 1; | |
break; | |
} | |
} | |
MD5_Update(&hash_context, data, bytes); | |
if (bytes < sizeof(data)) | |
if (feof(fp)) | |
break; | |
} | |
MD5_Final(hash_buf, &hash_context); | |
fclose(fp); | |
for (i = 0; i < MD5_DIGEST_LENGTH; i++) | |
j += sprintf(&sum[j], "%02x", hash_buf[i]); | |
if (error) | |
return NULL; | |
return strdup(sum); | |
} | |
char *md5sumbuf(const char *buf, size_t size) { | |
unsigned char sum[2 * MD5_DIGEST_LENGTH + 1]; | |
unsigned char hash_buf[MD5_DIGEST_LENGTH]; | |
int i, j; | |
MD5_CTX hash_context; | |
MD5_Init(&hash_context); | |
MD5_Update(&hash_context, buf, size); | |
MD5_Final(hash_buf, &hash_context); | |
for (i = 0, j = 0; i < MD5_DIGEST_LENGTH; i++) | |
j += sprintf(&sum[j], "%02x", hash_buf[i]); | |
return strdup(sum); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Guard name avoids the leading-underscore-plus-capital form, which is
 * reserved for the implementation. */
#ifndef MD5_H_
#define MD5_H_

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * MD5 a file.
 *
 * Returns the 128-bit digest of the file at `path` as a NUL-terminated string
 * of 32 hex characters, or NULL if the file cannot be opened or read.
 * Caller is responsible for deallocating the return value w/ ::free.
 */
char *md5sum(const char *path);

/*
 * MD5 a memory buffer.
 *
 * Returns the 128-bit digest of the `size` bytes at `buf` as a NUL-terminated
 * string of 32 hex characters.
 * Caller is responsible for deallocating the return value w/ ::free.
 */
char *md5sumbuf(const char *buf, size_t size);

#ifdef __cplusplus
}  /* extern "C" */
#endif

#endif /* MD5_H_ */
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment