Last active
May 7, 2020 07:17
-
-
Save leochou0729/4983fedebffef733fdfc97f4b1b6696a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <sys/mount.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <dirent.h>

#include <chrono>
#include <cstdio>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <memory>
#include <set>
#include <sstream>
#include <string>

#include "xxhash.hpp"
// Hex digests of every file hashed so far; a failed insert means the digest was already seen.
std::set<std::string> fileSet{};
// Number of files that were hashed successfully.
unsigned long long ullFileCount{0};
// Number of files whose digest was already present in fileSet.
unsigned long long ullCollision{0};
bool HashFile(const std::string& strFile, std::string& strHash) | |
{ | |
FILE *f; | |
size_t n; | |
unsigned char buf[4096]; | |
if ((f = fopen(strFile.c_str(), "rb")) == nullptr) | |
{ | |
return false; | |
} | |
xxh::hash3_state_t<128> hash_stream; | |
while ((n = fread(buf, 1, sizeof(buf), f)) > 0) | |
{ | |
hash_stream.update(buf, n); | |
} | |
fclose(f); | |
xxh::hash_t<128> result = hash_stream.digest(); | |
unsigned char *pResult = (unsigned char *)&result; | |
std::stringstream ss; | |
ss << std::hex; | |
for (int i = 0; i < 16; ++i) | |
{ | |
ss << std::setw(2) << std::setfill('0') << (0xff & (unsigned int)pResult[i]); | |
} | |
strHash = ss.str(); | |
return true; | |
} | |
/**
 * Reports whether a path refers to a directory.
 *
 * Uses stat(), so a symbolic link is judged by its target.
 *
 * @param strPath Path to inspect.
 * @return true if stat() succeeds and the mode is a directory; false otherwise,
 *         including when the path does not exist or cannot be examined.
 */
bool IsDirectory(const std::string& strPath)
{
    struct stat pathStat;
    // On failure stat() leaves pathStat unspecified, so it must not be inspected.
    if (stat(strPath.c_str(), &pathStat) != 0)
    {
        return false;
    }
    return S_ISDIR(pathStat.st_mode);
}
/**
 * Reports whether a path refers to a regular file.
 *
 * Uses stat(), so a symbolic link is judged by its target.
 *
 * @param strPath Path to inspect.
 * @return true if stat() succeeds and the mode is a regular file; false
 *         otherwise, including when the path does not exist or cannot be examined.
 */
bool IsRegularFile(const std::string& strPath)
{
    struct stat pathStat;
    // On failure stat() leaves pathStat unspecified, so it must not be inspected.
    if (stat(strPath.c_str(), &pathStat) != 0)
    {
        return false;
    }
    return S_ISREG(pathStat.st_mode);
}
void ListFiles(std::string& strBasePath) | |
{ | |
struct dirent *dp; | |
DIR *dir = opendir(strBasePath.c_str()); | |
if (!dir) | |
{ | |
return; | |
} | |
while ((dp = readdir(dir)) != nullptr) | |
{ | |
if (dp->d_type == DT_DIR && strcmp(dp->d_name, ".") != 0 && strcmp(dp->d_name, "..") != 0) | |
{ | |
std::string strDir = strBasePath; | |
if (strDir.back() != '/') | |
{ | |
strDir += "/"; | |
} | |
strDir += dp->d_name; | |
ListFiles(strDir); | |
} | |
else if (dp->d_type == DT_REG && std::string(dp->d_name).substr(0, 1) == ".") | |
{ | |
continue; | |
} | |
else if (dp->d_type == DT_REG) | |
{ | |
std::string strFile = strBasePath; | |
if (strFile.back() != '/') | |
{ | |
strFile += "/"; | |
} | |
strFile += dp->d_name; | |
std::string strHash; | |
if (HashFile(strFile, strHash)) | |
{ | |
ullFileCount++; | |
if (fileSet.insert(std::move(strHash)).second == false) | |
{ | |
ullCollision++; | |
} | |
} | |
} | |
} | |
closedir(dir); | |
} | |
int main(int argc, const char * argv[]) | |
{ | |
std::string rootPath = "/"; | |
auto start = std::chrono::steady_clock::now(); | |
ListFiles(rootPath); | |
auto end = std::chrono::steady_clock::now(); | |
std::cout << "Elapsed time in seconds = " << std::chrono::duration_cast<std::chrono::seconds>(end - start).count() << std::endl; | |
std::cout << "File count = " << ullFileCount << std::endl; | |
std::cout << "Hash collision = " << ullCollision << std::endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment