Last active
June 3, 2019 13:14
-
-
Save nico/b0cca071e5b1fd71f929 to your computer and use it in GitHub Desktop.
benchmarking different file stat()ing techniques on windows (parts based on https://github.com/cpizano/Kodefind/blob/master/src/engine_v1_win.cc)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// cl stattest.cc /Ox /GL /GR- | |
#include <algorithm> | |
#include <iostream> | |
#include <map> | |
#include <set> | |
#include <string> | |
#include <vector> | |
using namespace std; | |
#include <direct.h> | |
#include <stdint.h> | |
#include <time.h> | |
#include <windows.h> | |
struct StringPiece { | |
StringPiece() : str_(NULL), len_(0) {} | |
StringPiece(const string& str) : str_(str.data()), len_(str.size()) {} | |
StringPiece(const char* str) : str_(str), len_(strlen(str)) {} | |
StringPiece(const char* str, size_t len) : str_(str), len_(len) {} | |
bool operator<(StringPiece RHS) const { | |
if (int Res = memcmp(str_, RHS.str_, min(len_, RHS.len_))) | |
return Res < 0; | |
if (len_ == RHS.len_) | |
return false; | |
return len_ < RHS.len_; | |
} | |
bool operator==(const StringPiece& other) const { | |
return len_ == other.len_ && memcmp(str_, other.str_, len_) == 0; | |
} | |
bool operator!=(const StringPiece& other) const { | |
return !(*this == other); | |
} | |
string AsString() const { return len_ ? string(str_, len_) : string(); } | |
const char* str_; | |
size_t len_; | |
}; | |
// About 1s for stat()ing 55000 files. | |
uint64_t stat(const string& path) { | |
WIN32_FILE_ATTRIBUTE_DATA attrs; | |
if (!GetFileAttributesExA(path.c_str(), GetFileExInfoStandard, &attrs)) { | |
DWORD err = GetLastError(); | |
if (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) | |
return 0; | |
fprintf(stderr, "fail %s", path.c_str()); | |
exit(-1); | |
} | |
const FILETIME& filetime = attrs.ftLastWriteTime; | |
// FILETIME is in 100-nanosecond increments since the Windows epoch. | |
// We don't much care about epoch correctness but we do want the | |
// resulting value to fit in an integer. | |
uint64_t mtime = ((uint64_t)filetime.dwHighDateTime << 32) | | |
((uint64_t)filetime.dwLowDateTime); | |
mtime /= 1000000000LL / 100; // 100ns -> s. | |
mtime -= 12622770400LL; // 1600 epoch -> 2000 epoch (subtract 400 years). | |
return mtime; | |
} | |
#if 1 | |
uint64_t statsum(const string& d, const set<StringPiece>& s) { | |
HANDLE hFind = INVALID_HANDLE_VALUE; | |
WIN32_FIND_DATAA ffd; | |
#if 0 | |
hFind = FindFirstFileA((d + "\\*").c_str(), &ffd); | |
#else | |
hFind = FindFirstFileExA((d + "\\*").c_str(), | |
//FindExInfoStandard, | |
FindExInfoBasic, // 30% faster than FindExInfoStandard! | |
&ffd, | |
FindExSearchNameMatch, | |
NULL, | |
0 ); // XXX: check FIND_FIRST_EX_LARGE_FETCH | |
#endif | |
if (hFind == INVALID_HANDLE_VALUE) { | |
fprintf(stderr, "fail %s", d.c_str()); | |
exit(-1); | |
} | |
uint64_t t = 0; | |
do { | |
//fprintf(stderr, "%s %s\n", d.c_str(), ffd.cFileName); | |
if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) | |
continue; | |
string lowername = ffd.cFileName; | |
std::transform(lowername.begin(), lowername.end(), | |
lowername.begin(), ::tolower); | |
if (s.count(lowername) == 0) { | |
//fprintf(stderr, "skipping %s %s\n", d.c_str(), ffd.cFileName); | |
continue; | |
} | |
const FILETIME& filetime = ffd.ftLastWriteTime; | |
// FILETIME is in 100-nanosecond increments since the Windows epoch. | |
// We don't much care about epoch correctness but we do want the | |
// resulting value to fit in an integer. | |
uint64_t mtime = ((uint64_t)filetime.dwHighDateTime << 32) | | |
((uint64_t)filetime.dwLowDateTime); | |
mtime /= 1000000000LL / 100; // 100ns -> s. | |
//if(mtime == 0) | |
//printf(" asdasdfadsfsadf\n"); | |
mtime -= 12622770400LL; // 1600 epoch -> 2000 epoch (subtract 400 years). | |
t += mtime; | |
} while (FindNextFileA(hFind, &ffd)); | |
FindClose(hFind); | |
return t; | |
} | |
#else | |
HANDLE OpenDirectory(const std::string& dir) { | |
DWORD share = FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE; | |
return CreateFileA(dir.c_str(), GENERIC_READ , share, NULL, | |
OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, NULL); | |
} | |
uint64_t statsum(const string& d, const set<StringPiece>& s) { | |
#if 1 | |
FILE_INFO_BY_HANDLE_CLASS handle_class = FileIdBothDirectoryInfo; | |
typedef FILE_ID_BOTH_DIR_INFO HandleClass; | |
#else | |
// Requires win8, likely faster | |
FILE_INFO_BY_HANDLE_CLASS handle_class = FileIdFullDirectoryInfo; | |
typedef FILE_ID_FULL_DIR_INFO HandleClass; | |
#endif | |
const size_t kBufSize = 512 * 1024; | |
char* buf = (char*)malloc(kBufSize); | |
HANDLE hDir = OpenDirectory(d.c_str()); | |
if (hDir == INVALID_HANDLE_VALUE) { | |
fprintf(stderr, "fail %s", d.c_str()); | |
exit(-1); | |
} | |
uint64_t t = 0; | |
while (GetFileInformationByHandleEx(hDir, handle_class, buf, kBufSize)) { | |
HandleClass* c = (HandleClass*)buf; | |
while (1) { | |
if ((c->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) == 0) { | |
wstring wlowername(&c->FileName[0], c->FileNameLength / sizeof(wchar_t)); | |
string lowername(wlowername.begin(), wlowername.end()); // HACK | |
std::transform(lowername.begin(), lowername.end(), | |
lowername.begin(), ::tolower); | |
//fprintf(stderr, "%s %s\n", d.c_str(), lowername.c_str()); | |
if (s.count(lowername) == 0) { | |
//fprintf(stderr, "skipping %s %s\n", d.c_str(), c->FileName); | |
} else { | |
uint64_t mtime = ((uint64_t)c->LastWriteTime.QuadPart); | |
mtime /= 1000000000LL / 100; // 100ns -> s. | |
mtime -= 12622770400LL; // 1600 epoch -> 2000 epoch (subtract 400 years). | |
t += mtime; | |
} | |
} | |
if (c->NextEntryOffset == 0) | |
break; | |
c = (HandleClass*)(ULONG_PTR(c) + c->NextEntryOffset); | |
} | |
} | |
if (GetLastError() != ERROR_NO_MORE_FILES) { | |
fprintf(stderr, "fail %s", d.c_str()); | |
exit(-1); | |
} | |
free(buf); | |
CloseHandle(hDir); | |
return t; | |
} | |
#endif | |
int main() { | |
_chdir("out\\Debug"); | |
clock_t cs = clock(), ce; | |
vector<string> files; | |
string s; | |
while (getline(cin, s)) { | |
std::transform(s.begin(), s.end(), s.begin(), ::tolower); | |
files.push_back(s); | |
} | |
// 0.36s up to here | |
ce = clock(); | |
printf("reading took %f\n", (ce - cs)/float(CLOCKS_PER_SEC)); | |
cs = ce; | |
// takes about 0.1s | |
typedef set<StringPiece> Dirs; | |
typedef map<string, Dirs> DirMap; | |
DirMap m; | |
for (size_t i = 0; i < files.size(); ++i) { | |
string dir; | |
StringPiece base; | |
size_t p = files[i].rfind('\\'); | |
if (p != string::npos) { | |
dir = files[i].substr(0, p); | |
base = StringPiece(files[i].data() + p + 1, files[i].size() - p - 1); | |
//printf("%s %s\n", dir.c_str(), base.c_str()); break; | |
} else { | |
dir = "."; | |
base = files[i]; | |
} | |
//std::transform(base.begin(), base.end(), base.begin(), ::tolower); | |
m[dir].insert(base); | |
} | |
ce = clock(); | |
printf("map building took %f\n", (ce - cs)/float(CLOCKS_PER_SEC)); | |
cs = ce; | |
uint64_t sum; | |
sum = 0; | |
for (DirMap::iterator it = m.begin(), end = m.end(); | |
it != end; ++it) { | |
sum += statsum(it->first, it->second); | |
} | |
printf("s %I64x\n", sum); | |
ce = clock(); | |
printf("per-dir stating took %f\n", (ce - cs)/float(CLOCKS_PER_SEC)); | |
cs = ce; | |
sum = 0; | |
//for (size_t i = 0; i < files.size(); ++i) { | |
// sum += stat(files[i]); | |
//} | |
for (DirMap::iterator it = m.begin(), end = m.end(); | |
it != end; ++it) { | |
for (Dirs::iterator sit = it->second.begin(), send = it->second.end(); | |
sit != send; ++sit) | |
sum += stat(it->first + "\\" + sit->AsString()); | |
} | |
printf("s %I64x\n", sum); | |
ce = clock(); | |
printf("direct stating took %f\n", (ce - cs)/float(CLOCKS_PER_SEC)); | |
cs = ce; | |
return (int)sum; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment