Created
May 17, 2014 01:20
-
-
Save nico/511d9c0522c1c7c4f7e9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <algorithm> | |
#include <iostream> | |
#include <map> | |
#include <set> | |
#include <string> | |
#include <vector> | |
using namespace std; | |
#include <direct.h> | |
#include <stdint.h> | |
#include <time.h> | |
#include <windows.h> | |
// About 1s for stat()ing 55000 files. | |
uint64_t stat(const string& path) { | |
WIN32_FILE_ATTRIBUTE_DATA attrs; | |
if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &attrs)) { | |
DWORD err = GetLastError(); | |
if (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) | |
return 0; | |
fprintf(stderr, "fail %s", path.c_str()); | |
exit(-1); | |
} | |
const FILETIME& filetime = attrs.ftLastWriteTime; | |
// FILETIME is in 100-nanosecond increments since the Windows epoch. | |
// We don't much care about epoch correctness but we do want the | |
// resulting value to fit in an integer. | |
uint64_t mtime = ((uint64_t)filetime.dwHighDateTime << 32) | | |
((uint64_t)filetime.dwLowDateTime); | |
mtime /= 1000000000LL / 100; // 100ns -> s. | |
mtime -= 12622770400LL; // 1600 epoch -> 2000 epoch (subtract 400 years). | |
return mtime; | |
} | |
uint64_t statsum(const string& d, const set<string>& s) { | |
HANDLE hFind = INVALID_HANDLE_VALUE; | |
WIN32_FIND_DATA ffd; | |
hFind = FindFirstFile((d + "\\*").c_str(), &ffd); | |
if (hFind == INVALID_HANDLE_VALUE) { | |
fprintf(stderr, "fail %s", d.c_str()); | |
exit(-1); | |
} | |
uint64_t t = 0; | |
do { | |
//fprintf(stderr, "%s %s\n", d.c_str(), ffd.cFileName); | |
if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) | |
continue; | |
string lowername = ffd.cFileName; | |
std::transform(lowername.begin(), lowername.end(), | |
lowername.begin(), ::tolower); | |
if (s.count(lowername) == 0) { | |
//fprintf(stderr, "skipping %s %s\n", d.c_str(), ffd.cFileName); | |
continue; | |
} | |
const FILETIME& filetime = ffd.ftLastWriteTime; | |
// FILETIME is in 100-nanosecond increments since the Windows epoch. | |
// We don't much care about epoch correctness but we do want the | |
// resulting value to fit in an integer. | |
uint64_t mtime = ((uint64_t)filetime.dwHighDateTime << 32) | | |
((uint64_t)filetime.dwLowDateTime); | |
mtime /= 1000000000LL / 100; // 100ns -> s. | |
//if(mtime == 0) | |
//printf(" asdasdfadsfsadf\n"); | |
mtime -= 12622770400LL; // 1600 epoch -> 2000 epoch (subtract 400 years). | |
t += mtime; | |
} while (FindNextFile(hFind, &ffd)); | |
FindClose(hFind); | |
return t; | |
} | |
int main() { | |
_chdir("out\\Debug"); | |
clock_t cs = clock(), ce; | |
vector<string> files; | |
string s; | |
while (getline(cin, s)) | |
files.push_back(s); | |
// 0.36s up to here | |
ce = clock(); | |
printf("reading took %f\n", (ce - cs)/float(CLOCKS_PER_SEC)); | |
cs = ce; | |
// takes about 0.1s | |
map<string, set<string>> m; | |
for (size_t i = 0; i < files.size(); ++i) { | |
string dir, base; | |
size_t p = files[i].rfind('\\'); | |
if (p != string::npos) { | |
dir = files[i].substr(0, p); | |
base = files[i].substr(p + 1); | |
//printf("%s %s\n", dir.c_str(), base.c_str()); break; | |
} else { | |
dir = "."; | |
base = files[i]; | |
} | |
std::transform(base.begin(), base.end(), base.begin(), ::tolower); | |
m[dir].insert(base); | |
} | |
ce = clock(); | |
printf("map building took %f\n", (ce - cs)/float(CLOCKS_PER_SEC)); | |
cs = ce; | |
uint64_t sum; | |
sum = 0; | |
for (map<string, set<string>>::iterator it = m.begin(), end = m.end(); | |
it != end; ++it) { | |
sum += statsum(it->first, it->second); | |
} | |
printf("s %I64x\n", sum); | |
ce = clock(); | |
printf("per-dir stating took %f\n", (ce - cs)/float(CLOCKS_PER_SEC)); | |
cs = ce; | |
sum = 0; | |
for (size_t i = 0; i < files.size(); ++i) { | |
sum += stat(files[i]); | |
} | |
printf("s %I64x\n", sum); | |
ce = clock(); | |
printf("direct stating took %f\n", (ce - cs)/float(CLOCKS_PER_SEC)); | |
cs = ce; | |
return (int)sum; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment