Skip to content

Instantly share code, notes, and snippets.

@nico
Created May 17, 2014 01:20
Show Gist options
  • Save nico/511d9c0522c1c7c4f7e9 to your computer and use it in GitHub Desktop.
Save nico/511d9c0522c1c7c4f7e9 to your computer and use it in GitHub Desktop.
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>
#include <direct.h>
#include <stdint.h>
#include <time.h>
#include <windows.h>
using namespace std;
// About 1s for stat()ing 55000 files.
uint64_t stat(const string& path) {
WIN32_FILE_ATTRIBUTE_DATA attrs;
if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &attrs)) {
DWORD err = GetLastError();
if (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND)
return 0;
fprintf(stderr, "fail %s", path.c_str());
exit(-1);
}
const FILETIME& filetime = attrs.ftLastWriteTime;
// FILETIME is in 100-nanosecond increments since the Windows epoch.
// We don't much care about epoch correctness but we do want the
// resulting value to fit in an integer.
uint64_t mtime = ((uint64_t)filetime.dwHighDateTime << 32) |
((uint64_t)filetime.dwLowDateTime);
mtime /= 1000000000LL / 100; // 100ns -> s.
mtime -= 12622770400LL; // 1600 epoch -> 2000 epoch (subtract 400 years).
return mtime;
}
uint64_t statsum(const string& d, const set<string>& s) {
HANDLE hFind = INVALID_HANDLE_VALUE;
WIN32_FIND_DATA ffd;
hFind = FindFirstFile((d + "\\*").c_str(), &ffd);
if (hFind == INVALID_HANDLE_VALUE) {
fprintf(stderr, "fail %s", d.c_str());
exit(-1);
}
uint64_t t = 0;
do {
//fprintf(stderr, "%s %s\n", d.c_str(), ffd.cFileName);
if (ffd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
continue;
string lowername = ffd.cFileName;
std::transform(lowername.begin(), lowername.end(),
lowername.begin(), ::tolower);
if (s.count(lowername) == 0) {
//fprintf(stderr, "skipping %s %s\n", d.c_str(), ffd.cFileName);
continue;
}
const FILETIME& filetime = ffd.ftLastWriteTime;
// FILETIME is in 100-nanosecond increments since the Windows epoch.
// We don't much care about epoch correctness but we do want the
// resulting value to fit in an integer.
uint64_t mtime = ((uint64_t)filetime.dwHighDateTime << 32) |
((uint64_t)filetime.dwLowDateTime);
mtime /= 1000000000LL / 100; // 100ns -> s.
//if(mtime == 0)
//printf(" asdasdfadsfsadf\n");
mtime -= 12622770400LL; // 1600 epoch -> 2000 epoch (subtract 400 years).
t += mtime;
} while (FindNextFile(hFind, &ffd));
FindClose(hFind);
return t;
}
int main() {
_chdir("out\\Debug");
clock_t cs = clock(), ce;
vector<string> files;
string s;
while (getline(cin, s))
files.push_back(s);
// 0.36s up to here
ce = clock();
printf("reading took %f\n", (ce - cs)/float(CLOCKS_PER_SEC));
cs = ce;
// takes about 0.1s
map<string, set<string>> m;
for (size_t i = 0; i < files.size(); ++i) {
string dir, base;
size_t p = files[i].rfind('\\');
if (p != string::npos) {
dir = files[i].substr(0, p);
base = files[i].substr(p + 1);
//printf("%s %s\n", dir.c_str(), base.c_str()); break;
} else {
dir = ".";
base = files[i];
}
std::transform(base.begin(), base.end(), base.begin(), ::tolower);
m[dir].insert(base);
}
ce = clock();
printf("map building took %f\n", (ce - cs)/float(CLOCKS_PER_SEC));
cs = ce;
uint64_t sum;
sum = 0;
for (map<string, set<string>>::iterator it = m.begin(), end = m.end();
it != end; ++it) {
sum += statsum(it->first, it->second);
}
printf("s %I64x\n", sum);
ce = clock();
printf("per-dir stating took %f\n", (ce - cs)/float(CLOCKS_PER_SEC));
cs = ce;
sum = 0;
for (size_t i = 0; i < files.size(); ++i) {
sum += stat(files[i]);
}
printf("s %I64x\n", sum);
ce = clock();
printf("direct stating took %f\n", (ce - cs)/float(CLOCKS_PER_SEC));
cs = ce;
return (int)sum;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment