Last active
June 29, 2017 15:49
-
-
Save zeux/90a49b85c8cfdf04ffa5489ec8916271 to your computer and use it in GitHub Desktop.
https://ayende.com/blog/176034/making-code-faster-the-interview-question - 135 ms (dual-core i7)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _CRT_SECURE_NO_WARNINGS | |
#include <vector> | |
#include <memory> | |
#include <future> | |
#include <stdio.h> | |
#include <assert.h> | |
#include <time.h> | |
#include <stdint.h> | |
#include <string.h> | |
#include <emmintrin.h> | |
#ifdef _WIN32 | |
#include <io.h> | |
#include <Windows.h> | |
#endif | |
int get_digit(char ch) | |
{ | |
return ch - '0'; | |
} | |
int get_digits(const char (&ch)[2]) | |
{ | |
return (ch[0] - '0') * 10 + (ch[1] - '0'); | |
} | |
int get_digits(const char (&ch)[4]) | |
{ | |
return (ch[0] - '0') * 1000 + (ch[1] - '0') * 100 + (ch[2] - '0') * 10 + (ch[3] - '0'); | |
} | |
int get_digits(const char (&ch)[8]) | |
{ | |
return | |
(ch[0] - '0') * 10000000 + (ch[1] - '0') * 1000000 + (ch[2] - '0') * 100000 + (ch[3] - '0') * 10000 + | |
(ch[4] - '0') * 1000 + (ch[5] - '0') * 100 + (ch[6] - '0') * 10 + (ch[7] - '0'); | |
} | |
struct DateTimeAscii | |
{ | |
char year[4]; | |
char separator_dash1; | |
char month[2]; | |
char separator_dash2; | |
char day[2]; | |
char separator_T; | |
char hour[2]; | |
char separator_colon1; | |
char minute[2]; | |
char separator_colon2; | |
char second[2]; | |
time_t time() const | |
{ | |
tm tm; | |
tm.tm_sec = get_digits(second); | |
tm.tm_min = get_digits(minute); | |
tm.tm_hour = get_digits(hour); | |
tm.tm_mday = get_digits(day); | |
tm.tm_mon = get_digits(month) - 1; | |
tm.tm_year = get_digits(year) - 1900; | |
tm.tm_wday = 0; // ignored by mktime | |
tm.tm_yday = 0; // ignored by mktime | |
tm.tm_isdst = 0; // assume no DST (we don't have timezone info to figure this out) | |
return mktime(&tm); | |
} | |
int daytime() const | |
{ | |
return get_digits(day) * (3600 * 24) + get_digits(hour) * 3600 + get_digits(minute) * 60 + get_digits(second); | |
} | |
int operator-(const DateTimeAscii& other) const | |
{ | |
// if YYYY.MM. are the same then we can subtract without knowing the correct time | |
if (memcmp(this, &other, 8) == 0) | |
{ | |
assert(daytime() - other.daytime() == time() - other.time()); | |
return daytime() - other.daytime(); | |
} | |
else | |
{ | |
return time() - other.time(); | |
} | |
} | |
}; | |
struct RecordAscii | |
{ | |
DateTimeAscii enter; | |
char separator_space1; | |
DateTimeAscii exit; | |
char separator_space2; | |
char id[8]; | |
char separator_cr; | |
char separator_lf; | |
}; | |
bool validate(const RecordAscii& r) | |
{ | |
// we check 48 bytes with 3 16b comparisons and 2 bytes separately | |
assert(sizeof(RecordAscii) == 48 + 2); | |
if (r.separator_cr != '\r' || r.separator_lf != '\n') | |
return false; | |
const char* kRecordMin = "0000-00-00T00:00:00 0000-00-00T00:00:00 00000000"; | |
const char* kRecordMax = "9999-99-99T99:99:99 9999-99-99T99:99:99 99999999"; | |
#define CMP(i) __m128i \ | |
r##i = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&r) + i), \ | |
min##i = _mm_loadu_si128(reinterpret_cast<const __m128i*>(kRecordMin) + i), \ | |
max##i = _mm_loadu_si128(reinterpret_cast<const __m128i*>(kRecordMax) + i), \ | |
cmp##i = _mm_or_si128(_mm_cmplt_epi8(r##i, min##i), _mm_cmpgt_epi8(r##i, max##i)) | |
CMP(0); CMP(1); CMP(2); | |
#undef CMP | |
return _mm_movemask_epi8(_mm_or_si128(cmp0, _mm_or_si128(cmp1, cmp2))) == 0; | |
} | |
struct Times | |
{ | |
std::vector<std::unique_ptr<unsigned int[]>> data; | |
static const size_t kChunkSize = 1024; | |
Times(size_t max_value) | |
: data((max_value + kChunkSize - 1) / kChunkSize) | |
{ | |
} | |
unsigned int& operator[](unsigned int value) | |
{ | |
assert(value / kChunkSize < data.size()); | |
std::unique_ptr<unsigned int[]>& chunk = data[value / kChunkSize]; | |
if (!chunk) | |
{ | |
chunk.reset(new unsigned int[kChunkSize]); | |
memset(chunk.get(), 0, sizeof(unsigned int) * kChunkSize); | |
} | |
return chunk.get()[value % kChunkSize]; | |
} | |
void merge(Times& other) | |
{ | |
assert(data.size() == other.data.size()); | |
for (size_t i = 0; i < data.size(); ++i) | |
{ | |
if (!other.data[i]) | |
continue; | |
if (data[i]) | |
{ | |
for (size_t j = 0; j < kChunkSize; ++j) | |
data[i][j] += other.data[i][j]; | |
} | |
else | |
{ | |
data[i] = std::move(other.data[i]); | |
} | |
} | |
} | |
}; | |
bool process_buffer(const RecordAscii* buffer, size_t count, Times& times) | |
{ | |
for (size_t i = 0; i < count; ++i) | |
{ | |
const RecordAscii& r = buffer[i]; | |
if (!validate(r)) | |
return false; | |
int diff = r.exit - r.enter; | |
assert(diff >= 0); | |
int id = get_digits(r.id); | |
times[id] += diff; | |
} | |
return true; | |
} | |
bool process_buffer_mt(const RecordAscii* buffer, size_t count, Times& times) | |
{ | |
unsigned int threads = std::thread::hardware_concurrency(); | |
std::vector<Times> times_mt; | |
for (unsigned int i = 0; i < threads; ++i) | |
times_mt.push_back(Times(times.data.size() * Times::kChunkSize)); | |
std::vector<std::future<bool>> futures; | |
for (unsigned int i = 0; i < threads; ++i) | |
{ | |
Times* tt = ×_mt[i]; | |
size_t begin = i * (count / threads); | |
size_t end = (i + 1 == threads) ? count : begin + (count / threads); | |
futures.push_back(std::async([=]() { | |
return process_buffer(buffer + begin, end - begin, *tt); | |
})); | |
} | |
for (auto& f: futures) | |
if (!f.get()) | |
return false; | |
for (auto& t: times_mt) | |
times.merge(t); | |
return true; | |
} | |
bool mmap_file(FILE* file, void*& data, size_t& size) | |
{ | |
#ifdef _WIN32 | |
HANDLE fh = HANDLE(_get_osfhandle(_fileno(file))); | |
assert(fh != INVALID_HANDLE_VALUE); | |
LARGE_INTEGER fs; | |
if (GetFileSizeEx(fh, &fs) && fs.QuadPart) | |
{ | |
HANDLE mh = CreateFileMapping(fh, NULL, PAGE_READONLY, 0, 0, 0); | |
if (mh != INVALID_HANDLE_VALUE) | |
{ | |
data = MapViewOfFile(mh, FILE_MAP_READ, 0, 0, 0); | |
size = fs.QuadPart; | |
CloseHandle(mh); | |
CloseHandle(fh); | |
return data != NULL; | |
} | |
} | |
CloseHandle(fh); | |
return false; | |
#else | |
return false; | |
#endif | |
} | |
void munmap_file(void* data, size_t size) | |
{ | |
#ifdef _WIN32 | |
UnmapViewOfFile(data); | |
#endif | |
} | |
struct Output | |
{ | |
char buffer[32768]; | |
size_t size = 0; | |
char* reserve(size_t count) | |
{ | |
assert(count <= sizeof(buffer)); | |
if (size + count > sizeof(buffer)) | |
flush(); | |
return buffer + size; | |
} | |
void flush() | |
{ | |
if (size) | |
{ | |
fwrite(buffer, 1, size, stdout); | |
size = 0; | |
} | |
} | |
}; | |
void print_chars(Output& output, const char* data, size_t count) | |
{ | |
char* buffer = output.reserve(count); | |
memcpy(buffer, data, count); | |
output.size += count; | |
} | |
void print_uint(Output& output, unsigned int value, unsigned int digits) | |
{ | |
char* buffer = output.reserve(digits); | |
for (unsigned int i = 0; i < digits; ++i) | |
{ | |
buffer[digits - 1 - i] = '0' + value % 10; | |
value /= 10; | |
} | |
assert(value == 0); | |
output.size += digits; | |
} | |
void print_uint(Output& output, unsigned int value) | |
{ | |
char buffer[32]; | |
size_t write = sizeof(buffer); | |
do | |
{ | |
assert(write > 0); | |
write--; | |
buffer[write] = '0' + value % 10; | |
value /= 10; | |
} while (value); | |
print_chars(output, buffer + write, sizeof(buffer) - write); | |
} | |
int main(int argc, const char** argv) | |
{ | |
if (argc < 2) | |
return fprintf(stderr, "Usage: %s [file]\n", argv[0]); | |
clock_t start = clock(); | |
FILE* file = fopen(argv[1], "rb"); | |
if (!file) | |
return fprintf(stderr, "Fatal error: can't open file\n"); | |
Times times(100000000); | |
void* data; | |
size_t size; | |
if (mmap_file(file, data, size)) | |
{ | |
if (!process_buffer_mt(static_cast<RecordAscii*>(data), size / sizeof(RecordAscii), times)) | |
return fprintf(stderr, "Fatal error: data desync\n"); | |
munmap_file(data, size); | |
} | |
else | |
{ | |
RecordAscii buffer[1024]; | |
do | |
{ | |
size_t count = fread(buffer, sizeof(RecordAscii), sizeof(buffer) / sizeof(buffer[0]), file); | |
if (!process_buffer(buffer, count, times)) | |
return fprintf(stderr, "Fatal error: data desync"); | |
} while (!feof(file)); | |
} | |
Output output; | |
size_t memory = 0; | |
for (size_t i = 0; i < times.data.size(); ++i) | |
{ | |
if (!times.data[i]) | |
continue; | |
memory += 4; | |
for (size_t j = 0; j < Times::kChunkSize; ++j) | |
{ | |
if (times.data[i][j]) | |
{ | |
size_t index = i * Times::kChunkSize + j; | |
unsigned int time = times.data[i][j]; | |
print_uint(output, index, 8); | |
print_chars(output, ": ", 2); | |
unsigned int seconds = time % 60; | |
time /= 60; | |
unsigned int minutes = time % 60; | |
time /= 60; | |
unsigned int hours = time % 60; | |
time /= 60; | |
unsigned int days = time; | |
if (days) | |
{ | |
print_uint(output, days); | |
print_chars(output, ".", 1); | |
} | |
print_uint(output, hours, 2); | |
print_chars(output, ":", 1); | |
print_uint(output, minutes, 2); | |
print_chars(output, ":", 1); | |
print_uint(output, seconds, 2); | |
print_chars(output, "\r\n", 2); | |
} | |
} | |
} | |
output.flush(); | |
clock_t end = clock(); | |
fprintf(stderr, "%d ms (%d KB)\n", int(int64_t(end - start) * 1000 / CLOCKS_PER_SEC), int(memory)); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment