-
-
Save kendallsss/a6c451469185f1889690 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <iomanip> | |
#include <fstream> | |
#include <sstream> | |
#include <string> | |
#include <vector> | |
#include <unordered_map> | |
#include <sys/time.h> | |
// We specify the total number of systems explicitly so that we can initialize data structures before reading in the whole csv file. | |
// This lets us process data as we go through the file rather than caching it all until we've counted the number of lines. | |
static constexpr uint32_t NUM_SYSTEMS = 5428; | |
int main(int argc, const char * argv[]) { | |
timeval start, stop; | |
gettimeofday(&start, NULL); | |
std::ifstream dataFile("jumps.csv"); | |
std::string curLine; | |
if(!std::getline(dataFile, curLine)) { | |
std::cerr << "Couldn't load jumps data.\n"; | |
return 1; | |
} | |
std::cout << "Indexing... "; | |
// Discard the first line. It's just a list of human-readable labels. | |
// Create arrays which will be used to execute the algorithm. We allocate a big block just to avoid calling new 5000 times... | |
uint32_t *cost = new uint32_t[NUM_SYSTEMS*NUM_SYSTEMS]; | |
uint32_t *next = new uint32_t[NUM_SYSTEMS*NUM_SYSTEMS]; | |
for(uint32_t i=0; i<NUM_SYSTEMS*NUM_SYSTEMS; i++) { | |
cost[i] = NUM_SYSTEMS; | |
} | |
for(uint32_t i=0; i<NUM_SYSTEMS; i++) { | |
// Any node can reach itself at no cost. | |
cost[(NUM_SYSTEMS * i) + i] = 0; | |
// Any node should advertise itself as the best possible path to itself. | |
next[(NUM_SYSTEMS * i) + i] = i; | |
} | |
// Hash map for translating big IDs into short IDs. | |
std::unordered_map<uint32_t, uint32_t> forwardIDMap; | |
forwardIDMap.reserve(NUM_SYSTEMS); | |
// Vector for translating short IDs back into their big IDs. The ith entry in this vector is the big ID equivalent of the short ID i. | |
std::vector<uint32_t> backwardIDMap; | |
backwardIDMap.reserve(NUM_SYSTEMS); | |
// The next short ID that we will assign to a big ID. | |
uint32_t nextID = 0; | |
// Read in the data file. | |
while(std::getline(dataFile, curLine)) { | |
size_t commaIndex = curLine.find(','); | |
// Big IDs read from the current line. | |
uint32_t from = atol(curLine.substr(0, commaIndex).c_str()); | |
uint32_t to = atol(curLine.substr(commaIndex + 1).c_str()); | |
// Short IDs which we will assign/discover. | |
uint32_t fromID; | |
uint32_t toID; | |
// Look up (or assign) the short ID for the "from" big ID read from the current line. | |
auto fromResultIter = forwardIDMap.find(from); | |
if(fromResultIter == forwardIDMap.end()) { | |
// Assign a new short ID to this big ID. | |
forwardIDMap[from] = nextID; | |
backwardIDMap.push_back(from); | |
fromID = nextID; | |
nextID++; | |
} else { | |
// This big ID has already been assigned a short ID. | |
fromID = fromResultIter->second; | |
} | |
// Look up (or assign) the short ID for the "to" big ID read from the current line. | |
auto toResultIter = forwardIDMap.find(to); | |
if(toResultIter == forwardIDMap.end()) { | |
// Assign a new short ID to this big ID. | |
forwardIDMap[to] = nextID; | |
backwardIDMap.push_back(to); | |
toID = nextID; | |
nextID++; | |
} else { | |
// This big ID has already been assigned a short ID. | |
toID = toResultIter->second; | |
} | |
// You can get from fromID to toID in one hop, since they're adjacent. | |
cost[(NUM_SYSTEMS * fromID) + toID] = 1; | |
// The fastest way to get from fromID to toID is through toID, since they're adjacent. | |
next[(NUM_SYSTEMS * fromID) + toID] = toID; | |
} | |
// Floyd's algorithm | |
for(uint32_t k=0; k<NUM_SYSTEMS; k++) { | |
#pragma omp parallel for shared(k, cost, next) schedule(dynamic) | |
for(uint32_t i=0; i<NUM_SYSTEMS; i++) { | |
if(i==k) { | |
// It's impossible to find a better path from i to any j in this case. Think about it: | |
// After the previous pass, we have a path from i to j using the first k-1 nodes as intermediates. | |
// If we're to improve on it during this pass (using the first k nodes as intermediates instead of the | |
// first k-1 nodes), then the kth node has to be on the path somewhere. Otherwise, what has changed? | |
// So in the case that i==k, we're looking at paths that start at i (which is k), then go on to use k | |
// later as an intermediate elsewhere in the path, and then finally finish at j. There's clearly no way | |
// that this path can be shorter than the previous best which didn't loop back through k. | |
// This is actually really important because without this fact, the various iterations of i wouldn't | |
// be independent and would require synchronization. | |
continue; | |
} | |
for(uint32_t j=0; j<NUM_SYSTEMS; j++) { | |
uint32_t prev = cost[(NUM_SYSTEMS * i) + j]; | |
uint32_t candidate = cost[(NUM_SYSTEMS * i) + k] + cost[(NUM_SYSTEMS * k) + j]; | |
if(candidate < prev) { | |
cost[(NUM_SYSTEMS * i) + j] = candidate; | |
next[(NUM_SYSTEMS * i) + j] = next[(NUM_SYSTEMS * i) + k]; | |
} | |
} | |
} | |
} | |
gettimeofday(&stop, NULL); | |
double elapsed = (stop.tv_sec - start.tv_sec) + ((double)(stop.tv_usec - start.tv_usec))/1000000; | |
std::cout << "done. Elapsed time " << std::fixed << std::setprecision(2) << elapsed << " seconds.\n"; | |
uint32_t startNode = 30000029, endNode = 30000050; | |
uint32_t startID = forwardIDMap[30000029], endID = forwardIDMap[30000050]; | |
std::cout << "Calculating the quickest route from " << startNode << " to " << endNode << "... "; | |
gettimeofday(&start, NULL); | |
uint32_t cur = startID, count = 0; | |
while(cur != endID) { | |
//std::cout << backwardIDMap[cur] << '\n'; | |
cur = next[(NUM_SYSTEMS * cur) + endID]; | |
count++; | |
} | |
gettimeofday(&stop, NULL); | |
elapsed = (stop.tv_sec - start.tv_sec)*1000 + ((double)(stop.tv_usec - start.tv_usec))/1000; | |
std::cout << "done.\nBuilt a " << count << " hop route in " << std::fixed << std::setprecision(3) << elapsed << " ms.\n"; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment