kendallsss · August 29, 2015 14:10
diff --git a/eve-pathfinder.cpp b/eve-pathfinder.cpp
 #include <iostream>
 #include <iomanip>
 #include <fstream>
 #include <sstream>
 #include <string>
 #include <vector>
 #include <unordered_map>
 #include <sys/time.h>

 // We specify the total number of systems explicitly so that we can initialize data structures before reading in the whole csv file.
 // This lets us process data as we go through the file rather than caching it all until we've counted the number of lines.
 static constexpr uint32_t NUM_SYSTEMS = 5428;

 int main(int argc, const char * argv[]) {
    timeval start, stop;
    gettimeofday(&start, NULL);
    
    std::ifstream dataFile("jumps.csv");
    
    std::string curLine;
    if(!std::getline(dataFile, curLine)) {
        std::cerr << "Couldn't load jumps data.\n";
        return 1;
    }
    
    std::cout << "Indexing... ";
    
    // Discard the first line. It's just a list of human-readable labels.
    
    // Create arrays which will be used to execute the algorithm. We allocate a big block just to avoid calling new 5000 times...
    uint32_t *cost = new uint32_t[NUM_SYSTEMS*NUM_SYSTEMS];
    uint32_t *next = new uint32_t[NUM_SYSTEMS*NUM_SYSTEMS];
    
    for(uint32_t i=0; i<NUM_SYSTEMS*NUM_SYSTEMS; i++) {
        cost[i] = NUM_SYSTEMS;
    }
    
    for(uint32_t i=0; i<NUM_SYSTEMS; i++) {
        // Any node can reach itself at no cost.
        cost[(NUM_SYSTEMS * i) + i] = 0;
        
        // Any node should advertise itself as the best possible path to itself.
        next[(NUM_SYSTEMS * i) + i] = i;
    }
    
    // Hash map for translating big IDs into short IDs.
    std::unordered_map<uint32_t, uint32_t> forwardIDMap;
    forwardIDMap.reserve(NUM_SYSTEMS);
    
    // Vector for translating short IDs back into their big IDs. The ith entry in this vector is the big ID equivalent of the short ID i.
    std::vector<uint32_t> backwardIDMap;
    backwardIDMap.reserve(NUM_SYSTEMS);
    
    // The next short ID that we will assign to a big ID.
    uint32_t nextID = 0;
    
    // Read in the data file.
    while(std::getline(dataFile, curLine)) {
        size_t commaIndex = curLine.find(',');
        
        // Big IDs read from the current line.
        uint32_t from = atol(curLine.substr(0, commaIndex).c_str());
        uint32_t to = atol(curLine.substr(commaIndex + 1).c_str());
        
        // Short IDs which we will assign/discover.
        uint32_t fromID;
        uint32_t toID;
        
        // Look up (or assign) the short ID for the "from" big ID read from the current line.
        auto fromResultIter = forwardIDMap.find(from);
        if(fromResultIter == forwardIDMap.end()) {
            // Assign a new short ID to this big ID.
            forwardIDMap[from] = nextID;
            backwardIDMap.push_back(from);
            fromID = nextID;
            nextID++;
        } else {
            // This big ID has already been assigned a short ID.
            fromID = fromResultIter->second;
        }
        
        // Look up (or assign) the short ID for the "to" big ID read from the current line.
        auto toResultIter = forwardIDMap.find(to);
        if(toResultIter == forwardIDMap.end()) {
            // Assign a new short ID to this big ID.
            forwardIDMap[to] = nextID;
            backwardIDMap.push_back(to);
            toID = nextID;
            nextID++;
        } else {
            // This big ID has already been assigned a short ID.
            toID = toResultIter->second;
        }
        
        // You can get from fromID to toID in one hop, since they're adjacent.
        cost[(NUM_SYSTEMS * fromID) + toID] = 1;
        
        // The fastest way to get from fromID to toID is through toID, since they're adjacent.
        next[(NUM_SYSTEMS * fromID) + toID] = toID;
    }
    
    // Floyd's algorithm
    for(uint32_t k=0; k<NUM_SYSTEMS; k++) {
        #pragma omp parallel for shared(k, cost, next) schedule(dynamic)
        for(uint32_t i=0; i<NUM_SYSTEMS; i++) {
            if(i==k) {
                // It's impossible to find a better path from i to any j in this case. Think about it:
                // After the previous pass, we have a path from i to j using the first k-1 nodes as intermediates. 
                // If we're to improve on it during this pass (using the first k nodes as intermediates instead of the 
                // first k-1 nodes), then the kth node has to be on the path somewhere. Otherwise, what has changed?
                // So in the case that i==k, we're looking at paths that start at i (which is k), then go on to use k 
                // later as an intermediate elsewhere in the path, and then finally finish at j. There's clearly no way 
                // that this path can be shorter than the previous best which didn't loop back through k.
                // This is actually really important because without this fact, the various iterations of i wouldn't 
                // be independent and would require synchronization.
                continue;
            }
            for(uint32_t j=0; j<NUM_SYSTEMS; j++) {
                uint32_t prev = cost[(NUM_SYSTEMS * i) + j];
                uint32_t candidate = cost[(NUM_SYSTEMS * i) + k] + cost[(NUM_SYSTEMS * k) + j];
                if(candidate < prev) {
                    cost[(NUM_SYSTEMS * i) + j] = candidate;
                    next[(NUM_SYSTEMS * i) + j] = next[(NUM_SYSTEMS * i) + k];
                }
            }
        }
    }
    
    gettimeofday(&stop, NULL);
    double elapsed = (stop.tv_sec - start.tv_sec) + ((double)(stop.tv_usec - start.tv_usec))/1000000;
    std::cout << "done. Elapsed time " << std::fixed << std::setprecision(2) << elapsed << " seconds.\n";
    
    uint32_t startNode = 30000029, endNode = 30000050;
    uint32_t startID = forwardIDMap[30000029], endID = forwardIDMap[30000050];
    std::cout << "Calculating the quickest route from " << startNode << " to " << endNode << "... ";
    gettimeofday(&start, NULL);
    uint32_t cur = startID, count = 0;
    while(cur != endID) {
        //std::cout << backwardIDMap[cur] << '\n';
        cur = next[(NUM_SYSTEMS * cur) + endID];
        count++;
    }
    gettimeofday(&stop, NULL);
    elapsed = (stop.tv_sec - start.tv_sec)*1000 + ((double)(stop.tv_usec - start.tv_usec))/1000;
    std::cout << "done.\nBuilt a " << count << " hop route in " << std::fixed << std::setprecision(3) << elapsed << " ms.\n";
 }
	#include <iostream>
	#include <iomanip>
	#include <fstream>
	#include <sstream>
	#include <string>
	#include <vector>
	#include <unordered_map>
	#include <sys/time.h>

	// We specify the total number of systems explicitly so that we can initialize data structures before reading in the whole csv file.
	// This lets us process data as we go through the file rather than caching it all until we've counted the number of lines.
	static constexpr uint32_t NUM_SYSTEMS = 5428;

	int main(int argc, const char * argv[]) {
	timeval start, stop;
	gettimeofday(&start, NULL);

	std::ifstream dataFile("jumps.csv");

	std::string curLine;
	if(!std::getline(dataFile, curLine)) {
	std::cerr << "Couldn't load jumps data.\n";
	return 1;
	}

	std::cout << "Indexing... ";

	// Discard the first line. It's just a list of human-readable labels.

	// Create arrays which will be used to execute the algorithm. We allocate a big block just to avoid calling new 5000 times...
	uint32_t cost = new uint32_t[NUM_SYSTEMSNUM_SYSTEMS];
	uint32_t next = new uint32_t[NUM_SYSTEMSNUM_SYSTEMS];

	for(uint32_t i=0; i<NUM_SYSTEMS*NUM_SYSTEMS; i++) {
	cost[i] = NUM_SYSTEMS;
	}

	for(uint32_t i=0; i<NUM_SYSTEMS; i++) {
	// Any node can reach itself at no cost.
	cost[(NUM_SYSTEMS * i) + i] = 0;

	// Any node should advertise itself as the best possible path to itself.
	next[(NUM_SYSTEMS * i) + i] = i;
	}

	// Hash map for translating big IDs into short IDs.
	std::unordered_map<uint32_t, uint32_t> forwardIDMap;
	forwardIDMap.reserve(NUM_SYSTEMS);

	// Vector for translating short IDs back into their big IDs. The ith entry in this vector is the big ID equivalent of the short ID i.
	std::vector<uint32_t> backwardIDMap;
	backwardIDMap.reserve(NUM_SYSTEMS);

	// The next short ID that we will assign to a big ID.
	uint32_t nextID = 0;

	// Read in the data file.
	while(std::getline(dataFile, curLine)) {
	size_t commaIndex = curLine.find(',');

	// Big IDs read from the current line.
	uint32_t from = atol(curLine.substr(0, commaIndex).c_str());
	uint32_t to = atol(curLine.substr(commaIndex + 1).c_str());

	// Short IDs which we will assign/discover.
	uint32_t fromID;
	uint32_t toID;

	// Look up (or assign) the short ID for the "from" big ID read from the current line.
	auto fromResultIter = forwardIDMap.find(from);
	if(fromResultIter == forwardIDMap.end()) {
	// Assign a new short ID to this big ID.
	forwardIDMap[from] = nextID;
	backwardIDMap.push_back(from);
	fromID = nextID;
	nextID++;
	} else {
	// This big ID has already been assigned a short ID.
	fromID = fromResultIter->second;
	}

	// Look up (or assign) the short ID for the "to" big ID read from the current line.
	auto toResultIter = forwardIDMap.find(to);
	if(toResultIter == forwardIDMap.end()) {
	// Assign a new short ID to this big ID.
	forwardIDMap[to] = nextID;
	backwardIDMap.push_back(to);
	toID = nextID;
	nextID++;
	} else {
	// This big ID has already been assigned a short ID.
	toID = toResultIter->second;
	}

	// You can get from fromID to toID in one hop, since they're adjacent.
	cost[(NUM_SYSTEMS * fromID) + toID] = 1;

	// The fastest way to get from fromID to toID is through toID, since they're adjacent.
	next[(NUM_SYSTEMS * fromID) + toID] = toID;
	}

	// Floyd's algorithm
	for(uint32_t k=0; k<NUM_SYSTEMS; k++) {
	#pragma omp parallel for shared(k, cost, next) schedule(dynamic)
	for(uint32_t i=0; i<NUM_SYSTEMS; i++) {
	if(i==k) {
	// It's impossible to find a better path from i to any j in this case. Think about it:
	// After the previous pass, we have a path from i to j using the first k-1 nodes as intermediates.
	// If we're to improve on it during this pass (using the first k nodes as intermediates instead of the
	// first k-1 nodes), then the kth node has to be on the path somewhere. Otherwise, what has changed?
	// So in the case that i==k, we're looking at paths that start at i (which is k), then go on to use k
	// later as an intermediate elsewhere in the path, and then finally finish at j. There's clearly no way
	// that this path can be shorter than the previous best which didn't loop back through k.
	// This is actually really important because without this fact, the various iterations of i wouldn't
	// be independent and would require synchronization.
	continue;
	}
	for(uint32_t j=0; j<NUM_SYSTEMS; j++) {
	uint32_t prev = cost[(NUM_SYSTEMS * i) + j];
	uint32_t candidate = cost[(NUM_SYSTEMS * i) + k] + cost[(NUM_SYSTEMS * k) + j];
	if(candidate < prev) {
	cost[(NUM_SYSTEMS * i) + j] = candidate;
	next[(NUM_SYSTEMS * i) + j] = next[(NUM_SYSTEMS * i) + k];
	}
	}
	}
	}

	gettimeofday(&stop, NULL);
	double elapsed = (stop.tv_sec - start.tv_sec) + ((double)(stop.tv_usec - start.tv_usec))/1000000;
	std::cout << "done. Elapsed time " << std::fixed << std::setprecision(2) << elapsed << " seconds.\n";

	uint32_t startNode = 30000029, endNode = 30000050;
	uint32_t startID = forwardIDMap[30000029], endID = forwardIDMap[30000050];
	std::cout << "Calculating the quickest route from " << startNode << " to " << endNode << "... ";
	gettimeofday(&start, NULL);
	uint32_t cur = startID, count = 0;
	while(cur != endID) {
	//std::cout << backwardIDMap[cur] << '\n';
	cur = next[(NUM_SYSTEMS * cur) + endID];
	count++;
	}
	gettimeofday(&stop, NULL);
	elapsed = (stop.tv_sec - start.tv_sec)*1000 + ((double)(stop.tv_usec - start.tv_usec))/1000;
	std::cout << "done.\nBuilt a " << count << " hop route in " << std::fixed << std::setprecision(3) << elapsed << " ms.\n";
	}