Skip to content

Instantly share code, notes, and snippets.

@lifthrasiir
Created October 22, 2012 01:56
Show Gist options
  • Save lifthrasiir/3929238 to your computer and use it in GitHub Desktop.
Save lifthrasiir/3929238 to your computer and use it in GitHub Desktop.
SK Planet Code Sprint 2012 Round 1 Submission
// Compile: CPPFLAGS='-W -Wall -O3' make shortest.cpp
//
// Okay, what the heck is this? This is a way to exploit the possible integer overflow
// in the submission system. It tries to generate the paths with their maximum length
// very slightly exceeding 2^32. The input graph has several heavy (read: of a great length)
// 2-cycles, so it is possible to construct compact path by repeating 2-cycles.
#include <cstdio>
#include <cstdlib>
#include <stdint.h>
#include <vector>
#include <deque>
#include <map>
#include <utility>
#include <functional>
#include <boost/heap/fibonacci_heap.hpp>
using namespace std;
using namespace boost::heap;
struct node_t {
int16_t diff;
uint16_t value;
};
#ifndef N
#define N 1000000
#endif
#define INF 100000000000ll // > 47397 * 1000000
#define WRAPOVER 0x100000000ll
node_t *graph[N];
int graphdeg[N];
uint64_t dist[N], realdist[N];
int prev[N];
struct less_dist : public binary_function<int, int, bool> {
bool operator()(const int lhs, const int rhs) const {
return dist[lhs] > dist[rhs];
}
};
fibonacci_heap<int, compare<less_dist> > q;
fibonacci_heap<int, compare<less_dist> >::handle_type handles[N];
vector<pair<pair<int,int>,int> > cycletargets;
int main_find_2_cycles(int /*argc*/, char **argv) {
int limit = atoi(argv[2]);
for (int i = 0; i < N; ++i) {
fread(&graphdeg[i], sizeof(int), 1, stdin);
graph[i] = (node_t*) malloc(graphdeg[i] * sizeof(node_t));
fread(graph[i], sizeof(node_t), graphdeg[i], stdin);
for (int j = 0; j < graphdeg[i]; ++j) {
if (graph[i][j].value >= limit) {
cycletargets.push_back(make_pair(make_pair(i, (i+graph[i][j].diff+N)%N), graph[i][j].value));
}
}
if (i % 10000 == 0) fprintf(stderr, "reading %d out of %d... \r", i, N);
}
fprintf(stderr, "read complete, %d cycle targets\n", (int) cycletargets.size());
for (int u = 0; u < (int) cycletargets.size(); ++u) {
int source = cycletargets[u].first.first;
int target = cycletargets[u].first.second;
fprintf(stderr, "trying to find the shortest cycle starting with %d--%d--...--%d\n", source, target, source);
for (int i = 0; i < N; ++i) {
dist[i] = realdist[i] = INF;
prev[i] = -1;
}
dist[target] = realdist[target] = 0;
q.clear();
for (int i = 0; i < N; ++i) {
handles[i] = q.push(i);
}
while (!q.empty()) {
int u = q.top();
if (u == source) break;
q.pop();
if (dist[u] >= INF) break;
if (q.size() % 10000 == 0) fprintf(stderr, "%d vertices to relax... \r", (int) q.size());
for (int i = 0; i < graphdeg[u]; ++i) {
node_t *node = &graph[u][i];
int v = (u + (int) node->diff + N) % N;
uint64_t alt = dist[u] + 1; // since we are minimizing the length of cycle
if (alt < dist[v]) {
dist[v] = alt;
realdist[v] = dist[u] + node->value;
prev[v] = u;
q.decrease(handles[v]);
}
}
}
deque<int> path;
int cur = source;
while (cur >= 0) {
path.push_front(cur);
cur = prev[cur];
}
path.push_front(source);
if (false) {
printf("%lld %lld ", cycletargets[u].second + realdist[source], (cycletargets[u].second + realdist[source]) / (path.size() - 1));
bool first = true;
for (deque<int>::const_iterator it = path.begin(); it != path.end(); ++it) {
if (first) first = false; else printf(",");
printf("%d", *it);
}
printf("\n");
}
if (path.size() == 3) {
uint64_t weight = cycletargets[u].second + realdist[source];
printf("\t\t{%lld, %d, %d}, {%lld, %d, %d},\n", weight, path[0], path[1], weight, path[1], path[0]);
}
}
return 0;
}
int main(int argc, char **argv) {
if (argc > 2 && strcmp(argv[1], "search") == 0) {
return main_find_2_cycles(argc, argv);
}
for (int i = 0; i < N; ++i) {
fread(&graphdeg[i], sizeof(int), 1, stdin);
graph[i] = (node_t*) malloc(graphdeg[i] * sizeof(node_t));
fread(graph[i], sizeof(node_t), graphdeg[i], stdin);
if (i % 10000 == 0) fprintf(stderr, "reading %d out of %d... \r", i, N);
}
fprintf(stderr, "read complete \n");
struct { uint64_t weight; int a, b; } cycles[] = {
#include "2cycles.h"
};
const int ncycles = sizeof(cycles)/sizeof(*cycles);
// strategy:
// extend one of the path (preferably, the longest in the original) like this:
// start ----------------> cycle_a -> cycle_b -> ... -> cycle_a -> cycle_b -----------------> end (unknown)
// to make the length of the path exceeds WRAPOVER. other paths are kept as is.
// 1. find the shortest path from the starts to cycle_a (and others for the later use)
vector<vector<uint64_t> > prefixdists; // prefixdists[start][end]
vector<vector<int> > prefixprevs;
vector<vector<deque<int> > > prefixpaths;
for (int k = 1; k < argc; ++k) {
fprintf(stderr, "phase 1: calculating the path from start %d to cycle starts\n", atoi(argv[k]));
for (int i = 0; i < N; ++i) {
dist[i] = INF;
prev[i] = -1;
}
dist[atoi(argv[k])] = 0;
q.clear();
for (int i = 0; i < N; ++i) {
handles[i] = q.push(i);
}
while (!q.empty()) {
int u = q.top();
q.pop();
if (dist[u] >= INF) break;
if (q.size() % 10000 == 0) fprintf(stderr, "%d vertices to relax... \r", (int) q.size());
for (int i = 0; i < graphdeg[u]; ++i) {
node_t *node = &graph[u][i];
int v = (u + (int) node->diff + N) % N;
uint64_t alt = dist[u] + node->value;
if (alt < dist[v]) {
dist[v] = alt;
prev[v] = u;
q.decrease(handles[v]);
}
}
}
vector<uint64_t> dists;
vector<deque<int> > paths;
for (int c = 0; c < ncycles; ++c) {
deque<int> path;
int cur = cycles[c].a;
while (cur >= 0) {
path.push_front(cur);
cur = prev[cur];
}
paths.push_back(path);
}
prefixdists.push_back(vector<uint64_t>(dist, dist + N));
prefixprevs.push_back(vector<int>(prev, prev + N));
prefixpaths.push_back(paths);
}
// 2. find the shortest path from the cycle_a to every node
// in this case we don't yet know the end node, so we should keep the entire dist/prev arrays
vector<vector<uint64_t> > suffixdists; // suffixdists[suffixindices[cycle_a]][end]
vector<vector<int> > suffixprevs;
vector<int> suffixindices;
map<int, int> suffixmaps;
for (int c = 0; c < ncycles; ++c) {
if (suffixmaps.count(cycles[c].a) > 0) {
// avoid the duplicate calculation
suffixindices.push_back(suffixmaps[cycles[c].a]);
} else {
fprintf(stderr, "phase 2: calculating the path from cycle start %d to others\n", cycles[c].a);
for (int i = 0; i < N; ++i) {
dist[i] = INF;
prev[i] = -1;
}
dist[cycles[c].a] = 0;
q.clear();
for (int i = 0; i < N; ++i) {
handles[i] = q.push(i);
}
while (!q.empty()) {
int u = q.top();
q.pop();
if (dist[u] >= INF) break;
if (q.size() % 10000 == 0) fprintf(stderr, "%d vertices to relax... \r", (int) q.size());
for (int i = 0; i < graphdeg[u]; ++i) {
node_t *node = &graph[u][i];
int v = (u + (int) node->diff + N) % N;
uint64_t alt = dist[u] + node->value;
if (alt < dist[v]) {
dist[v] = alt;
prev[v] = u;
q.decrease(handles[v]);
}
}
}
suffixindices.push_back(suffixdists.size());
suffixmaps[cycles[c].a] = suffixdists.size();
suffixdists.push_back(vector<uint64_t>(dist, dist + N));
suffixprevs.push_back(vector<int>(prev, prev + N));
}
}
// 3. find the best possible end node
// the length of expanded path is:
// dist(start, cycle_a) + #repeats * dist(cycle_a, cycle_b, cycle_a) + dist(cycle_a, end)
// where #repeats is the minimum value such that the entire length >= WRAPOVER. thus:
// #repeats = ceil((WRAPOVER - dist(start, cycle_a) - dist(cycle_a, end)) / dist(cycle_a, cycle_b, cycle_a))
const int ntargets = (int) prefixdists.size();
uint64_t minmaxdist = INF;
int bestc = -1, bestend = -1, bestk0 = -1;
for (int c = 0; c < ncycles; ++c) {
int cyclea = cycles[c].a;
int cycleb = cycles[c].b;
uint64_t cyclew = cycles[c].weight;
fprintf(stderr, "phase 3: expanding a path using cycle %d <-> %d (current best %lld)\n", cyclea, cycleb, minmaxdist);
for (int end = 0; end < N; ++end) {
uint64_t suffixdist = suffixdists[suffixindices[c]][end];
// try to expand the path for k0 and keep the paths for others
for (int k0 = 0; k0 < ntargets; ++k0) {
uint64_t maxdist = 0;
for (int k = 0; k < ntargets; ++k) {
uint64_t totaldist;
if (k == k0) {
totaldist = prefixdists[k][cyclea] + suffixdist;
totaldist += (WRAPOVER - totaldist + cyclew - 1) / cyclew * cyclew; // poor man's ceiling
totaldist %= WRAPOVER;
} else {
totaldist = prefixdists[k][end];
}
maxdist = max(maxdist, totaldist);
}
if (minmaxdist > maxdist) {
minmaxdist = maxdist;
bestc = c;
bestend = end;
bestk0 = k0;
}
}
}
}
// 4. generate paths using the best cycle and end node
int bestcyclea = cycles[bestc].a;
const vector<uint64_t>& bestsuffixdist = suffixdists[suffixindices[bestc]];
const vector<int>& bestsuffixprev = suffixprevs[suffixindices[bestc]];
fprintf(stderr, "best cycle: %d <-> %d, best end node: %d, best expanded path: %d, best score: %lld truncated\n",
bestcyclea, cycles[bestc].b, bestend, bestk0, minmaxdist);
for (int k = 0; k < ntargets; ++k) {
if (k == bestk0) { // expanded path
uint64_t totaldist = prefixdists[k][bestcyclea] + bestsuffixdist[bestend];
uint64_t nrepeats = (WRAPOVER - totaldist + cycles[bestc].weight - 1) / cycles[bestc].weight;
bool first = true;
for (deque<int>::const_iterator it = prefixpaths[k][bestc].begin(); it != prefixpaths[k][bestc].end(); ++it) {
if (first) first = false; else printf(",");
printf("%d", *it);
}
// the prefix will end with cyclea
while (nrepeats-- > 0) printf(",%d,%d", cycles[bestc].b, cycles[bestc].a);
// the cycle part will end with cyclea
vector<int> revsuffix;
int cur = bestend;
while (cur >= 0) {
revsuffix.push_back(cur);
cur = bestsuffixprev[cur];
}
for (int i = (int) revsuffix.size() - 2; i >= 0; --i) printf(",%d", revsuffix[i]);
printf("\n");
} else { // normal path, uses prefixdists[k] for tracing
const vector<int>& bestprefixprev = prefixprevs[k];
deque<int> path;
int cur = bestend;
while (cur >= 0) {
path.push_front(cur);
cur = bestprefixprev[cur];
}
bool first = true;
for (deque<int>::const_iterator it = path.begin(); it != path.end(); ++it) {
if (first) first = false; else printf(",");
printf("%d", *it);
}
printf("\n");
}
}
}
#!/bin/sh
g++ -O3 -W -Wall preproc.cpp -o preproc
g++ -O3 -W -Wall -DN=1000000 cheat.cpp -o cheat
gzcat input3.tar.gz | ./preproc 1000000 1 > cached_graph
# or:
#tar xzvf input3.tar.gz && ./preproc 1000000 0 < input/input.txt > cached_graph
./cheat search 40000 < cached_graph > 2cycles.h
./cheat 3336 100214 250000 370000 403333 603336 700000 860000 973000 < cached_graph > cheat-result.txt
#include <cstdio>
#include <cstdlib>
#include <stdint.h>
#include <vector>
using namespace std;
struct node_t {
int16_t diff;
uint16_t value;
};
int N;
vector<node_t*> graph;
vector<int> graphdeg;
void add_outedges(int inedge, const vector<int>& outedges, const vector<int>& weights) {
int deg = (int) outedges.size();
node_t *edges = (node_t*) malloc(sizeof(node_t) * deg);
for (int i = 0; i < deg; ++i) {
int diff = outedges[i] - inedge;
edges[i].diff = (diff > N/2 ? diff - N : (diff < -N/2 ? diff + N : diff));
edges[i].value = weights[i];
}
graph[inedge] = edges;
graphdeg[inedge] = deg;
}
int main(int argc, char **argv) {
if (argc < 3) return 1;
N = atoi(argv[1]);
graph.resize(N);
graphdeg.resize(N);
int inedge, outedge, weight;
int lastinedge = 0;
vector<int> outedges, weights;
if (atoi(argv[2])) {
char buf[1024];
fread(buf, sizeof buf, 1, stdin); // tar header
}
while (scanf("%d,%d,%d\n", &inedge, &outedge, &weight) == 3) {
if (inedge != lastinedge) {
add_outedges(lastinedge, outedges, weights);
lastinedge = inedge;
outedges.clear();
weights.clear();
if (inedge % 10000 == 0) fprintf(stderr, "reading %d out of %d...\n", inedge, N);
}
outedges.push_back(outedge);
weights.push_back(weight);
}
add_outedges(lastinedge, outedges, weights);
for (int i = 0; i < N; ++i) {
if (i % 10000 == 0) fprintf(stderr, "writing %d out of %d...\n", i, N);
fwrite(&graphdeg[i], sizeof(int), 1, stdout);
fwrite(graph[i], sizeof(node_t), graphdeg[i], stdout);
}
}
#!/bin/sh
g++ -O3 -W -Wall preproc.cpp -o preproc
g++ -O3 -W -Wall -DN=1000000 shortest.cpp -o shortest
gzcat input3.tar.gz | ./preproc 1000000 1 > cached_graph
# or:
#tar xzvf input3.tar.gz && ./preproc 1000000 0 < input/input.txt > cached_graph
./shortest 3336 100214 250000 370000 403333 603336 700000 860000 973000 < cached_graph > result.txt
// Compile: CPPFLAGS='-W -Wall -O3' make shortest.cpp
//
// Straightforward Dijkstra. Fibonacci heap from Boost. Expected memory usage 2.4GB.
// Most notably, the memory usage was reduced by 50% from the fact that the input graph is sparse
// and mostly circular (low difference between in-vertex and out-vertex).
//
// Input graph characterstics:
// - Number of vertices V = 1'000'000
// - Number of edges E = 600'212'420 (thus E << V^2)
// - Maximal weight = 47'397 (thus uint64_t distance)
// - Maximal difference between two endpoints of edges = 8'232 (thus int16_t diff)
#include <cstdio>
#include <cstdlib>
#include <stdint.h>
#include <vector>
#include <deque>
#include <functional>
#include <boost/heap/fibonacci_heap.hpp>
using namespace std;
using namespace boost::heap;
struct node_t {
int16_t diff;
uint16_t value;
};
#ifndef N
#define N 1000000
#endif
#define INF 100000000000ll // > 47397 * 1000000
node_t *graph[N];
int graphdeg[N];
uint64_t dist[N];
int prev[N];
struct less_dist : public binary_function<int, int, bool> {
bool operator()(const int lhs, const int rhs) const {
return dist[lhs] > dist[rhs];
}
};
fibonacci_heap<int, compare<less_dist> > q;
fibonacci_heap<int, compare<less_dist> >::handle_type handles[N];
vector<vector<int> > dists;
vector<vector<int> > prevs;
int main(int argc, char **argv) {
for (int i = 0; i < N; ++i) {
fread(&graphdeg[i], sizeof(int), 1, stdin);
graph[i] = (node_t*) malloc(graphdeg[i] * sizeof(node_t));
fread(graph[i], sizeof(node_t), graphdeg[i], stdin);
if (i % 10000 == 0) fprintf(stderr, "reading %d out of %d...\n", i, N);
}
fprintf(stderr, "read complete\n");
for (int k = 1; k < argc; ++k) {
int target = atoi(argv[k]);
fprintf(stderr, "processing target %d\n", target);
for (int i = 0; i < N; ++i) {
dist[i] = INF;
prev[i] = -1;
}
dist[target] = 0;
q.clear();
for (int i = 0; i < N; ++i) {
handles[i] = q.push(i);
}
while (!q.empty()) {
int u = q.top();
q.pop();
if (dist[u] >= INF) break;
if (q.size() % 10000 == 0) fprintf(stderr, "%d vertices to relax...\n", (int) q.size());
for (int i = 0; i < graphdeg[u]; ++i) {
node_t *node = &graph[u][i];
int v = (u + (int) node->diff + N) % N;
uint64_t alt = dist[u] + node->value;
if (alt < dist[v]) {
dist[v] = alt;
prev[v] = u;
q.decrease(handles[v]);
}
}
}
char buf[200];
sprintf(buf, "target%d.txt", target);
FILE *fp = fopen(buf, "w");
for (int i = 0; i < N; ++i) {
fprintf(fp, "%lld %d\n", dist[i], prev[i]);
}
fclose(fp);
dists.push_back(vector<int>(dist, dist + N));
prevs.push_back(vector<int>(prev, prev + N));
}
int ntargets = (int) dists.size();
int minmaxdist = 999999999, best = -1;
for (int i = 0; i < N; ++i) {
int maxdist = 0;
for (int j = 0; j < ntargets; ++j) {
maxdist = max(maxdist, dists[j][i]);
}
if (minmaxdist > maxdist) {
minmaxdist = maxdist;
best = i;
}
}
fprintf(stderr, "best score: %d for %d\n", minmaxdist, best);
for (int j = 0; j < ntargets; ++j) {
deque<int> path;
int cur = best;
while (cur >= 0) {
path.push_front(cur);
cur = prevs[j][cur];
}
bool first = true;
for (deque<int>::const_iterator it = path.begin(); it != path.end(); ++it) {
if (first) first = false; else printf(",");
printf("%d", *it);
}
printf("\n");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment