Last active
August 29, 2015 14:02
-
-
Save HaiyangXu/d93c832e3070159e0170 to your computer and use it in GitHub Desktop.
Kosaraju's algorithm for compute SCC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <fstream> | |
#include <vector> | |
#include <string> | |
#include <sstream> | |
#include <random> | |
#include <assert.h> | |
#include <algorithm> | |
#include <ctime> | |
#include <utility> | |
#include <stack> | |
#include <unordered_map> | |
using namespace std; | |
class Node{ | |
public: | |
Node(int idd=0 ,int ss=-1,int ff=-1):isVisited(false),id(idd),s(ss),f(ff) {} | |
int id; | |
int s; | |
int f; | |
bool isVisited; | |
vector<int> adj; | |
int operator[](size_t index) | |
{ | |
assert(index< adj.size()); | |
return adj[index]; | |
} | |
}; | |
typedef vector< Node > Graph; | |
// read from file with adjacency list representation of undirected graph | |
void getFromtxt(char *name, Graph &graph) | |
{ | |
std::clock_t c_start = std::clock(); | |
cout<<"Begin to read file!"<<endl; | |
ifstream input; | |
input.open(name);//read from a txt file | |
if(input.is_open()) | |
{ | |
int s,t; | |
std::string line; | |
graph.push_back( Node());// graph index begin with 1 not 0 | |
graph.reserve(875715); | |
while (input>> s >> t) | |
{ | |
if(s>=graph.size())graph.resize(s+1); | |
graph[s].adj.push_back(t); | |
} | |
std::clock_t c_end = std::clock(); | |
cout<<"Read file finished!"<<endl; | |
std::cout << "CPU time used: " | |
<< 1000.0 * (c_end-c_start) / CLOCKS_PER_SEC | |
<< " ms\n"; | |
} | |
else cout<<"Cannot open "<<name<<endl; | |
} | |
bool cmp(pair<int,int> &a, pair<int,int> &b) | |
{ | |
return a.first<b.first; | |
} | |
class SCC{ | |
public: | |
void compute(Graph &graph) | |
{ | |
std::clock_t c_start = std::clock(); | |
//prepare data | |
_graph.swap(graph); | |
getReverseGraph(_graph); | |
leader.resize(_graph.size()); | |
finished.resize(_graph.size()); | |
//first pass dfs compute finished | |
dfsloop(_graphrev); | |
getOrder(); | |
//second pass search graph node in the decreased order of their finished score compute by the first pass | |
dfsloop(_graph,2); | |
getSCCs(); | |
std::clock_t c_end = std::clock(); | |
std::cout << "Total time used: " | |
<< 1000.0 * (c_end-c_start) / CLOCKS_PER_SEC | |
<< " ms\n"; | |
} | |
void dfsloop(Graph &graph,int pass=1) | |
{ | |
std::clock_t c_start = std::clock(); | |
total=0; | |
start=-1; | |
if(pass==1)//first pass | |
{ | |
cout<<"first pass"<<endl; | |
int node=graph.size(); | |
while(--node>0) | |
{ | |
if(!graph[node].isVisited) | |
{ | |
start=node; | |
dfs(graph,node); | |
} | |
} | |
} | |
else //second pass | |
{ | |
cout<<"second pass"<<endl; | |
int node=finished.size(); | |
while(--node>0) | |
{ | |
if(!graph[finished[node].second].isVisited) | |
{ | |
start=finished[node].second; | |
dfs(graph,finished[node].second); | |
} | |
} | |
} | |
std::clock_t c_end = std::clock(); | |
std::cout << "CPU time used: " | |
<< 1000.0 * (c_end-c_start) / CLOCKS_PER_SEC | |
<< " ms\n"; | |
} | |
void dfs(Graph &graph,int node) | |
{ | |
graph[node].isVisited=true; | |
graph[node].s=start; | |
for(int i=0;i<graph[node].adj.size();++i) | |
if(!graph[graph[node][i]].isVisited) | |
dfs(graph,graph[node][i]); | |
++total; | |
graph[node].f=total;//finished time | |
//-------------------------------no recursive---------------------------- | |
//stack<Node> nodestack; | |
//nodestack.push(graph[node]); | |
//while(!nodestack.empty()) | |
//{ | |
// Node cur=nodestack.top(); | |
// nodestack.pop(); | |
// cur.isVisited=true; | |
// cur.s=start; | |
// int size=nodestack.size(); | |
// for(int i=0;i<cur.adj.size();++i) | |
// { | |
// if(!graph[cur[i]].isVisited) | |
// nodestack.push(graph[cur[i]]); | |
// } | |
// if(size==nodestack.size()) | |
// { | |
// ++total; | |
// | |
// } | |
//} | |
//-------------------------------------------------------------------------- | |
} | |
void getReverseGraph(Graph &graph) | |
{ | |
cout<<"Reverse Graph begin!"<<endl; | |
std::clock_t c_start = std::clock(); | |
_graphrev.resize(graph.size()); | |
for(int i=1;i<graph.size();++i) | |
{ | |
for(int j=0;j<graph[i].adj.size();++j) | |
_graphrev[graph[i][j]].adj.push_back(i); | |
} | |
std::clock_t c_end = std::clock(); | |
cout<<"Reverse Graph end!"<<endl; | |
std::cout << "CPU time used: " | |
<< 1000.0 * (c_end-c_start) / CLOCKS_PER_SEC | |
<< " ms\n"; | |
} | |
void getOrder() | |
{ | |
if(finished.size()<_graphrev.size()) | |
finished.resize(_graphrev.size()); | |
for(int i=1;i<_graphrev.size();++i) | |
{ | |
finished[i]=std::make_pair<int,int>(_graphrev[i].f,i); | |
} | |
sort(finished.begin(),finished.end(), cmp); | |
} | |
void getSCCs() | |
{ | |
if(leader.size()<_graph.size()) | |
leader.resize(_graph.size()); | |
for(int i=1;i<_graph.size();++i) | |
{ | |
leader[i]=_graph[i].s; | |
} | |
// | |
unordered_map<int,int> umap; | |
for(vector<int>::iterator it=leader.begin()+1;it!=leader.end();++it) | |
{ | |
if(umap.find(*it)!=umap.end()) | |
umap[*it]++; | |
else umap[*it]=1; | |
} | |
for(unordered_map<int,int>::iterator it=umap.begin();it!=umap.end();++it) | |
{ | |
sccNums.push_back(it->second); | |
} | |
sort(sccNums.begin(),sccNums.end(),std::greater<int>()); | |
for(int i=0;i<5&&i<sccNums.size();++i) | |
cout<<sccNums[i]<<endl; | |
/* | |
sort(leader.begin(),leader.end()); | |
vector<int> leaderid; | |
leaderid.resize(leader.size()); | |
vector<int>::iterator last=unique_copy(leader.begin(),leader.end(),leaderid.begin()); | |
for(vector<int>::iterator it=leaderid.begin()+1;it!=last;++it) | |
sccNums.push_back(count(leader.begin()+1,leader.end(),*it)); | |
sort(sccNums.begin(),sccNums.end(),std::greater<int>()); | |
for(int i=0;i<sccNums.size();++i) | |
cout<<sccNums[i]<<endl; | |
*/ | |
} | |
private: | |
Graph _graph; | |
Graph _graphrev; | |
// leader and finished are extra variables | |
vector<int> leader; | |
vector<std::pair<int,int>> finished; | |
int start;//start of dfs | |
int total;//total visited | |
vector<int> sccNums; | |
}; | |
int main(int argv,char **argc) | |
{ | |
Graph graph; | |
getFromtxt("SCC.txt",graph); | |
SCC scc; | |
if(graph.size()>2) | |
scc.compute(graph); | |
system("pause"); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Download the text file here. Zipped version here. (Right click and save link as)
The file contains the edges of a directed graph. Vertices are labeled as positive integers from 1 to 875714. Every row indicates an edge, the vertex label in first column is the tail and the vertex label in second column is the head (recall the graph is directed, and the edges are directed from the first column vertex to the second column vertex). So for example, the 11th row looks liks : "2 47646". This just means that the vertex with label 2 has an outgoing edge to the vertex with label 47646
Your task is to code up the algorithm from the video lectures for computing strongly connected components (SCCs), and to run this algorithm on the given graph.
Output Format: You should output the sizes of the 5 largest SCCs in the given graph, in decreasing order of sizes, separated by commas (avoid any spaces). So if your algorithm computes the sizes of the five largest SCCs to be 500, 400, 300, 200 and 100, then your answer should be "500,400,300,200,100". If your algorithm finds less than 5 SCCs, then write 0 for the remaining terms. Thus, if your algorithm computes only 3 SCCs whose sizes are 400, 300, and 100, then your answer should be "400,300,100,0,0".
WARNING: This is the most challenging programming assignment of the course. Because of the size of the graph you may have to manage memory carefully. The best way to do this depends on your programming language and environment, and we strongly suggest that you exchange tips for doing this on the discussion forums.