Last active
September 17, 2021 09:32
-
-
Save iwiwi/5351417 to your computer and use it in GitHub Desktop.
Download and decode WebGraph-format graphs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
#
# Compile WebGraphDecoder.java and use it to dump a WebGraph dataset as a
# tab-separated edge list (<basename>.tsv).
#
# http://webgraph.dsi.unimi.it/
#
# Run this from the directory that contains WebGraphDecoder.java; every
# non-hidden entry of the current directory is placed on the Java classpath
# (presumably this is where the WebGraph jars live -- adjust if not).
#
# Usage (example):
#
#   % ls ~/Desktop/WebGraph
#   enron-nat.graph       enron-nat.properties
#
#   % ./decode.sh ~/Desktop/WebGraph/enron-nat
#
#   % ls ~/Desktop/WebGraph
#   enron-nat.graph       enron-nat.offsets     enron-nat.tsv
#   enron-nat.obl         enron-nat.properties
#

if [ "$#" -ne 1 ]; then
    echo "Usage: $0 <graph-basename>" >&2
    exit 1
fi

# Join the directory entries with ':'. A glob is used instead of parsing
# `ls --format=commas` output so that this works without GNU ls and with
# file names that contain spaces or commas.
CP=
for entry in *; do
    # In an empty directory the pattern stays a literal '*'; skip it.
    [ -e "$entry" ] || continue
    CP="${CP:+$CP:}$entry"
done

# Each step runs only if the previous one succeeded, so a failed compile
# no longer falls through to running a stale or missing class.
javac -cp "$CP" WebGraphDecoder.java &&
java -cp "$CP" it.unimi.dsi.webgraph.BVGraph -o -O -L "$1" &&
java -cp "$CP:." WebGraphDecoder "$1"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Download the .properties, .graph and .md5sums files of one dataset.
#
# http://law.di.unimi.it/datasets.php
#
# Usage:
#   % download.sh http://data.law.di.unimi.it/webdata/cnr-2000/cnr-2000
#

if [ "$#" -ne 1 ]; then
    echo "Usage: $0 <dataset-base-url>" >&2
    exit 1
fi

for ext in .properties .graph .md5sums; do
    # Quoted so that URLs containing '?', '&' or spaces reach wget intact.
    wget -c "$1$ext"
done
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import it.unimi.dsi.fastutil.ints.IntArrayFIFOQueue; | |
import it.unimi.dsi.fastutil.ints.IntArrays; | |
import it.unimi.dsi.logging.ProgressLogger; | |
import it.unimi.dsi.webgraph.GraphClassParser; | |
import it.unimi.dsi.webgraph.ImmutableGraph; | |
import it.unimi.dsi.webgraph.LazyIntIterator; | |
import java.io.*; | |
import java.util.*; | |
public class WebGraphDecoder { | |
static public void main(String arg[]) throws Exception { | |
ImmutableGraph graph = ImmutableGraph.load(arg[0]); | |
BufferedWriter bw = new BufferedWriter(new FileWriter(arg[0] + ".tsv")); | |
int num_v = graph.numNodes(); | |
System.out.printf("Vertices: %d\n", num_v); | |
System.out.printf("Edges: %d\n", graph.numArcs()); | |
int num_e = 0; | |
for (int v = 0; v < num_v; ++v) { | |
LazyIntIterator successors = graph.successors(v); | |
for (int i = 0; i < graph.outdegree(v); ++i) { | |
int w = successors.nextInt(); | |
bw.write(Integer.toString(v)); | |
bw.write("\t"); | |
bw.write(Integer.toString(w)); | |
bw.write("\n"); | |
++num_e; | |
} | |
} | |
bw.flush(); | |
System.out.printf("Output Edges: %d\n", num_e); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment