Skip to content

Instantly share code, notes, and snippets.

@conradlee
Created November 4, 2011 11:31
Show Gist options
  • Save conradlee/1339145 to your computer and use it in GitHub Desktop.
Save conradlee/1339145 to your computer and use it in GitHub Desktop.
Edge-list parser using Python and NumPy's memmap
import os
import subprocess

import numpy
# Record layout of one stored edge: two unsigned 32-bit node ids followed by
# a 64-bit float weight. Used as the memmap dtype for writing and reading.
weighted_edge_dtype = [
    ("n1", numpy.uint32),
    ("n2", numpy.uint32),
    ("weight", numpy.float64),
]
def convert_edgelist_to_mmap(in_filename):
    """Convert a text edge list into a binary ``numpy.memmap`` file.

    Every non-blank line of ``in_filename`` must contain exactly three
    whitespace-separated fields: ``n1 n2 weight``. Blank lines are skipped,
    and the last line does not need a trailing newline.

    The output is written next to the input and is named
    ``<stem>_<num_edges>.mmap_edgelist``, where ``<stem>`` is the input's
    base name up to its first dot. The edge count is embedded in the name
    because it is needed again to re-open the memmap.

    Args:
        in_filename: Path of the text edge list to convert.

    Returns:
        The path of the memmap file that was written.

    Raises:
        ValueError: If a non-blank line does not have exactly three fields
            or a field cannot be parsed as a number.
    """
    # The memmap must be pre-allocated, so the number of edges has to be known
    # before any edge is parsed. Counting in Python (instead of shelling out to
    # `wc -l`) avoids building a shell command from the filename, works
    # without a Unix toolchain, and counts exactly the lines that will be
    # stored, including a final line that lacks a trailing newline.
    with open(in_filename) as infile:
        num_edges = sum(1 for line in infile if line.strip())

    # Derive the stem from the base name only, so that dots in the directory
    # part of the path (e.g. "./edges.txt") do not truncate the output path.
    directory, basename = os.path.split(in_filename)
    stem = basename.split(".")[0]
    out_filename = os.path.join(
        directory, stem + "_" + str(num_edges) + ".mmap_edgelist"
    )

    # Note: node ids are stored as unsigned 32-bit integers, so they must be
    # non-negative and must not exceed 4,294,967,295.
    fp = numpy.memmap(
        out_filename, dtype=weighted_edge_dtype, mode="w+", shape=(num_edges,)
    )
    with open(in_filename) as infile:
        edge_lines = (line for line in infile if line.strip())
        for i, line in enumerate(edge_lines):
            n1, n2, weight = line.split()
            fp[i] = (int(n1), int(n2), float(weight))

    # Push the data to disk before handing the filename back to the caller,
    # then drop our reference to the mapping.
    fp.flush()
    del fp
    return out_filename
def get_mmap_edges(mmap_filename):
    """Open an edge-list memmap file read-only.

    The number of edges is recovered from the filename, which is expected to
    look like ``<name>_<num_edges>.<extension>`` (for example
    ``edges_1200.mmap_edgelist``).

    Args:
        mmap_filename: Path of the memmap file to open.

    Returns:
        A read-only ``numpy.memmap`` of shape ``(num_edges,)`` with dtype
        ``weighted_edge_dtype``.

    Raises:
        ValueError: If no integer edge count can be read from the filename.
    """
    # Only look at the base name so that dots or underscores in directory
    # names cannot interfere with parsing.
    stem = os.path.splitext(os.path.basename(mmap_filename))[0]
    count_text = stem.rpartition("_")[2]
    try:
        num_edges = int(count_text)
    except ValueError:
        raise ValueError(
            "cannot read edge count from filename %r; "
            "expected '<name>_<num_edges>.mmap_edgelist'" % (mmap_filename,)
        )
    return numpy.memmap(
        mmap_filename, dtype=weighted_edge_dtype, mode="r", shape=(num_edges,)
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment