conradlee · November 4, 2011 11:31
diff --git a/mmap_edgelist_parser.py b/mmap_edgelist_parser.py
 import numpy
 import subprocess

 # Record layout of one edge: two unsigned 32-bit node ids followed by a
 # 64-bit float weight.
 weighted_edge_dtype = [("n1", numpy.uint32), ("n2", numpy.uint32), ("weight", numpy.float64)]

 def convert_edgelist_to_mmap(in_filename):
     """Convert a text edge list into a numpy memmap of weighted_edge_dtype records.

     Every line of ``in_filename`` must hold exactly three whitespace-separated
     fields, ``n1 n2 weight``. Node ids are stored as unsigned 32-bit
     integers, so they must be non-negative and not exceed 4,294,967,295.

     The memmap is written into the same directory as the input. Its name is
     the input's base name up to the first ".", followed by
     ``_<num_edges>.mmap_edgelist``; the edge count is embedded in the name
     because it is needed again to load the memmap.

     Returns the path of the memmap file that was written.

     Raises ``subprocess.CalledProcessError`` if ``wc`` exits with an error
     and ``ValueError`` if a line does not split into three parsable fields.
     """
     # Imported locally so the function does not rely on a module-level import.
     import os

     # Use unix's wc to count lines because it is quicker than python at
     # parsing text files. An argument list (no shell) keeps file names with
     # spaces or shell metacharacters intact, and "--" stops a name that
     # starts with "-" from being read as an option.
     wc_output = subprocess.check_output(["wc", "-l", "--", in_filename])
     num_edges = int(wc_output.split()[0])  # wc_output looks like "numlines fname\n"

     # wc -l counts newline characters, so a last line that is not terminated
     # by a newline is missing from the count; add it back.
     with open(in_filename, "rb") as raw:
         raw.seek(0, os.SEEK_END)
         if raw.tell() > 0:
             raw.seek(-1, os.SEEK_END)
             if raw.read(1) != b"\n":
                 num_edges += 1

     # Only the base name is cut at its first "."; the directory part is kept
     # verbatim so that dots in directory names (e.g. "./" or "run.v1/") do
     # not truncate the output path.
     base = os.path.basename(in_filename)
     parent = in_filename[:len(in_filename) - len(base)]
     out_filename = parent + base.split(".")[0] + "_" + str(num_edges) + ".mmap_edgelist"

     # The memmap has to be pre-allocated, which is why the size is needed up front.
     fp = numpy.memmap(out_filename, dtype=weighted_edge_dtype, mode="w+", shape=(num_edges,))

     with open(in_filename) as infile:
         for i, line in enumerate(infile):
             n1, n2, weight = line.split()
             fp[i] = (int(n1), int(n2), float(weight))

     # Push the written records to disk before handing the file name back.
     fp.flush()
     return out_filename

 def get_mmap_edges(mmap_filename):
     """Open an edge-list memmap read-only, taking its length from the file name.

     The edge count is read from the second-to-last "."-separated component
     of ``mmap_filename``: whatever follows that component's last "_" is
     parsed as an integer (``"graph_42.mmap_edgelist"`` gives 42).

     Returns a ``numpy.memmap`` of ``weighted_edge_dtype`` records with shape
     ``(num_edges,)``, opened in mode ``"r"``.
     """
     # Component just before the extension, e.g. "graph_42".
     stem = mmap_filename.rsplit(".", 2)[-2]
     # Everything after the final underscore is the edge count.
     edge_count = int(stem.rpartition("_")[2])
     return numpy.memmap(
         mmap_filename,
         dtype=weighted_edge_dtype,
         mode="r",
         shape=(edge_count,),
     )
	import numpy
	import subprocess

	# Record layout of one edge: two unsigned 32-bit node ids followed by a
	# 64-bit float weight.
	weighted_edge_dtype = [("n1", numpy.uint32), ("n2", numpy.uint32), ("weight", numpy.float64)]

	def convert_edgelist_to_mmap(in_filename):
		"""Convert a text edge list into a numpy memmap of weighted_edge_dtype records.

		Every line of ``in_filename`` must hold exactly three whitespace-separated
		fields, ``n1 n2 weight``. Node ids are stored as unsigned 32-bit
		integers, so they must be non-negative and not exceed 4,294,967,295.

		The memmap is written into the same directory as the input. Its name is
		the input's base name up to the first ".", followed by
		``_<num_edges>.mmap_edgelist``; the edge count is embedded in the name
		because it is needed again to load the memmap.

		Returns the path of the memmap file that was written.

		Raises ``subprocess.CalledProcessError`` if ``wc`` exits with an error
		and ``ValueError`` if a line does not split into three parsable fields.
		"""
		# Imported locally so the function does not rely on a module-level import.
		import os

		# Use unix's wc to count lines because it is quicker than python at
		# parsing text files. An argument list (no shell) keeps file names with
		# spaces or shell metacharacters intact, and "--" stops a name that
		# starts with "-" from being read as an option.
		wc_output = subprocess.check_output(["wc", "-l", "--", in_filename])
		num_edges = int(wc_output.split()[0])  # wc_output looks like "numlines fname\n"

		# wc -l counts newline characters, so a last line that is not terminated
		# by a newline is missing from the count; add it back.
		with open(in_filename, "rb") as raw:
			raw.seek(0, os.SEEK_END)
			if raw.tell() > 0:
				raw.seek(-1, os.SEEK_END)
				if raw.read(1) != b"\n":
					num_edges += 1

		# Only the base name is cut at its first "."; the directory part is kept
		# verbatim so that dots in directory names (e.g. "./" or "run.v1/") do
		# not truncate the output path.
		base = os.path.basename(in_filename)
		parent = in_filename[:len(in_filename) - len(base)]
		out_filename = parent + base.split(".")[0] + "_" + str(num_edges) + ".mmap_edgelist"

		# The memmap has to be pre-allocated, which is why the size is needed up front.
		fp = numpy.memmap(out_filename, dtype=weighted_edge_dtype, mode="w+", shape=(num_edges,))

		with open(in_filename) as infile:
			for i, line in enumerate(infile):
				n1, n2, weight = line.split()
				fp[i] = (int(n1), int(n2), float(weight))

		# Push the written records to disk before handing the file name back.
		fp.flush()
		return out_filename

	def get_mmap_edges(mmap_filename):
		"""Open an edge-list memmap read-only, taking its length from the file name.

		The edge count is read from the second-to-last "."-separated component
		of ``mmap_filename``: whatever follows that component's last "_" is
		parsed as an integer (``"graph_42.mmap_edgelist"`` gives 42).

		Returns a ``numpy.memmap`` of ``weighted_edge_dtype`` records with shape
		``(num_edges,)``, opened in mode ``"r"``.
		"""
		# Component just before the extension, e.g. "graph_42".
		stem = mmap_filename.rsplit(".", 2)[-2]
		# Everything after the final underscore is the edge count.
		edge_count = int(stem.rpartition("_")[2])
		return numpy.memmap(
			mmap_filename,
			dtype=weighted_edge_dtype,
			mode="r",
			shape=(edge_count,),
		)