Last active
August 29, 2015 14:09
-
-
Save technige/c61c12a38ca216076974 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- encoding: utf-8 -*- | |
from __future__ import print_function | |
import random | |
from time import time | |
from py2neo import Graph, GraphError | |
# Letter pools from which the random names are assembled.
CONSONANTS = "bcdfghjklmnprstvwz"
VOWELS = "aeiou"

# Cypher statement that creates one Person node and sets its properties
# from the map supplied as parameter {A}.
CREATE_NODE = (
    "CREATE (a:Person)\n"
    "SET a = {A}\n"
    "RETURN a\n"
)

# Cypher statement that looks up two Person nodes by the user_id values
# supplied as parameters {A} and {B} and links them with a FOLLOWS
# relationship via CREATE UNIQUE.
CREATE_UNIQUE_RELATIONSHIP = (
    "MATCH (a:Person), (b:Person)\n"
    "WHERE a.user_id = {A} AND b.user_id = {B}\n"
    "CREATE UNIQUE (a)-[ab:FOLLOWS]->(b)\n"
    "RETURN ab\n"
)
def random_name_sequence():
    """ Yield an endless stream of random two-word names.

    Each word starts with an upper-case consonant and is followed by
    between one and four vowel-consonant pairs; the two words are
    separated by a single space.
    """
    while True:
        parts = []
        for _ in range(2):
            letters = [random.choice(CONSONANTS).upper()]
            pair_count = random.randint(1, 4)
            for _ in range(pair_count):
                # Vowel first, then consonant.
                letters += [random.choice(VOWELS), random.choice(CONSONANTS)]
            parts.append("".join(letters))
        yield " ".join(parts)
class RandomGraphGenerator(object):
    """ Fill a graph with random `Person` nodes and relationships
    between them, one Cypher transaction per batch.

    Nodes are numbered through their `user_id` property, starting at 1;
    `count` holds the highest `user_id` handed out by this generator.
    """

    def __init__(self, graph):
        """ Attach to `graph` and request a uniqueness constraint on
        the `user_id` property of `Person` nodes.

        :param graph: the py2neo graph to populate
        """
        self.graph = graph
        try:
            self.graph.schema.create_uniqueness_constraint("Person", "user_id")
        except GraphError:
            # Deliberately best-effort: presumably this fails when the
            # constraint already exists, which is fine -- TODO confirm.
            pass
        self.count = 0
        self.names = random_name_sequence()

    def create_nodes(self, count):
        """ Create a number of nodes in a single Cypher transaction.

        Every node receives the next sequential `user_id`, a random
        name and a random `born` year between 1900 and 1999.

        :param count: number of nodes to create
        """
        tx = self.graph.cypher.begin()
        for _ in range(count):
            self.count += 1
            parameters = {
                "A": {
                    "user_id": self.count,
                    "name": next(self.names),
                    "born": random.randint(1900, 1999),
                }
            }
            tx.append(CREATE_NODE, parameters)
        tx.commit()

    def create_unique_relationships(self, count):
        """ Create a number of unique relationships in a single Cypher
        transaction.

        Each relationship joins two different, randomly chosen
        `user_id` values in the range 1 to `self.count`.

        :param count: number of relationship statements to run
        :raise ValueError: if relationships are requested while fewer
            than two nodes have been created
        """
        # Two distinct end points are needed: with a single node the
        # rejection loop below could never terminate, and with none
        # randint() has an empty range. Check before opening the
        # transaction so that nothing is left open on failure.
        if count > 0 and self.count < 2:
            raise ValueError("at least two nodes are required to create "
                             "a relationship (have %d)" % self.count)
        tx = self.graph.cypher.begin()
        for _ in range(count):
            start_user_id = random.randint(1, self.count)
            end_user_id = start_user_id
            # Re-draw until the end node differs from the start node.
            while end_user_id == start_user_id:
                end_user_id = random.randint(1, self.count)
            parameters = {
                "A": start_user_id,
                "B": end_user_id,
            }
            tx.append(CREATE_UNIQUE_RELATIONSHIP, parameters)
        tx.commit()
def main():
    """ Run the bulk import: alternate batches of node creation and
    relationship creation, printing the time taken by each batch and by
    the import as a whole.
    """
    total = 120000
    tx_size = 1000
    generator = RandomGraphGenerator(Graph())
    print("Creating %d nodes and merging %d relationships in "
          "batches of %d" % (total, total, tx_size))
    started = time()
    batch_count = total // tx_size
    for _ in range(batch_count):
        # Node batch
        before = time()
        generator.create_nodes(tx_size)
        elapsed = time() - before
        print("Created %d nodes in %f seconds" % (tx_size, elapsed))
        # Relationship batch
        before = time()
        generator.create_unique_relationships(tx_size)
        elapsed = time() - before
        print("Merged %d relationships in %f seconds" % (tx_size, elapsed))
    print("Entire bulk import took %f seconds" % (time() - started))
if __name__ == "__main__":
    # Run this script against a fresh database: user_id numbering restarts
    # at 1 on every run and a uniqueness constraint is requested on
    # Person.user_id. Afterwards, use the browser to explore the data
    # created with a query such as `MATCH (p:Person {user_id:1}) RETURN p`.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment