Created
August 24, 2016 21:17
-
-
Save johnymontana/886688399d4f72c83ff2665476e0566c to your computer and use it in GitHub Desktop.
Import script for Enron emails into Neo4j
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Schema setup. Run these before the LOAD CSV statements so that every MERGE / MATCH
// on User.eid, User.email, Folder.name and Message.mid is index-backed.
// NOTE(review): `CREATE CONSTRAINT ON ... ASSERT` and `CREATE INDEX ON :Label(prop)` are
// the legacy forms of these commands; newer Neo4j releases use a different syntax —
// confirm against the target server version before running.
CREATE CONSTRAINT ON (u:User) ASSERT u.eid IS UNIQUE;
CREATE CONSTRAINT ON (f:Folder) ASSERT f.name IS UNIQUE;
CREATE CONSTRAINT ON (m:Message) ASSERT m.mid IS UNIQUE;
// Users are also looked up by email address; this is a plain index, not a uniqueness constraint.
CREATE INDEX ON :User(email);
// Load employees: one User node per row of employeelist.csv, keyed on eid.
// MERGE + SET makes the statement re-runnable: an existing User with the same eid
// is updated in place rather than duplicated.
LOAD CSV WITH HEADERS FROM "file:///employeelist.csv" AS row
MERGE (u:User {eid: row.eid})
SET u.firstName = row.firstName,
    u.lastName = row.lastName,
    u.email = row.Email_id;
// First pass over message.csv: create (or update) one Message node per row, keyed on mid,
// and make sure a User node exists for the sender's email address.
// Commits every 10000 rows to keep transaction size bounded.
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///message.csv" AS row
MERGE (m:Message {mid: row.mid})
SET m.body = row.body,
    m.subject = row.subject,
    m.message_id = row.message_id,
    m.date = row.date,
    m.sender = row.sender
// LOAD CSV yields null for empty fields and MERGE rejects a null property value,
// so rows without a sender keep their Message node but create no User.
WITH row
WHERE row.sender IS NOT NULL
MERGE (:User {email: row.sender});
// Second pass over message.csv: connect each sender to the message they sent and
// file the message under its folder.
// Both MATCH clauses must succeed; a row whose Message or sender User is not already
// in the graph produces no relationships at all (including no IN_FOLDER).
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///message.csv" AS row
MATCH (m:Message {mid: row.mid})
MATCH (u:User {email: row.sender})
MERGE (u)-[r:SENT]->(m)
SET r.date = row.date
// MERGE cannot take a null property value, so only rows that name a folder continue.
WITH m, row
WHERE row.folder IS NOT NULL
MERGE (f:Folder {name: row.folder})
MERGE (m)-[:IN_FOLDER]->(f);
// Load recipients: for each row of recipientinfo.csv, connect the message (row.mid) to the
// recipient User (row.rvalue) with a TO, CC or BCC relationship chosen by row.rtype.
// Rows with any other rtype still ensure the User node exists but create no relationship.
// Batched like the message.csv loads so the import does not run as one huge transaction.
USING PERIODIC COMMIT 10000
LOAD CSV WITH HEADERS FROM "file:///recipientinfo.csv" AS row
// MERGE rejects a null property value, so skip rows that carry no recipient address.
WITH row
WHERE row.rvalue IS NOT NULL
MATCH (m:Message {mid: row.mid})
MERGE (u:User {email: row.rvalue})
// The relationship type cannot be taken from data, so each type gets its own FOREACH
// over a list that has one element when rtype matches and is empty otherwise.
FOREACH (_ IN CASE row.rtype WHEN "TO" THEN [1] ELSE [] END | MERGE (m)-[:TO]->(u))
FOREACH (_ IN CASE row.rtype WHEN "CC" THEN [1] ELSE [] END | MERGE (m)-[:CC]->(u))
FOREACH (_ IN CASE row.rtype WHEN "BCC" THEN [1] ELSE [] END | MERGE (m)-[:BCC]->(u));
Hi! I would also be interested in having access to the cleaned database, or even to a Neo4j dump!
The original (not cleaned) dataset is available at https://www.cs.cmu.edu/~enron/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi, can you point me to the version of the dataset that matches this Cypher query?