Created
June 28, 2015 13:13
-
-
Save Btibert3/365fea161e831b389153 to your computer and use it in GitHub Desktop.
Stackoverflow - Load CSV import
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Apply the Constraints | |
CREATE CONSTRAINT ON (s:Student) ASSERT s.pidm IS UNIQUE; | |
CREATE CONSTRAINT ON (v:Vendor) ASSERT v.name IS UNIQUE; | |
CREATE CONSTRAINT ON (t:Topic) ASSERT t.name IS UNIQUE; | |
CREATE CONSTRAINT ON (e:Email) ASSERT e.msgid IS UNIQUE; | |
CREATE CONSTRAINT ON (a:Ability) ASSERT a.name IS UNIQUE; | |
CREATE CONSTRAINT ON (c:Contact) ASSERT c.cid IS UNIQUE; | |
CREATE CONSTRAINT ON (p:Template) ASSERT p.name IS UNIQUE; | |
CREATE CONSTRAINT ON (v:Version) ASSERT v.version IS UNIQUE; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Create the Vendors so we can just match on them - 404 ms, 783 on 2.2 | |
USING PERIODIC COMMIT 10000 | |
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/vendors.csv" AS row | |
WITH row | |
MERGE (v:Vendor {name:row.vendors}); | |
// Create the abilities - 164ms, 164 | |
USING PERIODIC COMMIT 10000 | |
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/ability.csv" AS row | |
WITH row | |
MERGE (a:Ability {name:row.ability}); | |
// Create the topics - 154 ms, 468 after a huge gap | |
USING PERIODIC COMMIT 10000 | |
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/topics.csv" AS row | |
WITH row | |
MERGE (t:Topic {name:row.topics}); | |
// Load the templates - 554 ms, 543 | |
USING PERIODIC COMMIT 10000 | |
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/templates.csv" AS row | |
WITH row | |
MATCH (r:Vendor {name:row.vendor}) | |
WITH row, r | |
MERGE (p:Template {name:row.template_clean}) | |
MERGE (v:Version {version:row.template_ver}) | |
MERGE (p)-[:FROM_VERSION]->(v) | |
MERGE (p)-[:CREATED_BY]->(r); | |
// Associate templates with topics - 239 ms, 233 | |
USING PERIODIC COMMIT 10000 | |
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/template_topic.csv" AS row | |
WITH row | |
MATCH (p:Template {name:row.template_clean}) | |
MATCH (t:Topic {name:row.topic}) | |
WITH row, p, t | |
MERGE (p)-[:HAS_TOPIC]->(t) | |
// Create the students - 16434 ms, 35964 (77962 students) | |
USING PERIODIC COMMIT 10000 | |
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/recs.csv" AS row | |
WITH row | |
MERGE (s:Student {pidm:toInt(row.pidm), | |
hash_pidm:toInt(row.hash_pidm), | |
sid:row.id, | |
race:row.ethn_code, | |
sex:row.sex, | |
major:row.majr_code, | |
rsta:row.rsta_code, | |
sanc:row.sanc_score, | |
sahc:row.sahc_score, | |
hs_state: row.schl_state_code}); | |
// Associate the students with vendors and abilities - 16408 ms, 80404 ms | |
USING PERIODIC COMMIT 10000 | |
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/recs.csv" AS row | |
WITH row | |
MATCH (s:Student {pidm: toInt(row.pidm)} ) | |
MATCH (v:Vendor {name: row.vendor} ) | |
MATCH (a:Ability {name: row.ability} ) | |
WITH row, s, v, a | |
MERGE (s)-[:PURCHASED_FROM]->(v) | |
MERGE (s)-[:HAS_ABILITY]->(a); | |
// Create the contacts - 15782 ms | |
USING PERIODIC COMMIT 10000 | |
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/cont.csv" AS row | |
WITH row | |
MATCH (s:Student {pidm:toInt(row.pidm)} ) | |
WITH row, s | |
MERGE (c:Contact {code:row.contcode, | |
cid:row.cid, | |
category:row.contcat, | |
seqnum:toInt(row.seqnum), | |
timestamp:toInt(row.timestamp)} ) | |
MERGE (s)-[:HAS_CONTACT]->(c); | |
// First and last contacts - 15959 ms | |
USING PERIODIC COMMIT 10000 | |
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/cont.csv" AS row | |
WITH row | |
MATCH (s:Student {pidm:toInt(row.pidm)}) | |
WITH row, s, toInt(row.seqnum) as seq | |
WHERE seq = 1 | |
MATCH (f:Contact {cid:row.first_cont}) | |
MATCH (l:Contact {cid:row.last_cont}) | |
WITH s, f, l | |
MERGE (s)-[:FIRST]->(f) | |
MERGE (s)-[:LAST]->(l); | |
// Create the contact chain - 1351 ms | |
USING PERIODIC COMMIT 10000 | |
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/cont.csv" AS row | |
WITH row, toInt(row.seqnum) as seq | |
WHERE seq > 1 | |
MATCH (c:Contact {cid:row.cid}) | |
MATCH (p:Contact {cid:row.prev_contact}) | |
WITH c, p | |
MERGE (p)-[:NEXT]->(c); | |
// BRM email data | |
// About 5 minutes for a load of all data from scratch | |
USING PERIODIC COMMIT 10000 | |
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/brm.csv" AS row | |
WITH row | |
MATCH (s:Student {pidm:toInt(row.pidm)}) | |
WITH row, s | |
MERGE (e:Email {refdesc:row.refdesc, | |
subject:row.subject, | |
msgid:row.msgid, | |
commdate:row.commdate, | |
seqnum:toInt(row.seqnum), | |
deliv: toInt(row.delivflag), | |
optout: toInt(row.optoutflag), | |
open_ind: toInt(row.open_ind), | |
opens: toInt(row.opens), | |
click_ind: toInt(row.click_ind), | |
clicks: toInt(row.clicks)} ) | |
MERGE (s)-[:WAS_SENT]->(e); | |
// First and last emails | |
USING PERIODIC COMMIT 10000 | |
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/email_first_last.csv" AS row | |
WITH row | |
MATCH (s:Student {pidm:toInt(row.pidm)}) | |
MATCH (f:Email {msgid:row.first_email}) | |
MATCH (l:Email {msgid:row.last_email}) | |
WITH s, f, l | |
MERGE (s)-[:FIRST]->(f) | |
MERGE (s)-[:LAST]->(l); | |
// Create the email chain | |
USING PERIODIC COMMIT 10000 | |
LOAD CSV WITH HEADERS FROM "file:///Users/btibert/Dropbox/Projects/bentley-search-neo4j/data/email_chain.csv" AS row | |
WITH row | |
MATCH (c:Email {cid:row.msgid}) | |
MATCH (p:Email {cid:row.last_email}) | |
WITH c, p | |
MERGE (p)-[:NEXT]->(c); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment