Last active
October 3, 2020 19:42
-
-
Save mikeblum/c69b2af5a6409651a58a4f01e77c9e9a to your computer and use it in GitHub Desktop.
Import XKCD Into Neo4j
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CALL apoc.periodic.commit(" | |
OPTIONAL MATCH (current:Xkcd) WITH current ORDER BY current.id DESC LIMIT 1 | |
OPTIONAL MATCH (oldest:Xkcd) WITH current, oldest ORDER BY oldest.id ASC LIMIT 1 | |
WITH current.id AS current_issue, | |
CASE | |
WHEN oldest IS NULL THEN 1 | |
WHEN oldest.id = 1 THEN current.id END | |
AS oldest_issue, 'https://xkcd.com/info.0.json' AS uri | |
CALL apoc.load.jsonParams(uri, null, null) | |
YIELD value WITH value.num AS latest_issue, current_issue, oldest_issue | |
WITH latest_issue, current_issue, oldest_issue, | |
CASE WHEN oldest_issue = 1 AND latest_issue = current_issue | |
THEN [] | |
ELSE RANGE(latest_issue, oldest_issue, -1) END | |
AS issue_ids | |
UNWIND issue_ids AS issue_id | |
WITH latest_issue, oldest_issue, 'https://xkcd.com/' + issue_id + '/info.0.json' AS uri | |
CALL apoc.load.jsonParams(uri, null, null, null, {failOnError: false}) | |
YIELD value | |
MERGE (issue:Xkcd {id: value.num}) | |
SET issue.link = CASE WHEN TRIM(value.link) <> '' THEN TRIM(value.link) ELSE NULL END, | |
issue.news = CASE WHEN TRIM(value.news) <> '' THEN TRIM(value.news) ELSE NULL END, | |
issue.safe_title = value.safe_title, | |
issue.transcript = CASE WHEN TRIM(value.transcript) <> '' THEN TRIM(value.transcript) ELSE NULL END, | |
issue.alt = CASE WHEN TRIM(value.alt) <> '' THEN TRIM(value.alt) ELSE NULL END, | |
issue.img = value.img, | |
issue.title = value.title, | |
issue.date = date({year: toInteger(value.year), month: toInteger(value.month), day: toInteger(value.day)}) | |
WITH issue, latest_issue | |
OPTIONAL MATCH (xkcd:Xkcd) | |
RETURN latest_issue - COUNT(xkcd);") YIELD failedBatches RETURN failedBatches; | |
// indexes | |
WITH 1 AS schema | |
WHERE NOT apoc.schema.node.indexExists('Xkcd', ['id']) | |
CALL db.createIndex('xkcd_id', ['Xkcd'], ['id'], 'native-btree-1.0') YIELD name, labels, properties | |
RETURN name, labels, properties; | |
WITH 1 AS schema | |
WHERE NOT apoc.schema.node.indexExists('Xkcd', ['date']) | |
CALL db.createIndex('xkcd_date', ['Xkcd'], ['date'], 'native-btree-1.0') YIELD name, labels, properties | |
RETURN name, labels, properties; | |
WITH 1 AS schema | |
WHERE NOT apoc.schema.node.indexExists('Xkcd', ['title', 'safe_title', 'transcript', 'alt']) | |
CALL db.index.fulltext.createNodeIndex("titlesAndDescriptions",["Xkcd"],["title", "safe_title", "transcript", "alt"]) | |
RETURN schema; | |
// Relationships | |
MATCH (issue:Xkcd) | |
WITH issue ORDER BY issue.id ASC | |
WITH COLLECT(issue.id) AS issues | |
CALL apoc.coll.zipToRows(issues, TAIL(issues)) YIELD value AS pair | |
MATCH (a:Xkcd {id: pair[0]}) WITH pair, a | |
MATCH (b:Xkcd {id: pair[1]}) WHERE a.id <> pair[1] | |
MERGE (a)-[:NEXT]->(b) | |
WITH a, b | |
MERGE (a)<-[:PREV]-(b); | |
// find the latest issue | |
MATCH (xkcd:Xkcd) WHERE NOT (xkcd)-[:NEXT]->() RETURN xkcd; | |
// find the oldest issue | |
MATCH (xkcd:Xkcd) WHERE NOT (xkcd)-[:PREV]->() RETURN xkcd; | |
// delete the latest issue | |
MATCH (latest:Xkcd) WHERE NOT (latest)-[:NEXT]->() WITH latest LIMIT 1 DETACH DELETE latest; | |
// full-text search across titles, alt-text, etc | |
CALL db.index.fulltext.queryNodes("titlesAndDescriptions", "giraffes") YIELD node, score | |
RETURN node.title, node.alt, score; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment