Skip to content

Instantly share code, notes, and snippets.

@mikeblum
Last active October 3, 2020 19:42
Show Gist options
  • Save mikeblum/c69b2af5a6409651a58a4f01e77c9e9a to your computer and use it in GitHub Desktop.
Save mikeblum/c69b2af5a6409651a58a4f01e77c9e9a to your computer and use it in GitHub Desktop.
Import XKCD Into Neo4j
CALL apoc.periodic.commit("
OPTIONAL MATCH (current:Xkcd) WITH current ORDER BY current.id DESC LIMIT 1
OPTIONAL MATCH (oldest:Xkcd) WITH current, oldest ORDER BY oldest.id ASC LIMIT 1
WITH current.id AS current_issue,
CASE
WHEN oldest IS NULL THEN 1
WHEN oldest.id = 1 THEN current.id END
AS oldest_issue, 'https://xkcd.com/info.0.json' AS uri
CALL apoc.load.jsonParams(uri, null, null)
YIELD value WITH value.num AS latest_issue, current_issue, oldest_issue
WITH latest_issue, current_issue, oldest_issue,
CASE WHEN oldest_issue = 1 AND latest_issue = current_issue
THEN []
ELSE RANGE(latest_issue, oldest_issue, -1) END
AS issue_ids
UNWIND issue_ids AS issue_id
WITH latest_issue, oldest_issue, 'https://xkcd.com/' + issue_id + '/info.0.json' AS uri
CALL apoc.load.jsonParams(uri, null, null, null, {failOnError: false})
YIELD value
MERGE (issue:Xkcd {id: value.num})
SET issue.link = CASE WHEN TRIM(value.link) <> '' THEN TRIM(value.link) ELSE NULL END,
issue.news = CASE WHEN TRIM(value.news) <> '' THEN TRIM(value.news) ELSE NULL END,
issue.safe_title = value.safe_title,
issue.transcript = CASE WHEN TRIM(value.transcript) <> '' THEN TRIM(value.transcript) ELSE NULL END,
issue.alt = CASE WHEN TRIM(value.alt) <> '' THEN TRIM(value.alt) ELSE NULL END,
issue.img = value.img,
issue.title = value.title,
issue.date = date({year: toInteger(value.year), month: toInteger(value.month), day: toInteger(value.day)})
WITH issue, latest_issue
OPTIONAL MATCH (xkcd:Xkcd)
RETURN latest_issue - COUNT(xkcd);") YIELD failedBatches RETURN failedBatches;
// indexes
WITH 1 AS schema
WHERE NOT apoc.schema.node.indexExists('Xkcd', ['id'])
CALL db.createIndex('xkcd_id', ['Xkcd'], ['id'], 'native-btree-1.0') YIELD name, labels, properties
RETURN name, labels, properties;
WITH 1 AS schema
WHERE NOT apoc.schema.node.indexExists('Xkcd', ['date'])
CALL db.createIndex('xkcd_date', ['Xkcd'], ['date'], 'native-btree-1.0') YIELD name, labels, properties
RETURN name, labels, properties;
WITH 1 AS schema
WHERE NOT apoc.schema.node.indexExists('Xkcd', ['title', 'safe_title', 'transcript', 'alt'])
CALL db.index.fulltext.createNodeIndex("titlesAndDescriptions",["Xkcd"],["title", "safe_title", "transcript", "alt"])
RETURN schema;
// Relationships
MATCH (issue:Xkcd)
WITH issue ORDER BY issue.id ASC
WITH COLLECT(issue.id) AS issues
CALL apoc.coll.zipToRows(issues, TAIL(issues)) YIELD value AS pair
MATCH (a:Xkcd {id: pair[0]}) WITH pair, a
MATCH (b:Xkcd {id: pair[1]}) WHERE a.id <> pair[1]
MERGE (a)-[:NEXT]->(b)
WITH a, b
MERGE (a)<-[:PREV]-(b);
// find the latest issue
MATCH (xkcd:Xkcd) WHERE NOT (xkcd)-[:NEXT]->() RETURN xkcd;
// find the oldest issue
MATCH (xkcd:Xkcd) WHERE NOT (xkcd)-[:PREV]->() RETURN xkcd;
// delete the latest issue
MATCH (latest:Xkcd) WHERE NOT (latest)-[:NEXT]->() WITH latest LIMIT 1 DETACH DELETE latest;
// full-text search across titles, alt-text, etc
CALL db.index.fulltext.queryNodes("titlesAndDescriptions", "giraffes") YIELD node, score
RETURN node.title, node.alt, score;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment