Last active
January 12, 2018 16:50
-
-
Save MatMoore/b478226c010cc0ad7582956e7ddd3591 to your computer and use it in GitHub Desktop.
Building a graph of GOV.UK
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Run in publishing_api_development.
-- These queries need reviewing to filter out links to non-live content.

-- Export every link. The source content id comes from the link set for
-- document-level links, or from the edition for edition-level links.
\copy (select l.target_content_id, coalesce(d.content_id, d2.content_id) as source_content_id, l.link_type, l.position, case when l.edition_id is null then 'document' else 'edition' end as link_scope from links l left outer join link_sets ls on l.link_set_id = ls.id left outer join documents d on ls.content_id = d.content_id left outer join editions e on e.id = l.edition_id left outer join documents d2 on d2.id = e.document_id) to 'content-links.csv' with csv header;

-- Export live content items. base_path is included because the Neo4j
-- import script reads row.base_path when creating nodes; without it every
-- node gets a null basePath.
\copy (select d.content_id, d.locale, e.title, e.base_path, e.description, e.document_type, e.analytics_identifier, e.state from documents d join editions e on e.document_id = d.id and e.content_store = 'live') to 'content.csv' with csv header;

-- Map each document to the document that owns it.
\copy (select d.content_id, d2.content_id as owning_content_id from documents d join documents d2 on d.owning_document_id = d2.id) to 'owning_documents.csv' with csv header;
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// First copy the CSVs into the Neo4j import directory, e.g.
// /Users/matmoore/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-488c5f78-fb53-4b30-bcbb-a5737ee9f59b/installation-3.3.1/import/

// Create one node per row of content.csv.
// NOTE(review): row.base_path is only populated if the export query selects
// a base_path column — confirm content.csv actually contains it.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content.csv" AS row
CREATE (:PublishedContentItem {contentId: row.content_id, locale: row.locale, title: row.title, basePath: row.base_path, description: row.description, documentType: row.document_type, analyticsIdentifier: row.analytics_identifier});
// Create indexes so the MATCH lookups in the link-import statements below
// do not require a full label scan per row.
CREATE INDEX ON :PublishedContentItem(contentId);
CREATE INDEX ON :PublishedContentItem(analyticsIdentifier);
CREATE INDEX ON :PublishedContentItem(basePath);
CREATE INDEX ON :PublishedContentItem(documentType);
// Create relationships, one LOAD CSV pass per link type.
// This may be missing some link types.

// taxons: source content item is tagged to the target taxon.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row
WHERE row.link_type = "taxons"
MATCH (target:PublishedContentItem {contentId: row.target_content_id})
MATCH (source:PublishedContentItem {contentId: row.source_content_id})
MERGE (source)-[:BELONGS_TO_TAXON]->(target);
// parent_taxons: source taxon is a child of the target taxon.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row
WHERE row.link_type = "parent_taxons"
MATCH (target:PublishedContentItem {contentId: row.target_content_id})
MATCH (source:PublishedContentItem {contentId: row.source_content_id})
MERGE (source)-[:CHILD_TAXON_OF]->(target);
// root_taxon / root_taxons: both spellings appear in the data, so accept
// either; modelled with the same CHILD_TAXON_OF relationship as parent_taxons.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row
WHERE row.link_type = "root_taxon" OR row.link_type = "root_taxons"
MATCH (target:PublishedContentItem {contentId: row.target_content_id})
MATCH (source:PublishedContentItem {contentId: row.source_content_id})
MERGE (source)-[:CHILD_TAXON_OF]->(target);
// children: note the direction is deliberately reversed relative to the
// other statements — the link points source -> child, so the CHILD_OF
// relationship goes from the target back to the source.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row
WHERE row.link_type = "children"
MATCH (target:PublishedContentItem {contentId: row.target_content_id})
MATCH (source:PublishedContentItem {contentId: row.source_content_id})
MERGE (target)-[:CHILD_OF]->(source);
// mainstream_browse_pages: source content item belongs to the target browse page.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row
WHERE row.link_type = "mainstream_browse_pages"
MATCH (target:PublishedContentItem {contentId: row.target_content_id})
MATCH (source:PublishedContentItem {contentId: row.source_content_id})
MERGE (source)-[:BELONGS_TO_BROWSE_PAGE]->(target);
// parent: source content item is part of the target item.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row
WHERE row.link_type = "parent"
MATCH (target:PublishedContentItem {contentId: row.target_content_id})
MATCH (source:PublishedContentItem {contentId: row.source_content_id})
MERGE (source)-[:PART_OF]->(target);
// topics: source content item belongs to the target specialist topic.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row
WHERE row.link_type = "topics"
MATCH (target:PublishedContentItem {contentId: row.target_content_id})
MATCH (source:PublishedContentItem {contentId: row.source_content_id})
MERGE (source)-[:BELONGS_TO_POLICY]->(target) ;
// policies: source content item belongs to the target policy.
// (The original script ran this statement twice; MERGE made the second run
// a no-op, but it doubled the import time, so the duplicate is removed.)
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row
WHERE row.link_type = "policies"
MATCH (target:PublishedContentItem {contentId: row.target_content_id})
MATCH (source:PublishedContentItem {contentId: row.source_content_id})
MERGE (source)-[:BELONGS_TO_POLICY]->(target);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment