Skip to content

Instantly share code, notes, and snippets.

@MatMoore
Last active January 12, 2018 16:50
Show Gist options
  • Save MatMoore/b478226c010cc0ad7582956e7ddd3591 to your computer and use it in GitHub Desktop.
Save MatMoore/b478226c010cc0ad7582956e7ddd3591 to your computer and use it in GitHub Desktop.
Building a graph of GOV.UK
-- Run in publishing_api_development.
-- These queries need reviewing to filter out links to non-live content.

-- One row per link: the link target, the resolved source content id, the
-- link type/position, and whether the link hangs off a link set
-- ('document' scope) or a specific edition ('edition' scope).
\copy (select l.target_content_id,coalesce(d.content_id, d2.content_id) as source_content_id, l.link_type, l.position, case when l.edition_id is null then 'document' else 'edition' end as link_scope from links l left outer join link_sets ls on l.link_set_id=ls.id left outer join documents d on ls.content_id=d.content_id left outer join editions e on e.id=l.edition_id left outer join documents d2 on d2.id=e.document_id) to 'content-links.csv' with csv header;

-- Live editions only. base_path is selected because the Cypher node-creation
-- step reads row.base_path; without it every node's basePath is null.
-- NOTE(review): assumes editions has a base_path column — confirm against schema.
\copy (select d.content_id,d.locale,e.title,e.base_path,e.description,e.document_type,e.analytics_identifier,e.state from documents d join editions e on e.document_id=d.id and e.content_store='live') to 'content.csv' with csv header;

-- Map each document to the document that owns it.
\copy (select d.content_id, d2.content_id as owning_content_id from documents d join documents d2 on d.owning_document_id=d2.id) to 'owning_documents.csv' with csv header;
// First copy the CSVs into Neo4j's import directory, e.g.
// /Users/matmoore/Library/Application Support/Neo4j Desktop/Application/neo4jDatabases/database-488c5f78-fb53-4b30-bcbb-a5737ee9f59b/installation-3.3.1/import/

// Create one node per row of content.csv (live content items).
// NOTE(review): basePath is read from row.base_path — make sure the
// content.csv export actually includes a base_path column, otherwise
// every node's basePath will be null.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content.csv" AS row
CREATE (:PublishedContentItem {contentId: row.content_id, locale: row.locale, title: row.title, basePath: row.base_path, description: row.description, documentType: row.document_type, analyticsIdentifier: row.analytics_identifier});
// Create indexes on the properties the import and lookup queries match on.
// (contentId in particular is matched on every relationship import below.)
CREATE INDEX ON :PublishedContentItem(contentId);
CREATE INDEX ON :PublishedContentItem(analyticsIdentifier);
CREATE INDEX ON :PublishedContentItem(basePath);
CREATE INDEX ON :PublishedContentItem(documentType);
// Create some relationships from content-links.csv.
// This may be missing link types.

// taxons: a content item tagged to a taxon.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row
WHERE row.link_type = "taxons"
MATCH (target:PublishedContentItem {contentId: row.target_content_id})
MATCH (source:PublishedContentItem {contentId: row.source_content_id})
MERGE (source)-[:BELONGS_TO_TAXON]->(target);
// parent_taxons: link each taxon to its parent in the taxonomy.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row WHERE row.link_type = "parent_taxons"
MATCH (child:PublishedContentItem {contentId: row.source_content_id}),
      (parent:PublishedContentItem {contentId: row.target_content_id})
MERGE (child)-[:CHILD_TAXON_OF]->(parent);
// root_taxon(s): attach top-level taxons to the root. The original
// checked both spellings, so presumably both occur in the data; IN
// keeps that behaviour without the lowercase `or` the file otherwise avoids.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row
WHERE row.link_type IN ["root_taxon", "root_taxons"]
MATCH (target:PublishedContentItem {contentId: row.target_content_id})
MATCH (source:PublishedContentItem {contentId: row.source_content_id})
MERGE (source)-[:CHILD_TAXON_OF]->(target);
// children: note the reversed direction — the linked item (the child)
// points back at the item that declared the link (its parent).
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row WHERE row.link_type = "children"
MATCH (parent:PublishedContentItem {contentId: row.source_content_id}),
      (child:PublishedContentItem {contentId: row.target_content_id})
MERGE (child)-[:CHILD_OF]->(parent);
// mainstream_browse_pages: tag content onto a mainstream browse page.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row WHERE row.link_type = "mainstream_browse_pages"
MATCH (item:PublishedContentItem {contentId: row.source_content_id}),
      (browsePage:PublishedContentItem {contentId: row.target_content_id})
MERGE (item)-[:BELONGS_TO_BROWSE_PAGE]->(browsePage);
// parent: a content item that is part of a parent item.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row WHERE row.link_type = "parent"
MATCH (child:PublishedContentItem {contentId: row.source_content_id}),
      (parent:PublishedContentItem {contentId: row.target_content_id})
MERGE (child)-[:PART_OF]->(parent);
// topics: tag content onto a specialist topic.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row WHERE row.link_type = "topics"
MATCH (item:PublishedContentItem {contentId: row.source_content_id}),
      (topic:PublishedContentItem {contentId: row.target_content_id})
MERGE (item)-[:BELONGS_TO_SPECIALIST_TOPIC]->(topic);
// policies: tag content onto a policy.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM "file:///content-links.csv" AS row
WITH row WHERE row.link_type = "policies"
MATCH (item:PublishedContentItem {contentId: row.source_content_id}),
      (policy:PublishedContentItem {contentId: row.target_content_id})
MERGE (item)-[:BELONGS_TO_POLICY]->(policy);
// NOTE(review): removed a query that was a byte-for-byte duplicate of the
// "policies" import above. Because it used MERGE, re-running it created no
// new relationships — it was a pure no-op.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment