Last active
August 22, 2018 09:55
-
-
Save jvilledieu/82ee3a27b4d3d41a73f2dabbc9f494fe to your computer and use it in GitHub Desktop.
Queries to extend the paradise papers dataset with pagrank, community detection, geocoding, etc
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Check schema | |
CALL db.schema() | |
//Identify countries with limited # of adresses | |
MATCH (a:Address) | |
WITH a, a.countries as countries | |
RETURN countries, count(a) as score | |
ORDER BY score ASC | |
LIMIT 50 | |
//Tag 10 addresses in France | |
MATCH (a:Address) WHERE a.countries="France" WITH a LIMIT 10 | |
CALL apoc.spatial.geocodeOnce(a.address) YIELD location | |
WITH a, location.latitude AS latitude, location.longitude AS longitude, location.description AS description | |
SET a.latitude = latitude, | |
a.longitude = longitude, | |
a.description = description | |
//Tag all addresses in France | |
CALL apoc.periodic.iterate('MATCH (a:Address) WHERE a.countries="France" RETURN a', | |
'CALL apoc.spatial.geocodeOnce(a.name) YIELD location | |
WITH a, location.latitude AS latitude, location.longitude AS longitude, | |
location.description AS description | |
SET a.latitude = latitude, | |
a.longitude = longitude, | |
a.description = description', {batchSize:100, parallel:true}) | |
//Compute PageRank | |
CALL algo.pageRank(null,null,{write:true,writeProperty:'pagerank_g'}) | |
//Detect entities with high PageRank | |
MATCH (a:Officer) | |
WHERE a.pagerank > 0.5 | |
WITH a | |
MATCH (a)-[r]-(b) | |
RETURN a, r, b, a.pagerank as score | |
//Detect communities with Louvain algo | |
CALL algo.louvain( | |
'MATCH (p:Officer) RETURN id(p) as id', | |
'MATCH (p1:Officer)-[:OFFICER_OF]->(:Entity)<-[:OFFICER_OF]-(p2:Officer) | |
RETURN id(p1) as source, id(p2) as target', | |
{graph:'cypher',write:true}); | |
//Top 20 clusters by size | |
MATCH (a:Officer) | |
RETURN a.communityLouvain as community, count(*) as size_of_community | |
ORDER by size_of_community DESC LIMIT 20; | |
//Detect communities with label propagation algo | |
CALL algo.labelPropagation( | |
'MATCH (p:Officer) RETURN id(p) as id', | |
'MATCH (p1:Officer)-[:OFFICER_OF]->(:Entity)<-[:OFFICER_OF]-(p2:Officer) | |
RETURN id(p1) as source, id(p2) as target', | |
"OUT", | |
{graph:'cypher',writeProperty:'communityLabelPropagation',write:true}); | |
//Identify connected components | |
CALL algo.unionFind(null, null, {write:true, partitionProperty:"component",weightProperty:'weight', defaultValue:0.0, threshold:1.0, concurrency:1}) | |
YIELD nodes, setCount, loadMillis, computeMillis, writeMillis; | |
//Top 20 connected components | |
MATCH (a) | |
RETURN a.component as component,count(*) as size_of_component | |
ORDER by size_of_component DESC LIMIT 20; | |
//Officer who are connected to other officers via same address or same company | |
MATCH (a:Officer) | |
WITH a | |
MATCH (a)-[rel1:REGISTERED_ADDRESS]->(b)<-[rel2:REGISTERED_ADDRESS]-(c:Officer), (a)-[rel3:OFFICER_OF|:SHAREHOLDER_OF]->(d:Entity)<-[rel4:OFFICER_OF]-(e:Officer) | |
RETURN a, b, c, d, e, rel1, rel2, rel3, rel4 | |
//Query to retrieve officers connected to an officer at depth of 4 | |
MATCH p = (a:Officer)-[*..4]-(b:Officer) | |
RETURN p | |
//Query to retrieve officers connected to an officer at depth of 4 | |
MATCH (a:Officer) | |
WHERE ID(a) = {{"My param":node}} | |
WITH a | |
MATCH p = (a:Officer)-[*..4]-(b:Officer) | |
WHERE a.communityLouvain = b.communityLouvain | |
RETURN p | |
//Remove homonyms (first name and last name) | |
MATCH (o:Officer) | |
WITH split(toLower(o.name), " ") AS name_parts, o | |
WITH name_parts[0] + " " + name_parts[-1] as name, collect(o.name) AS names, count(*) AS count | |
WHERE count > 1 | |
RETURN name, names, count | |
ORDER BY count DESC | |
//Addresses with multiple registered companies (and lat/long coordinates) | |
MATCH (a:Address)-[]-(b) | |
WITH count(b) as score, a where a.latitude >0 | |
RETURN a.description, score | |
ORDER BY score DESC limit 10 | |
//All the nodes which are part of the same community | |
MATCH (a:Officer)-[*]-(b:Officer) | |
WHERE a.communityLouvain = b.communityLouvain | |
RETURN a, b, * | |
//Add node degree | |
MATCH (a)-[r]-(b) | |
WITH a, count(b) as degree | |
SET a.degree = degree; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment