Last active
August 29, 2015 14:08
-
-
Save jvilledieu/bd7ee8e0df479f65d51a to your computer and use it in GitHub Desktop.
Cypher queries for Crunchbase. This is used to create analyse Crunch via Neo4j (see blog post: http://linkurio.us/the-crunchbase-graph-data-modelling/). The data remains the property of Crunchbase and its use is limited by the following license: https://info.crunchbase.com/docs/licensing-policy-v1/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//What is the biggest startup scene? | |
MATCH (a:CITY)<-[:HAS_CITY]-(b:COMPANY) | |
RETURN a.name as city, count(b) as score | |
ORDER BY score DESC | |
LIMIT 5 | |
//What is the market positioning of startups in San Francisco | |
MATCH (a:CITY {name:'San Francisco'})<-[:HAS_CITY]-(b:COMPANY) | |
WITH b | |
MATCH b-[:HAS_MARKET]->(c:MARKET) | |
RETURN c.name as market, count(c) as score | |
ORDER BY score DESC | |
LIMIT 5 | |
//The market positioning of French startups | |
MATCH (a:CITY {name:'Paris'})<-[:HAS_CITY]-(b:COMPANY)-[:HAS_MARKET]->(d:MARKET) | |
RETURN d.name, count(d) as score | |
ORDER BY score DESC | |
//VC-backed operating startups in Paris and their investors | |
MATCH (a:CITY {name:"Paris"})<-[:HAS_CITY]-(b:COMPANY) | |
WITH b | |
MATCH b-[:HAS_STATUS]->(a:STATUS {name:'operating'}), b-[:HAS_BEEN_FUNDED]->(d:FUNDING_ROUND) | |
WITH b, d | |
MATCH d<-[:HAS_FUNDED]-(investor:INVESTOR), d-[:HAS_ROUND_TYPE]->(e:ROUND_TYPE {name: 'seed'}) | |
RETURN b, collect(investor), count(investor) as score | |
ORDER BY score DESC | |
LIMIT 5 | |
//Investors who do seeds in SF | |
MATCH (a:CITY {name:'San Francisco'})<-[:HAS_CITY]-(b:COMPANY)-[:HAS_BEEN_FUNDED]->(c:FUNDING_ROUND)<-[:HAS_FUNDED]-(d:INVESTOR) | |
WITH b, c, d | |
MATCH c-[:HAS_ROUND_TYPE]->(e:ROUND_TYPE {name:'seed'}) | |
RETURN d.name as potential_investor, count(c) as count | |
ORDER BY count DESC | |
LIMIT 5 | |
//What do seed-stage startups do in France? | |
MATCH (a:CITY {name:'Paris'})<-[:HAS_CITY]-(b:COMPANY)-[:HAS_BEEN_FUNDED]->(c:FUNDING_ROUND)-[:HAS_ROUND_TYPE]->(e:ROUND_TYPE {name:'seed'}) | |
WITH b,c | |
MATCH b-[:HAS_MARKET]->(f:MARKET) | |
RETURN f.name as market , count(c)as score | |
ORDER BY score DESC | |
LIMIT 10 | |
//Investors who like early-stage Paris-based startups | |
MATCH (a:CITY {name:'Paris'})<-[:HAS_CITY]-(b:COMPANY)-[:HAS_BEEN_FUNDED]->(c:FUNDING_ROUND)<-[:HAS_FUNDED]-(d:INVESTOR) | |
WITH b, c, d | |
MATCH c-[:HAS_ROUND_TYPE]->(e:ROUND_TYPE {name:'seed'}) | |
RETURN d.name as potential_investor, count(b) as count | |
ORDER BY count DESC | |
LIMIT 5 | |
//The investor with the most investments? | |
MATCH (a:INVESTOR)-[r:HAS_FUNDED]->(b:FUNDING_ROUND) | |
RETURN a.name as name, count(r) as count | |
ORDER BY count DESC | |
LIMIT 5 | |
//What companies should Sequoia Capital co-invest with next? | |
MATCH (investor)-[:HAS_FUNDED]->(a)<-[:HAS_FUNDED]-(coinvestor) | |
WHERE investor.name = 'Sequoia Capital' AND NOT investor=coinvestor | |
WITH investor, coinvestor | |
MATCH (coinvestor)-[:HAS_FUNDED]->(round:FUNDING_ROUND)<-[:HAS_BEEN_FUNDED]-(opportunity:COMPANY) | |
WHERE NOT (investor)-[:HAS_FUNDED]->(round) | |
RETURN opportunity.name, count(DISTINCT coinvestor) as score | |
ORDER BY score DESC | |
LIMIT 5 | |
//What SF-based startup should Sequoia Capital invest in next? | |
MATCH (investor)-[:HAS_FUNDED]->(a)<-[:HAS_FUNDED]-(coinvestor) | |
WHERE investor.name = 'Sequoia Capital' AND NOT investor=coinvestor | |
WITH investor, coinvestor | |
MATCH (coinvestor)-[:HAS_FUNDED]->(round:FUNDING_ROUND)<-[:HAS_BEEN_FUNDED]-(opportunity:COMPANY) | |
WHERE NOT (investor)-[:HAS_FUNDED]->(round) AND (opportunity)-[:HAS_CITY]->(:CITY {name: 'San Francisco'}) | |
RETURN opportunity.name, count(DISTINCT round) as score | |
ORDER BY score DESC | |
LIMIT 5 | |
//Create a graph of co-investisors | |
//Create a graph of co-investisors | |
create constraint on (a:INVESTOR) assert a.permalink is unique; | |
create constraint on (i:FUNDING_ROUND) assert i.permalink is unique; | |
USING PERIODIC COMMIT 2000 | |
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line | |
FIELDTERMINATOR ';' | |
MERGE (a:INVESTOR { permalink: line.investor_permalink }) | |
ON CREATE SET a.name = line.investor_name, | |
a.category = line.investor_category_list, | |
a.market = line.investor_market, | |
a.country = line.investor_country_code, | |
a.state = line.investor_state_code, | |
a.region = line.investor_region, | |
a.city = line.investor_city, | |
a.logo = 'http://www.crunchbase.com' + line.permalink + '/primary-image/raw', | |
a.url = 'http://www.crunchbase.com' + line.investor_permalink; | |
USING PERIODIC COMMIT 1000 | |
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line | |
FIELDTERMINATOR ';' | |
MERGE (h:FUNDING_ROUND {permalink: line.funding_round_permalink}) | |
ON CREATE SET h.funded_at = line.funded_at, h.funded_month = line.funded_month, h.funded_quarter = line.funded_quarter, h.funded_year = toInt(line.funded_year), h.raised_amount_usd = toInt(line.raised_amount_usd), h. funding_round_code = line.funding_round_code, h.funding_round_type = line.funding_round_type, h.url = 'http://www.crunchbase.com' + line.permalink; | |
DROP CONSTRAINT ON (a:INVESTOR) ASSERT a.permalink IS UNIQUE; | |
CREATE INDEX ON :INVESTOR(permalink); | |
DROP CONSTRAINT ON (a:FUNDING_ROUND) ASSERT a.permalink IS UNIQUE; | |
CREATE INDEX ON :FUNDING_ROUND(permalink); | |
USING PERIODIC COMMIT 1000 | |
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line | |
FIELDTERMINATOR ';' | |
MATCH (a:INVESTOR { permalink: line.investor_permalink}),(b:FUNDING_ROUND { permalink: line.funding_round_permalink}) | |
CREATE (a)-[:HAS_FUNDED]->(b); | |
MATCH (investor)-[:HAS_FUNDED]->(a)<-[:HAS_FUNDED]-(coinvestor) | |
CREATE (investor)-[:KNOWS]->(coinvestor); | |
MATCH (a:FUNDING_ROUND)<-[r]-() | |
DELETE a, r; | |
//Degree centrality for the Investor's graph | |
match n-[r:KNOWS]->m | |
return n.name as Investor, count(r) as DegreeScore | |
order by DegreeScore desc | |
limit 5 | |
//Closeness centrality for the Investor's graph | |
MATCH (a), (b) WHERE a<>b WITH length(shortestPath((a)-[:KNOWS]-(b))) AS dist, a, b | |
RETURN DISTINCT sum(1.0/dist) AS close_central, a.name | |
ORDER BY close_central DESC | |
//Betweeness centrality for the Investor's graph | |
MATCH p=allShortestPaths((source:INVESTOR)-[*]-(target:INVESTOR)) | |
WHERE id(source) < id(target) and length(p) > 1 | |
UNWIND nodes(p)[1..-1] as n | |
RETURN n.name as Investor, count(*) as betweenness | |
limit 5 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment