Skip to content

Instantly share code, notes, and snippets.

@jvilledieu
Last active August 29, 2015 14:08
Show Gist options
  • Save jvilledieu/bd7ee8e0df479f65d51a to your computer and use it in GitHub Desktop.
Save jvilledieu/bd7ee8e0df479f65d51a to your computer and use it in GitHub Desktop.
Cypher queries for Crunchbase. This is used to create analyse Crunch via Neo4j (see blog post: http://linkurio.us/the-crunchbase-graph-data-modelling/). The data remains the property of Crunchbase and its use is limited by the following license: https://info.crunchbase.com/docs/licensing-policy-v1/
//What is the biggest startup scene?
MATCH (a:CITY)<-[:HAS_CITY]-(b:COMPANY)
RETURN a.name as city, count(b) as score
ORDER BY score DESC
LIMIT 5
//What is the market positioning of startups in San Francisco
MATCH (a:CITY {name:'San Francisco'})<-[:HAS_CITY]-(b:COMPANY)
WITH b
MATCH b-[:HAS_MARKET]->(c:MARKET)
RETURN c.name as market, count(c) as score
ORDER BY score DESC
LIMIT 5
//The market positioning of French startups
MATCH (a:CITY {name:'Paris'})<-[:HAS_CITY]-(b:COMPANY)-[:HAS_MARKET]->(d:MARKET)
RETURN d.name, count(d) as score
ORDER BY score DESC
//VC-backed operating startups in Paris and their investors
MATCH (a:CITY {name:"Paris"})<-[:HAS_CITY]-(b:COMPANY)
WITH b
MATCH b-[:HAS_STATUS]->(a:STATUS {name:'operating'}), b-[:HAS_BEEN_FUNDED]->(d:FUNDING_ROUND)
WITH b, d
MATCH d<-[:HAS_FUNDED]-(investor:INVESTOR), d-[:HAS_ROUND_TYPE]->(e:ROUND_TYPE {name: 'seed'})
RETURN b, collect(investor), count(investor) as score
ORDER BY score DESC
LIMIT 5
//Investors who do seeds in SF
MATCH (a:CITY {name:'San Francisco'})<-[:HAS_CITY]-(b:COMPANY)-[:HAS_BEEN_FUNDED]->(c:FUNDING_ROUND)<-[:HAS_FUNDED]-(d:INVESTOR)
WITH b, c, d
MATCH c-[:HAS_ROUND_TYPE]->(e:ROUND_TYPE {name:'seed'})
RETURN d.name as potential_investor, count(c) as count
ORDER BY count DESC
LIMIT 5
//What do seed-stage startups do in France?
MATCH (a:CITY {name:'Paris'})<-[:HAS_CITY]-(b:COMPANY)-[:HAS_BEEN_FUNDED]->(c:FUNDING_ROUND)-[:HAS_ROUND_TYPE]->(e:ROUND_TYPE {name:'seed'})
WITH b,c
MATCH b-[:HAS_MARKET]->(f:MARKET)
RETURN f.name as market , count(c)as score
ORDER BY score DESC
LIMIT 10
//Investors who like early-stage Paris-based startups
MATCH (a:CITY {name:'Paris'})<-[:HAS_CITY]-(b:COMPANY)-[:HAS_BEEN_FUNDED]->(c:FUNDING_ROUND)<-[:HAS_FUNDED]-(d:INVESTOR)
WITH b, c, d
MATCH c-[:HAS_ROUND_TYPE]->(e:ROUND_TYPE {name:'seed'})
RETURN d.name as potential_investor, count(b) as count
ORDER BY count DESC
LIMIT 5
//The investor with the most investments?
MATCH (a:INVESTOR)-[r:HAS_FUNDED]->(b:FUNDING_ROUND)
RETURN a.name as name, count(r) as count
ORDER BY count DESC
LIMIT 5
//What companies should Sequoia Capital co-invest with next?
MATCH (investor)-[:HAS_FUNDED]->(a)<-[:HAS_FUNDED]-(coinvestor)
WHERE investor.name = 'Sequoia Capital' AND NOT investor=coinvestor
WITH investor, coinvestor
MATCH (coinvestor)-[:HAS_FUNDED]->(round:FUNDING_ROUND)<-[:HAS_BEEN_FUNDED]-(opportunity:COMPANY)
WHERE NOT (investor)-[:HAS_FUNDED]->(round)
RETURN opportunity.name, count(DISTINCT coinvestor) as score
ORDER BY score DESC
LIMIT 5
//What SF-based startup should Sequoia Capital invest in next?
MATCH (investor)-[:HAS_FUNDED]->(a)<-[:HAS_FUNDED]-(coinvestor)
WHERE investor.name = 'Sequoia Capital' AND NOT investor=coinvestor
WITH investor, coinvestor
MATCH (coinvestor)-[:HAS_FUNDED]->(round:FUNDING_ROUND)<-[:HAS_BEEN_FUNDED]-(opportunity:COMPANY)
WHERE NOT (investor)-[:HAS_FUNDED]->(round) AND (opportunity)-[:HAS_CITY]->(:CITY {name: 'San Francisco'})
RETURN opportunity.name, count(DISTINCT round) as score
ORDER BY score DESC
LIMIT 5
//Create a graph of co-investisors
//Create a graph of co-investisors
create constraint on (a:INVESTOR) assert a.permalink is unique;
create constraint on (i:FUNDING_ROUND) assert i.permalink is unique;
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line
FIELDTERMINATOR ';'
MERGE (a:INVESTOR { permalink: line.investor_permalink })
ON CREATE SET a.name = line.investor_name,
a.category = line.investor_category_list,
a.market = line.investor_market,
a.country = line.investor_country_code,
a.state = line.investor_state_code,
a.region = line.investor_region,
a.city = line.investor_city,
a.logo = 'http://www.crunchbase.com' + line.permalink + '/primary-image/raw',
a.url = 'http://www.crunchbase.com' + line.investor_permalink;
USING PERIODIC COMMIT 1000
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line
FIELDTERMINATOR ';'
MERGE (h:FUNDING_ROUND {permalink: line.funding_round_permalink})
ON CREATE SET h.funded_at = line.funded_at, h.funded_month = line.funded_month, h.funded_quarter = line.funded_quarter, h.funded_year = toInt(line.funded_year), h.raised_amount_usd = toInt(line.raised_amount_usd), h. funding_round_code = line.funding_round_code, h.funding_round_type = line.funding_round_type, h.url = 'http://www.crunchbase.com' + line.permalink;
DROP CONSTRAINT ON (a:INVESTOR) ASSERT a.permalink IS UNIQUE;
CREATE INDEX ON :INVESTOR(permalink);
DROP CONSTRAINT ON (a:FUNDING_ROUND) ASSERT a.permalink IS UNIQUE;
CREATE INDEX ON :FUNDING_ROUND(permalink);
USING PERIODIC COMMIT 1000
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line
FIELDTERMINATOR ';'
MATCH (a:INVESTOR { permalink: line.investor_permalink}),(b:FUNDING_ROUND { permalink: line.funding_round_permalink})
CREATE (a)-[:HAS_FUNDED]->(b);
MATCH (investor)-[:HAS_FUNDED]->(a)<-[:HAS_FUNDED]-(coinvestor)
CREATE (investor)-[:KNOWS]->(coinvestor);
MATCH (a:FUNDING_ROUND)<-[r]-()
DELETE a, r;
//Degree centrality for the Investor's graph
match n-[r:KNOWS]->m
return n.name as Investor, count(r) as DegreeScore
order by DegreeScore desc
limit 5
//Closeness centrality for the Investor's graph
MATCH (a), (b) WHERE a<>b WITH length(shortestPath((a)-[:KNOWS]-(b))) AS dist, a, b
RETURN DISTINCT sum(1.0/dist) AS close_central, a.name
ORDER BY close_central DESC
//Betweeness centrality for the Investor's graph
MATCH p=allShortestPaths((source:INVESTOR)-[*]-(target:INVESTOR))
WHERE id(source) < id(target) and length(p) > 1
UNWIND nodes(p)[1..-1] as n
RETURN n.name as Investor, count(*) as betweenness
limit 5
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment