Last active
August 29, 2015 14:11
-
-
Save jvilledieu/e087e396c6e24c5b524c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Importing the file Companies.csv | |
//Creating the constraints | |
create constraint on (a:COMPANY) assert a.permalink is unique; | |
create constraint on (e:CITY) assert e.name is unique; | |
//Creating the companies & Cities | |
USING PERIODIC COMMIT 2000 | |
LOAD CSV WITH HEADERS FROM "file:c:/Companies.csv" AS line | |
FIELDTERMINATOR ';' | |
MERGE (e:CITY {name: line.city}); | |
USING PERIODIC COMMIT 2000 | |
LOAD CSV WITH HEADERS FROM "file:c:/Companies.csv" AS line | |
FIELDTERMINATOR ';' | |
CREATE (a:COMPANY { | |
name: line.name, | |
permalink: line.permalink, | |
homepage_url: line.homepage_url, | |
funding_total: toInt(line.funding_total_usd), | |
funding_rounds: toInt(line.funding_rounds), | |
founded_at: line.founded_at, | |
founded_month: line.founded_month, | |
founded_quarter: line.founded_quarter, | |
founded_year: toInt(line.founded_year), | |
first_funding_at: line.first_funding_at, | |
last_funding_at: line.last_funding_at, | |
category: line.category_list, | |
market: line.market, | |
status: line.status, | |
country: line.country_code, | |
state: line.state_code, | |
region: line.region, | |
city: line.city, | |
url: 'http://www.crunchbase.com' + line.permalink, | |
logo: 'http://www.crunchbase.com' + line.permalink + '/primary-image/raw' | |
}); | |
//Countries and Companies | |
MATCH (a:COMPANY) | |
MATCH (b:CITY { name: a.city }) | |
CREATE (a)-[:HAS_CITY]->(b); | |
//Creating an index on the companies | |
DROP CONSTRAINT ON (a:COMPANY) ASSERT a.permalink IS UNIQUE; | |
CREATE INDEX ON :COMPANY(permalink); | |
//Importing the file Investments.csv | |
//Creating the constraints | |
create constraint on (a:INVESTOR) assert a.permalink is unique; | |
//Creating the investors | |
USING PERIODIC COMMIT 1000 | |
LOAD CSV WITH HEADERS FROM "file:c:/Users/Jean/Downloads/Investments.csv" AS line | |
FIELDTERMINATOR ';' | |
MERGE (d:CITY {name: line.investor_city}) | |
USING PERIODIC COMMIT 2000 | |
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line | |
FIELDTERMINATOR ';' | |
MATCH (a:COMPANY { permalink: line.investor_permalink }) | |
SET a:INVESTOR; | |
USING PERIODIC COMMIT 2000 | |
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line | |
FIELDTERMINATOR ';' | |
MERGE (a:INVESTOR { permalink: line.investor_permalink }) | |
ON CREATE SET a.name = line.investor_name, | |
a.category = line.investor_category_list, | |
a.market = line.investor_market, | |
a.country = line.investor_country_code, | |
a.state = line.investor_state_code, | |
a.region = line.investor_region, | |
a.city = line.investor_city, | |
a.logo = 'http://www.crunchbase.com' + line.permalink + '/primary-image/raw', | |
a.url = 'http://www.crunchbase.com' + line.investor_permalink; | |
//Creating an index on the investors and funding rounds to speed up the import | |
DROP CONSTRAINT ON (a:INVESTOR) ASSERT a.permalink IS UNIQUE; | |
CREATE INDEX ON :INVESTOR(permalink); | |
DROP CONSTRAINT ON (a:CITY) ASSERT a.name IS UNIQUE; | |
CREATE INDEX ON :CITY(name); | |
//Investors and Countries | |
MATCH (a:INVESTOR) | |
MATCH (b:CITY { name: a.city}) | |
MERGE (a)-[:HAS_CITY]->(b); | |
//Investors & companies | |
USING PERIODIC COMMIT 2000 | |
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line | |
FIELDTERMINATOR ';' | |
MATCH (a:INVESTOR { permalink: line.investor_permalink }) | |
MATCH (b:COMPANY { permalink: line.company_permalink }) | |
CREATE (a)-[:INVESTED_IN { | |
funded_at: line.funded_at, | |
funded_month: line.funded_month, | |
funded_quarter: line.funded_quarter, | |
funded_year: toInt(line.funded_year), | |
raised_amount_usd: toInt(line.raised_amount_usd), | |
funding_round_code: line.funding_round_code, | |
funding_round_type: line.funding_round_type, | |
permalink: line.funding_round_permalink, | |
url: 'http://www.crunchbase.com' + line.permalink | |
}]->(b); | |
//Importing the file Acquisitions.csv | |
USING PERIODIC COMMIT 2000 | |
LOAD CSV WITH HEADERS FROM "file:c:/Acquisitions.csv" AS line | |
FIELDTERMINATOR ';' | |
MATCH (a:COMPANY { permalink: line.acquirer_permalink }) | |
MATCH (b:COMPANY { permalink: line.company_permalink }) | |
CREATE (a)-[:ACQUIRED { | |
acquisition_date: line.acquired_at, | |
month: line.acquired_month, | |
quarter: line.acquired_quarter, | |
year: line.acquired_year, | |
price_amount: toInt(line.price_amount), | |
currency: line.price_currency_code | |
}]->(b); | |
//Creating relationships between competitors | |
//MATCH (a:COMPANY), (b:COMPANY) | |
//WHERE a.market = b.market | |
//CREATE (a)-[:IS_COMPETITOR]->(b); | |
//Cleaning up | |
MATCH (n:COMPANY) | |
WHERE n.state = '' | |
REMOVE n.state; | |
MATCH (n:COMPANY) | |
WHERE n.homepage_url = '' | |
REMOVE n.homepage_url; | |
MATCH (n:COMPANY) | |
WHERE n.funding_total = '' | |
REMOVE n.funding_total; | |
MATCH (n:COMPANY) | |
WHERE n.funding_rounds = '' | |
REMOVE n.funding_rounds; | |
MATCH (n:COMPANY) | |
WHERE n.founded_at = '' | |
REMOVE n.founded_at; | |
MATCH (n:COMPANY) | |
WHERE n.founded_month = '' | |
REMOVE n.founded_month; | |
MATCH (n:COMPANY) | |
WHERE n.founded_quarter = '' | |
REMOVE n.founded_quarter; | |
MATCH (n:COMPANY) | |
WHERE n.founded_year = '' | |
REMOVE n.founded_year; | |
MATCH (n:COMPANY) | |
WHERE n.first_funding_at = '' | |
REMOVE n.first_funding_at; | |
MATCH (n:COMPANY) | |
WHERE n.last_funding_at = '' | |
REMOVE n.last_funding_at; | |
MATCH (n:COMPANY) | |
WHERE n.category = '' | |
REMOVE n.category; | |
MATCH (n:COMPANY) | |
WHERE n.market = '' | |
REMOVE n.market; | |
MATCH (n:COMPANY) | |
WHERE n.status = '' | |
REMOVE n.status; | |
MATCH (n:COMPANY) | |
WHERE n.country = '' | |
REMOVE n.country; | |
MATCH (n:COMPANY) | |
WHERE n.state = '' | |
REMOVE n.state; | |
MATCH (n:COMPANY) | |
WHERE n.region = '' | |
REMOVE n.region; | |
MATCH (n:COMPANY) | |
WHERE n.city = '' | |
REMOVE n.city; | |
MATCH (n:INVESTOR) | |
WHERE n.investor_category = '' | |
REMOVE n.investor_category; | |
MATCH (n:INVESTOR) | |
WHERE n.investor_market = '' | |
REMOVE n.investor_market; | |
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY) | |
WHERE n.funded_at = '' | |
REMOVE n.funded_at; | |
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY) | |
WHERE n.funded_month = '' | |
REMOVE n.funded_month; | |
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY) | |
WHERE n.funded_quarter = '' | |
REMOVE n.funded_quarter; | |
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY) | |
WHERE n.funded_year = '' | |
REMOVE n.funded_year; | |
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY) | |
WHERE n.raised_amount_usd = '' | |
REMOVE n.raised_amount_usd; | |
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY) | |
WHERE n.funding_round_code = '' | |
REMOVE n.funding_round_code; | |
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY) | |
WHERE n.funding_round_type = '' | |
REMOVE n.funding_round_type; | |
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY) | |
WHERE n.funded_month = '' | |
REMOVE n.funded_month; | |
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY) | |
WHERE n.funded_at = '' | |
REMOVE n.funded_at; | |
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY) | |
WHERE n.funded_at = '' | |
REMOVE n.funded_at; | |
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY) | |
WHERE n.funded_at = '' | |
REMOVE n.funded_at; | |
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY) | |
WHERE n.funded_at = '' | |
REMOVE n.funded_at; | |
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY) | |
WHERE n.acquisition_date = '' | |
REMOVE n.acquisition_date; | |
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY) | |
WHERE n.month = '' | |
REMOVE n.month; | |
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY) | |
WHERE n.quarter = '' | |
REMOVE n.quarter; | |
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY) | |
WHERE n.year = '' | |
REMOVE n.year; | |
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY) | |
WHERE n.price_amount = '' | |
REMOVE n.price_amount; | |
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY) | |
WHERE n.currency = '' | |
REMOVE n.currency; | |
MATCH (n:CITY {name :''})-[r]-() | |
DELETE n, r; |
Neat. Where can I get the Crunchbase data to try this and play with it? Thanks.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
fixed version https://gist.github.com/sheymann/676261d4265959e8d4b2