Skip to content

Instantly share code, notes, and snippets.

@jvilledieu
Last active August 29, 2015 14:11
Show Gist options
  • Save jvilledieu/e087e396c6e24c5b524c to your computer and use it in GitHub Desktop.
Save jvilledieu/e087e396c6e24c5b524c to your computer and use it in GitHub Desktop.
//Importing the file Companies.csv
//Creating the constraints
create constraint on (a:COMPANY) assert a.permalink is unique;
create constraint on (e:CITY) assert e.name is unique;
//Creating the companies & Cities
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Companies.csv" AS line
FIELDTERMINATOR ';'
MERGE (e:CITY {name: line.city});
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Companies.csv" AS line
FIELDTERMINATOR ';'
CREATE (a:COMPANY {
name: line.name,
permalink: line.permalink,
homepage_url: line.homepage_url,
funding_total: toInt(line.funding_total_usd),
funding_rounds: toInt(line.funding_rounds),
founded_at: line.founded_at,
founded_month: line.founded_month,
founded_quarter: line.founded_quarter,
founded_year: toInt(line.founded_year),
first_funding_at: line.first_funding_at,
last_funding_at: line.last_funding_at,
category: line.category_list,
market: line.market,
status: line.status,
country: line.country_code,
state: line.state_code,
region: line.region,
city: line.city,
url: 'http://www.crunchbase.com' + line.permalink,
logo: 'http://www.crunchbase.com' + line.permalink + '/primary-image/raw'
});
//Countries and Companies
MATCH (a:COMPANY)
MATCH (b:CITY { name: a.city })
CREATE (a)-[:HAS_CITY]->(b);
//Creating an index on the companies
DROP CONSTRAINT ON (a:COMPANY) ASSERT a.permalink IS UNIQUE;
CREATE INDEX ON :COMPANY(permalink);
//Importing the file Investments.csv
//Creating the constraints
create constraint on (a:INVESTOR) assert a.permalink is unique;
//Creating the investors
USING PERIODIC COMMIT 1000
LOAD CSV WITH HEADERS FROM "file:c:/Users/Jean/Downloads/Investments.csv" AS line
FIELDTERMINATOR ';'
MERGE (d:CITY {name: line.investor_city})
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line
FIELDTERMINATOR ';'
MATCH (a:COMPANY { permalink: line.investor_permalink })
SET a:INVESTOR;
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line
FIELDTERMINATOR ';'
MERGE (a:INVESTOR { permalink: line.investor_permalink })
ON CREATE SET a.name = line.investor_name,
a.category = line.investor_category_list,
a.market = line.investor_market,
a.country = line.investor_country_code,
a.state = line.investor_state_code,
a.region = line.investor_region,
a.city = line.investor_city,
a.logo = 'http://www.crunchbase.com' + line.permalink + '/primary-image/raw',
a.url = 'http://www.crunchbase.com' + line.investor_permalink;
//Creating an index on the investors and funding rounds to speed up the import
DROP CONSTRAINT ON (a:INVESTOR) ASSERT a.permalink IS UNIQUE;
CREATE INDEX ON :INVESTOR(permalink);
DROP CONSTRAINT ON (a:CITY) ASSERT a.name IS UNIQUE;
CREATE INDEX ON :CITY(name);
//Investors and Countries
MATCH (a:INVESTOR)
MATCH (b:CITY { name: a.city})
MERGE (a)-[:HAS_CITY]->(b);
//Investors & companies
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Investments.csv" AS line
FIELDTERMINATOR ';'
MATCH (a:INVESTOR { permalink: line.investor_permalink })
MATCH (b:COMPANY { permalink: line.company_permalink })
CREATE (a)-[:INVESTED_IN {
funded_at: line.funded_at,
funded_month: line.funded_month,
funded_quarter: line.funded_quarter,
funded_year: toInt(line.funded_year),
raised_amount_usd: toInt(line.raised_amount_usd),
funding_round_code: line.funding_round_code,
funding_round_type: line.funding_round_type,
permalink: line.funding_round_permalink,
url: 'http://www.crunchbase.com' + line.permalink
}]->(b);
//Importing the file Acquisitions.csv
USING PERIODIC COMMIT 2000
LOAD CSV WITH HEADERS FROM "file:c:/Acquisitions.csv" AS line
FIELDTERMINATOR ';'
MATCH (a:COMPANY { permalink: line.acquirer_permalink })
MATCH (b:COMPANY { permalink: line.company_permalink })
CREATE (a)-[:ACQUIRED {
acquisition_date: line.acquired_at,
month: line.acquired_month,
quarter: line.acquired_quarter,
year: line.acquired_year,
price_amount: toInt(line.price_amount),
currency: line.price_currency_code
}]->(b);
//Creating relationships between competitors
//MATCH (a:COMPANY), (b:COMPANY)
//WHERE a.market = b.market
//CREATE (a)-[:IS_COMPETITOR]->(b);
//Cleaning up
MATCH (n:COMPANY)
WHERE n.state = ''
REMOVE n.state;
MATCH (n:COMPANY)
WHERE n.homepage_url = ''
REMOVE n.homepage_url;
MATCH (n:COMPANY)
WHERE n.funding_total = ''
REMOVE n.funding_total;
MATCH (n:COMPANY)
WHERE n.funding_rounds = ''
REMOVE n.funding_rounds;
MATCH (n:COMPANY)
WHERE n.founded_at = ''
REMOVE n.founded_at;
MATCH (n:COMPANY)
WHERE n.founded_month = ''
REMOVE n.founded_month;
MATCH (n:COMPANY)
WHERE n.founded_quarter = ''
REMOVE n.founded_quarter;
MATCH (n:COMPANY)
WHERE n.founded_year = ''
REMOVE n.founded_year;
MATCH (n:COMPANY)
WHERE n.first_funding_at = ''
REMOVE n.first_funding_at;
MATCH (n:COMPANY)
WHERE n.last_funding_at = ''
REMOVE n.last_funding_at;
MATCH (n:COMPANY)
WHERE n.category = ''
REMOVE n.category;
MATCH (n:COMPANY)
WHERE n.market = ''
REMOVE n.market;
MATCH (n:COMPANY)
WHERE n.status = ''
REMOVE n.status;
MATCH (n:COMPANY)
WHERE n.country = ''
REMOVE n.country;
MATCH (n:COMPANY)
WHERE n.state = ''
REMOVE n.state;
MATCH (n:COMPANY)
WHERE n.region = ''
REMOVE n.region;
MATCH (n:COMPANY)
WHERE n.city = ''
REMOVE n.city;
MATCH (n:INVESTOR)
WHERE n.investor_category = ''
REMOVE n.investor_category;
MATCH (n:INVESTOR)
WHERE n.investor_market = ''
REMOVE n.investor_market;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE n.funded_at = ''
REMOVE n.funded_at;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE n.funded_month = ''
REMOVE n.funded_month;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE n.funded_quarter = ''
REMOVE n.funded_quarter;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE n.funded_year = ''
REMOVE n.funded_year;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE n.raised_amount_usd = ''
REMOVE n.raised_amount_usd;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE n.funding_round_code = ''
REMOVE n.funding_round_code;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE n.funding_round_type = ''
REMOVE n.funding_round_type;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE n.funded_month = ''
REMOVE n.funded_month;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE n.funded_at = ''
REMOVE n.funded_at;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE n.funded_at = ''
REMOVE n.funded_at;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE n.funded_at = ''
REMOVE n.funded_at;
MATCH (n:INVESTOR)-[r:INVESTED_IN]->(b:COMPANY)
WHERE n.funded_at = ''
REMOVE n.funded_at;
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY)
WHERE n.acquisition_date = ''
REMOVE n.acquisition_date;
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY)
WHERE n.month = ''
REMOVE n.month;
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY)
WHERE n.quarter = ''
REMOVE n.quarter;
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY)
WHERE n.year = ''
REMOVE n.year;
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY)
WHERE n.price_amount = ''
REMOVE n.price_amount;
MATCH (n:COMPANY)-[r:ACQUIRED]->(b:COMPANY)
WHERE n.currency = ''
REMOVE n.currency;
MATCH (n:CITY {name :''})-[r]-()
DELETE n, r;
@sheymann
Copy link

sheymann commented Jan 6, 2015

@chrisco
Copy link

chrisco commented Mar 28, 2015

Neat. Where can I get the Crunchbase data to try this and play with it? Thanks.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment