CREATE INDEX ON :Match(id);
CREATE INDEX ON :WorldCup(name);
CREATE INDEX ON :Stadium(name);
CREATE INDEX ON :Phase(phase);
CREATE INDEX ON :Country(name);
CREATE INDEX ON :Time(time);
CREATE INDEX ON :MatchNumber(value);
USING PERIODIC COMMIT 1000
LOAD CSV WITH HEADERS FROM "https://dl.dropboxusercontent.com/u/7619809/matches.csv" AS csvLine
WITH csvLine, toInt(csvLine.match_number) AS matchNumber
WITH csvLine,
CASE WHEN csvLine.phase = ""
THEN
CASE WHEN matchNumber <= 48 THEN "Group matches"
WHEN matchNumber > 48 AND matchNumber <= 56 THEN "Round of 16"
WHEN matchNumber > 56 AND matchNumber <= 60 THEN "Quarter-finals"
WHEN matchNumber > 60 AND matchNumber <= 62 THEN "Semi-finals"
WHEN matchNumber = 63 THEN "Third place"
ELSE "Final"
END
ELSE
csvLine.phase
END AS phase, matchNumber
MERGE (match:Match {id: csvLine.id})
SET match.h_score = csvLine.h_score,
match.a_score = csvLine.a_score,
match.attendance = csvLine.attendance,
match.date = csvLine.date,
match.description = csvLine.home + " vs. " + csvLine.away
MERGE (host:Country {name: csvLine.host})
MERGE (home:Country {name: csvLine.home})
MERGE (match)-[:HOME_TEAM]->(home)
MERGE (away:Country {name: csvLine.away})
MERGE (match)-[:AWAY_TEAM]->(away)
MERGE (year:Year {year: toInt(csvLine.year)})
MERGE (worldCup:WorldCup {name: csvLine.world_cup})
MERGE (match)<-[:CONTAINS_MATCH]-(worldCup)
MERGE (host)<-[:HOSTED_BY]-(worldCup)
MERGE (year)<-[:IN_YEAR]-(worldCup)
MERGE (stadium:Stadium {name: csvLine.stadium})
MERGE (match)-[:PLAYED_IN_STADIUM]->(stadium)
MERGE (p:Phase {name: phase})
MERGE (match)-[:IN_PHASE]->(p)
MERGE (mn:MatchNumber {value: matchNumber})
MERGE (match)-[:HAS_MATCH_NUMBER]->(mn)
MERGE (time:Time {time: csvLine.time})
MERGE (match)-[:PLAYED_AT_TIME]->(time)
RETURN count(*) as matches;
The graph is too large to visualize nicely, that’s why here only a few matches from 2010 as example.
MATCH (match)<-[:CONTAINS_MATCH]-(worldCup)-[:HOSTED_BY]-(host),
(worldCup)-[:IN_YEAR]-(year:Year {year:2010})
RETURN *
LIMIT 10
MATCH (stadium:Stadium)<-[:PLAYED_IN_STADIUM]-()<-[:CONTAINS_MATCH]-(worldCup:WorldCup)-[:HOSTED_BY]-(host:Country),
(worldCup)-[:IN_YEAR]-(year:Year)
RETURN worldCup.name,year.year,host.name,collect(distinct stadium.name)
ORDER BY year.year ASC
MATCH (stadium:Stadium)<-[:PLAYED_IN_STADIUM]-()<-[:CONTAINS_MATCH]-(wc)-[:HOSTED_BY]-(host),
(wc)-[:IN_YEAR]-(year)
WITH stadium, host, COUNT(*) as count, COLLECT(DISTINCT year.year) AS years
UNWIND years as year
WITH stadium, host, count, year
ORDER BY stadium.name, host.name, year
RETURN stadium.name, host.name, COLLECT(year) AS years, count
ORDER BY count DESC
LIMIT 5
MATCH (host:Country)<-[:HOSTED_BY]-()-[:IN_YEAR]->(year)
WITH host, COUNT(*) AS times, COLLECT(year.year) AS years
UNWIND years AS year
WITH host, times, year
ORDER BY times DESC, year
RETURN host.name, times, COLLECT(year) AS years
ORDER BY times DESC