Created
July 15, 2018 23:26
-
-
Save NicMcPhee/72899cc7c7ccbf37b33c1d7e2cf98318 to your computer and use it in GitHub Desktop.
GECCO tutorial graph database demo code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Clear the DB for a clean start: remove every node together with its relationships.
MATCH (n) DETACH DELETE n;
// Each individual is identified by its uuid.
CREATE CONSTRAINT ON (i:Individual) ASSERT i.uuid IS UNIQUE;
// Property keys are case-sensitive, so the constraint must name `errors_vector`
// exactly as the loader writes it; a constraint on `Errors_vector` would never
// apply to the stored property.
// NOTE(review): this assumes total_error is fully determined by the error
// vector, so that the loader's MERGE on both properties never attempts to
// create a second node with the same vector -- confirm against the CSV.
CREATE CONSTRAINT ON (e:Errors) ASSERT e.errors_vector IS UNIQUE;
// Indexes for the per-generation and total-error lookups used by the queries below.
CREATE INDEX ON :Individual(generation);
CREATE INDEX ON :Errors(total_error);
// Create one :Individual node per CSV row and attach it, via a HAS edge, to
// the :Errors node carrying its error vector. MERGE makes rows with the same
// (errors_vector, total_error) pair share a single :Errors node.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS
FROM 'http://facultypages.morris.umn.edu/~mcphee/Research/GECCO2016_tutorial/push_regression_run_2.csv' AS row
WITH row,
     // Per-test-case errors from columns TC0..TC9, in column order.
     [tc IN ['TC0', 'TC1', 'TC2', 'TC3', 'TC4', 'TC5', 'TC6', 'TC7', 'TC8', 'TC9'] | toInteger(row[tc])] AS errors_vector
CREATE (ind:Individual {uuid: row.uuid})
SET ind += {
  generation: toInteger(row.generation),
  location: toInteger(row.location),
  plush_genome_size: toInteger(row.`plush-genome-size`),
  push_program_size: toInteger(row.`push-program-size`),
  plush_genome: row.`plush-genome`
}
MERGE (err:Errors {errors_vector: errors_vector, total_error: toInteger(row.`total-error`)})
CREATE (ind)-[:HAS]->(err)
;
// Second pass over the same CSV: for every row, link each parent listed in the
// space-separated `parent-uuids` column to the row's individual, recording the
// row's `genetic-operators` value on the PARENT_OF edge.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS
FROM 'http://facultypages.morris.umn.edu/~mcphee/Research/GECCO2016_tutorial/push_regression_run_2.csv' AS row
MATCH (child:Individual {uuid: row.uuid})
UNWIND split(row.`parent-uuids`, ' ') AS parent_uuid
MATCH (parent:Individual {uuid: parent_uuid})
CREATE (parent)-[:PARENT_OF {genetic_operator: row.`genetic-operators`}]->(child)
;
// Start every individual at zero selections so that individuals without any
// outgoing PARENT_OF edge still carry the property.
MATCH (ind:Individual)
SET ind.num_selections = 0
;
// Overwrite the default for every individual that was selected at least once:
// its selection count is the number of its outgoing PARENT_OF edges.
MATCH (p:Individual)-[sel:PARENT_OF]->(:Individual)
WITH p, count(sel) AS times_selected
SET p.num_selections = times_selected
;
//////////////////////////// | |
// Done setting up the DB // | |
//////////////////////////// | |
// Now some queries! // | |
//////////////////////////// | |
// Open with the five generation graph nicely laid out. | |
// Then find out how many nodes and edges there are. | |
// Then show the Schema diagram | |
// Were there winners? A winner is an individual whose errors total zero.
MATCH (winner:Individual)-[:HAS]->(err:Errors)
WHERE err.total_error = 0
RETURN DISTINCT winner
;
// Click open a few nodes to show how we can explore in the GUI | |
// What do the last two generations look like?
// Every winner (total error 0) together with each of its direct parents.
MATCH (parent:Individual)-[:PARENT_OF]->(winner:Individual)-[:HAS]->(:Errors {total_error: 0})
RETURN DISTINCT winner, parent
;
// What do the errors look like in the last five generations?
// Walk zero to four PARENT_OF hops back from each winner; zero hops keeps the
// winner itself in the result. The MATCH clauses are deliberately separate:
// merged into one pattern, the zero-hop case would need the winner's HAS edge
// twice and would be excluded.
MATCH (winner:Individual)-[:HAS]->(zero:Errors)
WHERE zero.total_error = 0
MATCH (winner)<-[:PARENT_OF*0..4]-(ancestor:Individual)
MATCH (errors:Errors)<-[:HAS]-(ancestor)
RETURN DISTINCT ancestor, errors
;
// How many distinct ancestors were there in the first generation?
// Ancestors are the individuals exactly 39 PARENT_OF hops before a winner.
// NOTE(review): this presumes winners sit in generation 39 -- confirm for other runs.
MATCH (ancestor:Individual)-[:PARENT_OF*39]->(winner:Individual)-[:HAS]->(:Errors {total_error: 0})
// Collapse the many paths reaching the same ancestor before looking up its errors.
WITH DISTINCT ancestor
MATCH (errors:Errors)<-[:HAS]-(ancestor)
RETURN DISTINCT ancestor.uuid, ancestor.num_selections, errors
ORDER BY ancestor.num_selections DESC
;
// How many selections were there in the first generation?
MATCH (n:Individual)
WHERE n.generation = 0
RETURN SUM(n.num_selections)
;
// What was the average number of selections?
// Generation 39 is left out: it is the last generation, so no selections
// were made from it.
MATCH (n:Individual) WHERE n.generation < 39
RETURN AVG(n.num_selections)
;
// How many 10% hyperselections were there?
// 14 is used as the cutoff: list every individual selected more than 14
// times, along with its errors, most-selected first.
MATCH (n:Individual)-[:HAS]->(errors:Errors)
WHERE n.num_selections > 14
RETURN n.uuid, n.num_selections, n.generation, errors
ORDER BY n.num_selections DESC
;
// How many 10% *semantic* hyperselections were there in a single generation?
// That is: how often, within one generation, did individuals sharing the same
// error vector together receive more than 10% of the selections (14 here)?
MATCH (errors:Errors)<-[:HAS]-(n:Individual)
WITH errors,
     n.generation AS gen,
     COUNT(DISTINCT n) AS num_individuals,
     SUM(n.num_selections) AS semantic_selections
WHERE semantic_selections > 14
RETURN gen, semantic_selections, num_individuals, errors
ORDER BY semantic_selections DESC, num_individuals DESC, gen ASC
;
// Same grouping, restricted to error vectors shared by at least 10
// individuals in the generation, and listed generation by generation.
MATCH (errors:Errors)<-[:HAS]-(n:Individual)
WITH errors,
     n.generation AS gen,
     COUNT(DISTINCT n) AS num_individuals,
     SUM(n.num_selections) AS semantic_selections
WHERE num_individuals >= 10 AND semantic_selections > 14
RETURN gen, semantic_selections, num_individuals, errors
ORDER BY gen ASC, semantic_selections DESC, num_individuals DESC
;
// How often is there no change in errors from parent to child?
// Counts distinct (parent, child) pairs whose HAS edges end at the same
// :Errors node.
MATCH (parent:Individual)-[:PARENT_OF]->(child:Individual)
MATCH (parent)-[:HAS]->(parent_errors:Errors)
MATCH (child)-[:HAS]->(child_errors:Errors)
WHERE parent_errors = child_errors
RETURN COUNT(DISTINCT [parent.uuid, child.uuid])
;
/////////////////////////////////////////////////// | |
// How many distinct errors were there? (One :Errors node per distinct vector.)
MATCH (:Errors)
RETURN count(*)
;
// What errors had total error < 100? Listed from best to worst.
MATCH (errors:Errors)
WHERE errors.total_error < 100
RETURN errors
ORDER BY errors.total_error
;
// How many individuals had those errors, and when were they introduced?
// For each error vector with total error below 100: the earliest generation
// in which it appears and the number of individuals carrying it.
MATCH (n:Individual)-[:HAS]->(errors:Errors)
WHERE errors.total_error < 100
RETURN errors, MIN(n.generation), count(n)
ORDER BY errors.total_error
;
// What's the maximum number of selections in each generation?
// For generations 0 through 38, find the highest selection count and report
// the errors of every individual that reached it (ties yield several rows).
UNWIND RANGE(0, 38) AS gen
MATCH (n:Individual)
WHERE n.generation = gen
WITH gen, MAX(n.num_selections) AS max_selections
MATCH (most_selected:Individual)-[:HAS]->(errors:Errors)
WHERE most_selected.generation = gen
  AND most_selected.num_selections = max_selections
RETURN gen, max_selections, errors
ORDER BY gen;
// How often do things get worse before (immediately) getting better?
// Looks for grandparent -> parent -> child chains where the parent's total
// error is above the grandparent's and the child's is below the grandparent's.
MATCH (grandparent:Individual)-[:PARENT_OF]->(parent:Individual)
MATCH (grandparent)-[:HAS]->(gpe:Errors)
MATCH (parent)-[:HAS]->(pe:Errors)
WHERE pe.total_error > gpe.total_error
MATCH (parent)-[:PARENT_OF]->(child:Individual)
MATCH (child)-[:HAS]->(ce:Errors)
WHERE ce.total_error < gpe.total_error
RETURN DISTINCT grandparent.generation, gpe.total_error, pe.total_error, ce.total_error
ORDER BY grandparent.generation;
// Find the errors that eventually led to a success.
// Every individual within 0..40 PARENT_OF hops of a winner (the winner
// included), reported as distinct (generation, errors) pairs.
MATCH (w:Individual)-[:HAS]-(zero:Errors)
WHERE zero.total_error = 0
MATCH (w)<-[:PARENT_OF*0..40]-(n:Individual)
MATCH (n)-[:HAS]->(e:Errors)
RETURN DISTINCT n.generation, e
ORDER BY n.generation;
// Count how many times each error vector appeared in a winner's ancestry:
// the number of distinct individuals, within 0..40 PARENT_OF hops of a
// winner, that carry it. Aggregation already yields one row per `e`.
MATCH (w:Individual)-[:HAS]-(zero:Errors)
WHERE zero.total_error = 0
MATCH (w)<-[:PARENT_OF*0..40]-(n:Individual)
MATCH (n)-[:HAS]->(e:Errors)
RETURN e, COUNT(DISTINCT n)
ORDER BY COUNT(DISTINCT n) DESC;
// How many distinct ancestors did the winner(s) have?
// The zero-hop lower bound means each winner is counted as well.
MATCH (w:Individual)-[:HAS]-(zero:Errors)
WHERE zero.total_error = 0
MATCH (w)<-[:PARENT_OF*0..40]-(n:Individual)
RETURN COUNT(DISTINCT n);
// Create LEADS_TO edges between parent and child semantics: whenever a parent
// with errors `pe` has a child with errors `ce`, make sure a single
// (pe)-[:LEADS_TO]->(ce) edge exists. MERGE keeps repeated pairs from
// producing duplicate edges.
MATCH (p:Individual)-[:PARENT_OF]->(c:Individual)
MATCH (pe:Errors)-[:HAS]-(p)
MATCH (ce:Errors)-[:HAS]-(c)
MERGE (pe)-[:LEADS_TO]->(ce);
// Default every error vector to zero outgoing LEADS_TO targets.
MATCH (e:Errors)
SET e.num_leads_to = 0;
// Then record, for each vector with outgoing edges, how many distinct
// vectors it leads to.
MATCH (e:Errors)-[:LEADS_TO]->(ce:Errors)
WITH e, COUNT(DISTINCT ce) AS num_children
SET e.num_leads_to = num_children;
MATCH (pe:Errors)-[:LEADS_TO]->(e:Errors) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment