Last active
August 2, 2017 03:09
-
-
Save NicMcPhee/9acc7e841130be22c6cc2254d8bf964e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Clear the DB for a clean start: delete every node together with its relationships.
MATCH (n) DETACH DELETE n;
// Each Individual is identified by its uuid.
CREATE CONSTRAINT ON (i:Individual) ASSERT i.uuid IS UNIQUE;
// Property keys are case-sensitive. The loader below stores the vector under
// `errors_vector` (lower-case), so the constraint has to name exactly that key;
// a constraint on `Errors_vector` would never apply to any loaded node.
CREATE CONSTRAINT ON (e:Errors) ASSERT e.errors_vector IS UNIQUE;
// Indexes on the two properties the queries below filter on most often.
CREATE INDEX ON :Individual(generation);
CREATE INDEX ON :Errors(total_error);
// Load pass 1: create one Individual node per CSV row and attach it to an
// Errors node. The Errors node is MERGEd, so rows with the same error vector
// and total error share a single node.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM
'http://facultypages.morris.umn.edu/~mcphee/Research/GECCO2016_tutorial/push_regression_run_2.csv' AS line
WITH line,
     TOINT(line.`total-error`) AS total_error,
     // Per-test-case errors TC0..TC9, collected into one list property.
     [TOINT(line.TC0), TOINT(line.TC1), TOINT(line.TC2), TOINT(line.TC3), TOINT(line.TC4),
      TOINT(line.TC5), TOINT(line.TC6), TOINT(line.TC7), TOINT(line.TC8), TOINT(line.TC9)]
       AS errors_vector
CREATE (individual:Individual {uuid: line.uuid})
SET individual += {
  generation: TOINT(line.generation),
  location: TOINT(line.location),
  plush_genome_size: TOINT(line.`plush-genome-size`),
  push_program_size: TOINT(line.`push-program-size`),
  plush_genome: line.`plush-genome`
}
MERGE (errors:Errors {errors_vector: errors_vector, total_error: total_error})
CREATE (individual)-[:HAS]->(errors)
;
// Load pass 2: re-read the same CSV and connect every individual to its parents.
// `parent-uuids` is split on single spaces; each resulting uuid that matches an
// existing Individual gets a PARENT_OF edge carrying the row's `genetic-operators`.
USING PERIODIC COMMIT
LOAD CSV WITH HEADERS FROM
'http://facultypages.morris.umn.edu/~mcphee/Research/GECCO2016_tutorial/push_regression_run_2.csv' AS line
MATCH (child:Individual {uuid: line.uuid})
UNWIND SPLIT(line.`parent-uuids`, ' ') AS parent_uuid
MATCH (parent:Individual {uuid: parent_uuid})
CREATE (parent)-[:PARENT_OF {genetic_operator: line.`genetic-operators`}]->(child)
;
// Store on every individual how many times it was selected as a parent,
// i.e. the number of its outgoing PARENT_OF edges.
// OPTIONAL MATCH keeps individuals without children in the result, and count()
// skips the resulting nulls, so those individuals end up with num_selections = 0.
MATCH (parent:Individual)
OPTIONAL MATCH (parent)-[e:PARENT_OF]->(:Individual)
WITH parent, count(e) AS num_selections
SET parent.num_selections = num_selections
;
//////////////////////////// | |
// Done setting up the DB // | |
//////////////////////////// | |
// Now some queries! // | |
//////////////////////////// | |
// Open with the five generation graph nicely laid out. | |
// Then find out how many nodes and edges there are. | |
// Then show the Schema diagram | |
// Were there winners?
// A winner is an individual whose Errors node has a total error of 0.
MATCH (winner:Individual)-[:HAS]->(zero:Errors)
WHERE zero.total_error = 0
RETURN DISTINCT winner
;
// Click open a few nodes to show how we can explore in the GUI | |
// What do the last two generations look like?
// Returns every winner (total error 0) together with its direct parents.
MATCH (parent:Individual)-[:PARENT_OF]->(winner:Individual)-[:HAS]->(zero:Errors)
WHERE zero.total_error = 0
RETURN DISTINCT winner, parent
;
// What do the errors look like in the last five generations?
// Walks 0 to 4 PARENT_OF hops back from each winner (0 hops = the winner itself)
// and returns each individual reached along with its Errors node.
MATCH (winner:Individual)-[:HAS]->(zero:Errors)
WHERE zero.total_error = 0
MATCH (winner)<-[:PARENT_OF*0..4]-(ancestor:Individual)-[:HAS]->(errors:Errors)
RETURN DISTINCT ancestor, errors
;
// How many distinct ancestors were there in the first generation?
// Follows exactly 39 PARENT_OF hops back from each winner, then lists the
// distinct individuals reached, most-selected first.
// NOTE(review): this reaches generation 0 only if the winners are in generation 39 - confirm.
MATCH (winner:Individual)-[:HAS]->(zero:Errors)
WHERE zero.total_error = 0
MATCH (ancestor:Individual)-[:PARENT_OF*39]->(winner)
WITH DISTINCT ancestor
MATCH (ancestor)-[:HAS]->(errors:Errors)
RETURN DISTINCT ancestor.uuid, ancestor.num_selections, errors
ORDER BY ancestor.num_selections DESC
;
// How many selections were there in the first generation?
// Adds up num_selections over all individuals with generation 0.
MATCH (n:Individual)
WHERE n.generation = 0
RETURN SUM(n.num_selections)
;
// What was the average number of selections?
// Ignore the last generation (39) since no selections were made there;
// generation is stored as an integer, so "<= 38" keeps generations 0 through 38.
MATCH (n:Individual)
WHERE n.generation <= 38
RETURN AVG(n.num_selections)
;
// How many 10% hyperselections were there?
// Uses 14 as the cutoff: lists every individual selected more than 14 times,
// with its generation and Errors node, most-selected first.
MATCH (n:Individual)-[:HAS]->(errors:Errors)
WHERE n.num_selections > 14
RETURN n.uuid, n.num_selections, n.generation, errors
ORDER BY n.num_selections DESC
;
// How often is there no change in errors from parent to child?
// Counts the distinct (parent, child) pairs whose HAS edges point at the very
// same Errors node.
MATCH (parent:Individual)-[:HAS]->(shared:Errors)
MATCH (parent)-[:PARENT_OF]->(child:Individual)
MATCH (child)-[:HAS]->(shared)
RETURN COUNT(DISTINCT [parent.uuid, child.uuid])
;
/////////////////////////////////////////////////// | |
// How many distinct errors were there?
// Errors nodes are MERGEd during loading, so one node stands for one distinct
// error vector and counting the nodes answers the question.
MATCH (errors:Errors)
RETURN count(*)
;
// What errors had total error < 100?
// Listed from the smallest total error upwards.
MATCH (errors:Errors)
WHERE errors.total_error < 100
RETURN errors
ORDER BY errors.total_error
;
// How many individuals had those errors, and when were they introduced?
// For each Errors node with total error below 100: the earliest generation
// of any individual that has it, and how many individuals have it.
MATCH (n:Individual)-[:HAS]->(errors:Errors)
WHERE errors.total_error < 100
RETURN errors, MIN(n.generation), count(n)
ORDER BY errors.total_error
;
// What's the maximum number of selections in each generation?
// For generations 0 through 38: find the largest num_selections, then report
// the Errors node of every individual in that generation that reached it.
UNWIND RANGE(0, 38) AS gen
MATCH (n:Individual)
WHERE n.generation = gen
WITH gen, MAX(n.num_selections) AS max_selections
MATCH (most_selected:Individual)-[:HAS]->(errors:Errors)
WHERE most_selected.generation = gen
  AND most_selected.num_selections = max_selections
RETURN gen, max_selections, errors
ORDER BY gen;
// How often do things get worse before (immediately) getting better?
// Finds grandparent -> parent -> child chains where the parent's total error is
// higher than the grandparent's, but the child's is lower than the grandparent's.
MATCH (grandparent:Individual)-[:PARENT_OF]->(parent:Individual)-[:PARENT_OF]->(child:Individual)
MATCH (grandparent)-[:HAS]->(gpe:Errors)
MATCH (parent)-[:HAS]->(pe:Errors)
MATCH (child)-[:HAS]->(ce:Errors)
WHERE gpe.total_error < pe.total_error
  AND gpe.total_error > ce.total_error
RETURN DISTINCT grandparent.generation, gpe.total_error, pe.total_error, ce.total_error
ORDER BY grandparent.generation;
// Find the errors that eventually led to a success.
// Starts from each winner (total error 0), walks up to 40 PARENT_OF hops back
// (0 hops = the winner itself) and reports the Errors node of every individual
// reached, by generation.
// HAS edges are created from Individual to Errors, so the pattern is written
// directed here, matching the other queries in this file.
MATCH (w:Individual)-[:HAS]->(:Errors {total_error: 0})
MATCH (n:Individual)-[:PARENT_OF*0..40]->(w)
MATCH (e:Errors)<-[:HAS]-(n)
RETURN DISTINCT n.generation, e
ORDER BY n.generation;
// Count how many times each errors appeared in a winner ancestry.
// For every Errors node: the number of distinct individuals with those errors
// that lie within 40 PARENT_OF hops of a winner (the winner itself included).
// HAS edges are created from Individual to Errors, so the pattern is written
// directed here, matching the other queries in this file.
// The aggregate already groups by e, so no DISTINCT is needed on the RETURN.
MATCH (w:Individual)-[:HAS]->(:Errors {total_error: 0})
MATCH (n:Individual)-[:PARENT_OF*0..40]->(w)
MATCH (e:Errors)<-[:HAS]-(n)
RETURN e, COUNT(DISTINCT n)
ORDER BY COUNT(DISTINCT n) DESC;
// How many distinct ancestors did the winner(s) have?
// Counts the distinct individuals within 40 PARENT_OF hops of any winner; the
// lower bound of 0 hops means the winners themselves are included in the count.
// HAS edges are created from Individual to Errors, so the pattern is written
// directed here, matching the other queries in this file.
MATCH (w:Individual)-[:HAS]->(:Errors {total_error: 0})
MATCH (n:Individual)-[:PARENT_OF*0..40]->(w)
RETURN COUNT(DISTINCT n);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment