This graph gist demonstrates how to use the Jaccard index to measure the similarity between two sets of nodes.
// Sample graph: two people, each owning one group of specimens.
// This is a single statement: the node variables bound by the first CREATE
// clauses are reused by the relationship clauses further down, so it must not
// be split with semicolons.
CREATE (_2:Specimen {name: "s1"})
CREATE (_3:Specimen {name: "s2"})
CREATE (_4:Specimen {name: "s3"})
CREATE (_5:Specimen {name: "s4"})
CREATE (_6:Person {name: "nicky"})
CREATE (_7:Person {name: "mark"})
CREATE (_10:Group {name: "mark's group"})
CREATE (_11:Group {name: "nicky's group"})
// Ownership: each person has exactly one group.
// Node variables are wrapped in parentheses; current Cypher does not accept
// bare variables as node patterns.
CREATE (_6)-[:HAS]->(_11)
CREATE (_7)-[:HAS]->(_10)
// mark's group contains s1, s2, s3 and s4.
CREATE (_10)-[:CONTAINS]->(_2)
CREATE (_10)-[:CONTAINS]->(_3)
CREATE (_10)-[:CONTAINS]->(_4)
CREATE (_10)-[:CONTAINS]->(_5)
// nicky's group contains s1, s2 and s3 (no s4).
CREATE (_11)-[:CONTAINS]->(_4)
CREATE (_11)-[:CONTAINS]->(_3)
CREATE (_11)-[:CONTAINS]->(_2)
The Jaccard index of two sets is the size of their intersection divided by the size of their union, J(A, B) = |A ∩ B| / |A ∪ B|:
// Jaccard similarity between each group connected to the person named "nicky"
// and every group of a different person that shares at least one specimen
// with it. Returns one row per group pair: g.name, Jaccard, gOther.name.
MATCH (p:Person {name: "nicky"})--(g:Group)-->(s:Specimen),
      (pOther:Person)-->(gOther:Group)-->(s)
WHERE p <> pOther AND g <> gOther
// Collapse to one row per group pair. Without DISTINCT there is one row per
// shared specimen, and the matches below would be repeated for each of them.
WITH DISTINCT g, gOther
// Intersection: specimens contained in both groups. The :Specimen label keeps
// the numerator restricted to the same node set as the union below.
MATCH (g)-->(s:Specimen)<--(gOther)
WITH g, gOther, count(DISTINCT s) AS intersection
// Union: specimens contained in at least one of the two groups.
MATCH (s_all:Specimen)
WHERE (s_all)<--(g) OR (s_all)<--(gOther)
// toFloat forces floating-point division; the union is never empty here
// because every surviving pair shares at least one specimen.
RETURN g.name, toFloat(intersection) / toFloat(count(DISTINCT s_all)) AS Jaccard, gOther.name