kbastani · August 14, 2013 19:06
diff --git a/neo4j-cypher-delete-id-list.txt b/neo4j-cypher-delete-id-list.txt
 // Delete duplicate nodes by ID. The ID list comes from the output of
 // neo4j-cypher-duplicate-get-node.txt (its CommaSeparatedListOfIds column).
 // Validate that output before running this query.

 START n=node(1120038,1120039,1120040,1120042,1120044,1120048,1120049,1120050,1120053,1120067,1120068)
 // Replace the IDs above with the IDs from CommaSeparatedListOfIds in neo4j-cypher-duplicate-get-node.txt
 // The relationship is optional (r?) so that duplicate nodes with no relationships
 // are still matched and deleted; a mandatory pattern would silently skip them.
 MATCH n-[r?]-()
 DELETE r, n
diff --git a/neo4j-cypher-duplicate-get-node.txt b/neo4j-cypher-duplicate-get-node.txt
 // Report nodes from the invoices index that share a Key value that should be unique.
 // Output per duplicated key: the key, the number of nodes carrying it, and the
 // highest node ID in the group (treated here as the most recent copy, to be deleted).
 // Only one ID per key is reported per run, so a key with more than two copies
 // needs the find/delete cycle repeated.

 START n=node:invoices("PO_NUMBER:(\"112233\")")
 WITH n ORDER BY id(n) DESC  // highest IDs first, so HEAD() below picks the top ID of each group
 WITH n.Key? AS DuplicateKey, COUNT(n) AS ColCount, COLLECT(id(n)) AS NodeIds
 WITH DuplicateKey, ColCount, HEAD(NodeIds) AS DuplicateId
 WHERE (DuplicateKey IS NOT NULL) AND (DuplicateId IS NOT NULL) AND ColCount > 1
 WITH DuplicateKey, ColCount, DuplicateId ORDER BY DuplicateId
 RETURN DuplicateKey, ColCount, DuplicateId
 //RETURN COLLECT(DuplicateId) AS CommaSeparatedListOfIds

 // ** Swap which RETURN line above is commented out: the first lists each duplicate
 // ** for validation, the second emits the ID list for the delete query
 // ** Do not proceed to delete without validating
	// Report nodes from the invoices index that share a Key value that should be unique.
	// Output per duplicated key: the key, the number of nodes carrying it, and the
	// highest node ID in the group (treated here as the most recent copy, to be deleted).
	// Only one ID per key is reported per run, so a key with more than two copies
	// needs the find/delete cycle repeated.

	START n=node:invoices("PO_NUMBER:(\"112233\")")
	WITH n ORDER BY id(n) DESC // highest IDs first, so HEAD() below picks the top ID of each group
	WITH n.Key? AS DuplicateKey, COUNT(n) AS ColCount, COLLECT(id(n)) AS NodeIds
	WITH DuplicateKey, ColCount, HEAD(NodeIds) AS DuplicateId
	WHERE (DuplicateKey IS NOT NULL) AND (DuplicateId IS NOT NULL) AND ColCount > 1
	WITH DuplicateKey, ColCount, DuplicateId ORDER BY DuplicateId
	RETURN DuplicateKey, ColCount, DuplicateId
	//RETURN COLLECT(DuplicateId) AS CommaSeparatedListOfIds

	// ** Swap which RETURN line above is commented out: the first lists each duplicate
	// ** for validation, the second emits the ID list for the delete query
	// ** Do not proceed to delete without validating