rvanbruggen · January 17, 2016 19:44
diff --git a/importing_graphbloggraph.cql b/importing_graphbloggraph.cql
 //Bruggen blog feed
 //https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=0

 create index on :Blog(name);
 create constraint on (p:Page) assert p.url is unique;

 create (b:Blog {name:"Bruggen", url:"http://blog.bruggen.com"});
 create (n:Blog {name:"Neo4j Blog", url:"http://neo4j.com/blog"});
 create (n:Blog {name:"JEXP Blog", url:"http://jexp.de/blog/"});
 create (n:Blog {name:"Armbruster-IT Blog", url:"http://blog.armbruster-it.de/"});
 create (n:Blog {name:"Max De Marzi's Blog", url:"http://maxdemarzi.com/"});
 create (n:Blog {name:"Will Lyon's Blog", url:"http://lyonwj.com/"});
 create (n:Blog {name:"Mark Needham's Blog", url:"http://markhneedham.com/blog"});



 //create the Bruggen blog entries
 load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=0" as csv
 match (b:Blog {name:"Bruggen", url:"http://blog.bruggen.com"})
 create (p:Page {url: csv.URL, title: csv.Title, created: csv.Date})-[:PART_OF]->(b);

 //create the link graph
 load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=0" as csv
 with csv.URL as URL, csv.Links as row
 unwind row as listoflinks
 with URL, [l in split(listoflinks,"****") | trim(l)] as links
 unwind range(0,size(links)-2) as idx
 MERGE (l:Page {url:links[idx]})
 WITH l, URL
 MATCH (p:Page {url: URL})
 MERGE (p)-[:LINKS_TO]->(l);

 //remove the redundant links
 //linking to pages with same url (eg. archive pages, label pages...)
 match (b:Blog {name:"Bruggen"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
 where p2.url starts with "http://blog.bruggen.com"
 and not ((b)<-[:PART_OF]-(p2))
 detach delete p2;
 //linking to other posts of the same blog
 match (p1:Page)-[:PART_OF]->(b:Blog {name:"Bruggen"})<-[:PART_OF]-(p2:Page),
 (p1)-[lt:LINKS_TO]-(p2)
 delete lt;
 //linking to itself
 match (p1:Page)-[:PART_OF]->(b:Blog {name:"Bruggen"}),
 (p1)-[lt:LINKS_TO]-(p1)
 delete lt;
 //linking to the blog provider (Blogger)
 match (p:Page)
 where p.url contains "//www.blogger.com"
 detach delete p;


 //Neo4j blog feed
 //https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=999553051

 //create the blog entries
 load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=999553051" as csv
 merge (p:Page {url: csv.URL})
 on match set  p.title=csv.Title, p.created=csv.Date
 on create set  p.title=csv.Title, p.created=csv.Date
 with p
 match (b:Blog {name:"Neo4j Blog", url:"http://neo4j.com/blog"})
 create (p)-[:PART_OF]->(b);

 //create the link graph
 load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=999553051" as csv
 with csv.URL as URL, csv.Links as row
 unwind row as linklist
 with URL, [l in split(linklist,"****") | trim(l)] as links
 unwind range(0,size(links)-2) as idx
 MERGE (l:Page {url:links[idx]})
 WITH l, URL
 MATCH (p:Page {url: URL})
 CREATE (p)-[:LINKS_TO]->(l);

 //remove the redundant links
 match (b:Blog {name:"Neo4j Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
 where p2.url starts with "http://neo4j.com/blog"
 and not ((b)<-[:PART_OF]-(p2))
 detach delete p2;

 match (b:Blog {name:"Neo4j Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
 where p2.url contains "//neo4j.com/"
 and not ((b)<-[:PART_OF]-(p2))
 detach delete p2;

 match (p1:Page)-[:PART_OF]->(b:Blog {name:"Neo4j Blog"})<-[:PART_OF]-(p2:Page),
 (p1)-[lt:LINKS_TO]-(p2)
 delete lt;

 match (p1:Page)-[:PART_OF]->(b:Blog {name:"Neo4j Blog"}),
 (p1)-[lt:LINKS_TO]-(p1)
 delete lt;


 //JEXP blog feed
 //https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=2066848510

 //create the blog entries
 load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=2066848510" as csv
 merge (p:Page {url: csv.URL})
 on match set  p.title=csv.Title, p.created=csv.Date
 on create set  p.title=csv.Title, p.created=csv.Date
 with p
 match (b:Blog {name:"JEXP Blog", url:"http://jexp.de/blog/"})
 create (p)-[:PART_OF]->(b);

 //create the link graph
 load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=2066848510" as csv
 with csv.URL as URL, csv.Links as row
 unwind row as linklist
 with URL, [l in split(linklist,"****") | trim(l)] as links
 unwind range(0,size(links)-2) as idx
 MERGE (l:Page {url:links[idx]})
 WITH l, URL
 MATCH (p:Page {url: URL})
 CREATE (p)-[:LINKS_TO]->(l);

 //remove the redundant links
 match (b:Blog {name:"JEXP Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
 where p2.url starts with "http://jexp.de/blog"
 and not ((b)<-[:PART_OF]-(p2))
 detach delete p2;

 match (b:Blog {name:"JEXP Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
 where p2.url contains "//jexp.de/"
 and not ((b)<-[:PART_OF]-(p2))
 detach delete p2;

 match (p1:Page)-[:PART_OF]->(b:Blog {name:"JEXP Blog"})<-[:PART_OF]-(p2:Page),
 (p1)-[lt:LINKS_TO]-(p2)
 delete lt;

 match (p1:Page)-[:PART_OF]->(b:Blog {name:"JEXP Blog"}),
 (p1)-[lt:LINKS_TO]-(p1)
 delete lt;



 //Stefan Armbruster-IT https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1143698530

 //create the blog entries
 load csv with headers from "https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1143698530" as csv
 merge (p:Page {url: csv.URL})
 on match set  p.title=csv.Title, p.created=csv.Date
 on create set  p.title=csv.Title, p.created=csv.Date
 with p
 match (b:Blog {name:"Armbruster-IT Blog", url:"http://blog.armbruster-it.de/"})
 create (p)-[:PART_OF]->(b);

 //create the link graph
 load csv with headers from "https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1143698530" as csv
 with csv.URL as URL, csv.Links as row
 unwind row as linklist
 with URL, [l in split(linklist,"****") | trim(l)] as links
 unwind range(0,size(links)-2) as idx
 MERGE (l:Page {url:links[idx]})
 WITH l, URL
 MATCH (p:Page {url: URL})
 CREATE (p)-[:LINKS_TO]->(l);

 //remove the redundant links
 match (b:Blog {name:"Armbruster-IT Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
 where p2.url starts with "http://blog.armbruster-it.de"
 and not ((b)<-[:PART_OF]-(p2))
 detach delete p2;

 match (b:Blog {name:"Armbruster-IT Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
 where p2.url contains "//blog.armbruster-it.de/"
 and not ((b)<-[:PART_OF]-(p2))
 detach delete p2;

 match (p1:Page)-[:PART_OF]->(b:Blog {name:"Armbruster-IT Blog"})<-[:PART_OF]-(p2:Page),
 (p1)-[lt:LINKS_TO]-(p2)
 delete lt;

 match (p1:Page)-[:PART_OF]->(b:Blog {name:"Armbruster-IT Blog"}),
 (p1)-[lt:LINKS_TO]-(p1)
 delete lt;



 //Max De Marzi
 https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1473003649

 //create the blog entries
 load csv with headers from "https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1473003649" as csv
 merge (p:Page {url: csv.URL})
 on match set  p.title=csv.Title, p.created=csv.Date
 on create set  p.title=csv.Title, p.created=csv.Date
 with p
 match (b:Blog {name:"Max De Marzi's Blog", url:"http://maxdemarzi.com/"})
 create (p)-[:PART_OF]->(b);

 //create the link graph
 load csv with headers from "https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1473003649" as csv
 with csv.URL as URL, csv.Links as row
 unwind row as linklist
 with URL, [l in split(linklist,"****") | trim(l)] as links
 unwind range(0,size(links)-2) as idx
 MERGE (l:Page {url:links[idx]})
 WITH l, URL
 MATCH (p:Page {url: URL})
 CREATE (p)-[:LINKS_TO]->(l);

 // //remove the redundant links
 // match (b:Blog {name:"Max De Marzi's Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
 // where p2.url starts with "http://maxdemarzi.com"
 // and not ((b)<-[:PART_OF]-(p2))
 // detach delete p2;

 match (b:Blog {name:"Max De Marzi's Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
 where p2.url contains "//maxdemarzi.com/"
 and not ((b)<-[:PART_OF]-(p2))
 detach delete p2;

 match (p1:Page)-[:PART_OF]->(b:Blog {name:"Max De Marzi's Blog"})<-[:PART_OF]-(p2:Page),
 (p1)-[lt:LINKS_TO]-(p2)
 delete lt;

 match (p1:Page)-[:PART_OF]->(b:Blog {name:"Max De Marzi's Blog"}),
 (p1)-[lt:LINKS_TO]-(p1)
 delete lt;




 //Will Lyon
 https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1326206509

 //create the blog entries
 load csv with headers from "https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1326206509" as csv
 merge (p:Page {url: csv.URL})
 on match set  p.title=csv.Title, p.created="Unknown"
 on create set  p.title=csv.Title, p.created="Unknown"
 with p
 match (b:Blog {name:"Will Lyon's Blog", url:"http://lyonwj.com/"})
 create (p)-[:PART_OF]->(b);

 //create the link graph
 load csv with headers from "https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1326206509" as csv
 with csv.URL as URL, csv.Links as row
 unwind row as linklist
 with URL, [l in split(linklist,"****") | trim(l)] as links
 unwind range(0,size(links)-2) as idx
 MERGE (l:Page {url:links[idx]})
 WITH l, URL
 MATCH (p:Page {url: URL})
 CREATE (p)-[:LINKS_TO]->(l);

 // //remove the redundant links

 match (b:Blog {name:"Will Lyon's Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
 where p2.url contains "//lyonwj.com/"
 and not ((b)<-[:PART_OF]-(p2))
 detach delete p2;

 match (p1:Page)-[:PART_OF]->(b:Blog {name:"Will Lyon's Blog"})<-[:PART_OF]-(p2:Page),
 (p1)-[lt:LINKS_TO]-(p2)
 delete lt;

 match (p1:Page)-[:PART_OF]->(b:Blog {name:"Will Lyon's Blog"}),
 (p1)-[lt:LINKS_TO]-(p1)
 delete lt;

 //Mark Needham
 https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1346442478

 //create the blog entries
 load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1346442478" as csv
 merge (p:Page {url: csv.URL})
 on match set  p.title=csv.Title, p.created="Unknown"
 on create set  p.title=csv.Title, p.created="Unknown"
 with p
 match (b:Blog {name:"Mark Needham's Blog"})
 create (p)-[:PART_OF]->(b);

 //create the link graph
 load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1346442478" as csv
 with csv.URL as URL, csv.Links as row
 unwind row as linklist
 with URL, [l in split(linklist,"****") | trim(l)] as links
 unwind range(0,size(links)-2) as idx
 MERGE (l:Page {url:links[idx]})
 WITH l, URL
 MATCH (p:Page {url: URL})
 CREATE (p)-[:LINKS_TO]->(l);

 // //remove the redundant links
 match (b:Blog {name:"Mark Needham's Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
 where p2.url contains "//www.markhneedham.com/"
 and not ((b)<-[:PART_OF]-(p2))
 detach delete p2;

 match (p1:Page)-[:PART_OF]->(b:Blog {name:"Mark Needham's Blog"})<-[:PART_OF]-(p2:Page),
 (p1)-[lt:LINKS_TO]-(p2)
 delete lt;

 match (p1:Page)-[:PART_OF]->(b:Blog {name:"Mark Needham's Blog"}),
 (p1)-[lt:LINKS_TO]-(p1)
 delete lt;

 //clear out the relative and incomplete URLs
 match (p:Page)
 where p.url starts with "/"
 detach delete p;

 match (p:Page)
 where p.url starts with "#"
 detach delete p;

 match (p:Page)
 where p.url starts with "javascript"
 detach delete p;
diff --git a/query_graphbloggraph.cql b/query_graphbloggraph.cql
 //Query the Bloggraph

 //Which pages have the most links
 match (b:Blog)--(p:Page)-[r:LINKS_TO]->(p2:Page)
 return b.name, p.title, count(r)
 order by count(r) desc

 //Which pages are being linked to most
 match ()-[r:LINKS_TO]->(p:Page)
 return p.url, count(r)
 order by count(r) DESC
 limit 10;

 //links between blogposts
 MATCH p=((n1:Blog)--(p1:Page)-[:LINKS_TO]-(p2:Page)--(b2:Blog))
 RETURN p

 //show all the paths between blogs
 match (b1:Blog {name:"Bruggen"}), (b2:Blog {name:"Neo4j Blog"}),
 p1 = allshortestpaths((b1)-[*]-(b2))
 return p1 as paths
 union
 match (b1:Blog {name:"Bruggen"}),(b3:Blog {name:"JEXP Blog"}),
 p2 = allshortestpaths((b1)-[*]-(b3))
 return p2 as paths
 // union
 // match (b2:Blog {name:"Neo4j Blog"}),(b3:Blog {name:"JEXP Blog"}),
 // p3 = allshortestpaths((b2)-[*]-(b3))
 // return p3 as paths;

 //Pagerank ftw
 //1. calculate the PageRank
 UNWIND range(1,50) AS round
 MATCH (n:Page)
 WHERE rand() < 0.1
 MATCH (n:Page)-[:LINKS_TO*..10]->(m:Page)
 SET m.rank = coalesce(m.rank,0) + 1

 //2. Show the PageRank
 match (n:Page)
 where n.rank is not null
 return n.url, n.rank
 order by n.rank desc
 limit 10;
	//Bruggen blog feed
	//https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=0

	create index on :Blog(name);
	create constraint on (p:Page) assert p.url is unique;

	create (b:Blog {name:"Bruggen", url:"http://blog.bruggen.com"});
	create (n:Blog {name:"Neo4j Blog", url:"http://neo4j.com/blog"});
	create (n:Blog {name:"JEXP Blog", url:"http://jexp.de/blog/"});
	create (n:Blog {name:"Armbruster-IT Blog", url:"http://blog.armbruster-it.de/"});
	create (n:Blog {name:"Max De Marzi's Blog", url:"http://maxdemarzi.com/"});
	create (n:Blog {name:"Will Lyon's Blog", url:"http://lyonwj.com/"});
	create (n:Blog {name:"Mark Needham's Blog", url:"http://markhneedham.com/blog"});



	//create the Bruggen blog entries
	load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=0" as csv
	match (b:Blog {name:"Bruggen", url:"http://blog.bruggen.com"})
	create (p:Page {url: csv.URL, title: csv.Title, created: csv.Date})-[:PART_OF]->(b);

	//create the link graph
	load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=0" as csv
	with csv.URL as URL, csv.Links as row
	unwind row as listoflinks
	with URL, [l in split(listoflinks,"****") \| trim(l)] as links
	unwind range(0,size(links)-2) as idx
	MERGE (l:Page {url:links[idx]})
	WITH l, URL
	MATCH (p:Page {url: URL})
	MERGE (p)-[:LINKS_TO]->(l);

	//remove the redundant links
	//linking to pages with same url (eg. archive pages, label pages...)
	match (b:Blog {name:"Bruggen"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
	where p2.url starts with "http://blog.bruggen.com"
	and not ((b)<-[:PART_OF]-(p2))
	detach delete p2;
	//linking to other posts of the same blog
	match (p1:Page)-[:PART_OF]->(b:Blog {name:"Bruggen"})<-[:PART_OF]-(p2:Page),
	(p1)-[lt:LINKS_TO]-(p2)
	delete lt;
	//linking to itself
	match (p1:Page)-[:PART_OF]->(b:Blog {name:"Bruggen"}),
	(p1)-[lt:LINKS_TO]-(p1)
	delete lt;
	//linking to the blog provider (Blogger)
	match (p:Page)
	where p.url contains "//www.blogger.com"
	detach delete p;


	//Neo4j blog feed
	//https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=999553051

	//create the blog entries
	load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=999553051" as csv
	merge (p:Page {url: csv.URL})
	on match set p.title=csv.Title, p.created=csv.Date
	on create set p.title=csv.Title, p.created=csv.Date
	with p
	match (b:Blog {name:"Neo4j Blog", url:"http://neo4j.com/blog"})
	create (p)-[:PART_OF]->(b);

	//create the link graph
	load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=999553051" as csv
	with csv.URL as URL, csv.Links as row
	unwind row as linklist
	with URL, [l in split(linklist,"****") \| trim(l)] as links
	unwind range(0,size(links)-2) as idx
	MERGE (l:Page {url:links[idx]})
	WITH l, URL
	MATCH (p:Page {url: URL})
	CREATE (p)-[:LINKS_TO]->(l);

	//remove the redundant links
	match (b:Blog {name:"Neo4j Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
	where p2.url starts with "http://neo4j.com/blog"
	and not ((b)<-[:PART_OF]-(p2))
	detach delete p2;

	match (b:Blog {name:"Neo4j Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
	where p2.url contains "//neo4j.com/"
	and not ((b)<-[:PART_OF]-(p2))
	detach delete p2;

	match (p1:Page)-[:PART_OF]->(b:Blog {name:"Neo4j Blog"})<-[:PART_OF]-(p2:Page),
	(p1)-[lt:LINKS_TO]-(p2)
	delete lt;

	match (p1:Page)-[:PART_OF]->(b:Blog {name:"Neo4j Blog"}),
	(p1)-[lt:LINKS_TO]-(p1)
	delete lt;


	//JEXP blog feed
	//https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=2066848510

	//create the blog entries
	load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=2066848510" as csv
	merge (p:Page {url: csv.URL})
	on match set p.title=csv.Title, p.created=csv.Date
	on create set p.title=csv.Title, p.created=csv.Date
	with p
	match (b:Blog {name:"JEXP Blog", url:"http://jexp.de/blog/"})
	create (p)-[:PART_OF]->(b);

	//create the link graph
	load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=2066848510" as csv
	with csv.URL as URL, csv.Links as row
	unwind row as linklist
	with URL, [l in split(linklist,"****") \| trim(l)] as links
	unwind range(0,size(links)-2) as idx
	MERGE (l:Page {url:links[idx]})
	WITH l, URL
	MATCH (p:Page {url: URL})
	CREATE (p)-[:LINKS_TO]->(l);

	//remove the redundant links
	match (b:Blog {name:"JEXP Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
	where p2.url starts with "http://jexp.de/blog"
	and not ((b)<-[:PART_OF]-(p2))
	detach delete p2;

	match (b:Blog {name:"JEXP Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
	where p2.url contains "//jexp.de/"
	and not ((b)<-[:PART_OF]-(p2))
	detach delete p2;

	match (p1:Page)-[:PART_OF]->(b:Blog {name:"JEXP Blog"})<-[:PART_OF]-(p2:Page),
	(p1)-[lt:LINKS_TO]-(p2)
	delete lt;

	match (p1:Page)-[:PART_OF]->(b:Blog {name:"JEXP Blog"}),
	(p1)-[lt:LINKS_TO]-(p1)
	delete lt;



	//Stefan Armbruster-IT https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1143698530

	//create the blog entries
	load csv with headers from "https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1143698530" as csv
	merge (p:Page {url: csv.URL})
	on match set p.title=csv.Title, p.created=csv.Date
	on create set p.title=csv.Title, p.created=csv.Date
	with p
	match (b:Blog {name:"Armbruster-IT Blog", url:"http://blog.armbruster-it.de/"})
	create (p)-[:PART_OF]->(b);

	//create the link graph
	load csv with headers from "https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1143698530" as csv
	with csv.URL as URL, csv.Links as row
	unwind row as linklist
	with URL, [l in split(linklist,"****") \| trim(l)] as links
	unwind range(0,size(links)-2) as idx
	MERGE (l:Page {url:links[idx]})
	WITH l, URL
	MATCH (p:Page {url: URL})
	CREATE (p)-[:LINKS_TO]->(l);

	//remove the redundant links
	match (b:Blog {name:"Armbruster-IT Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
	where p2.url starts with "http://blog.armbruster-it.de"
	and not ((b)<-[:PART_OF]-(p2))
	detach delete p2;

	match (b:Blog {name:"Armbruster-IT Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
	where p2.url contains "//blog.armbruster-it.de/"
	and not ((b)<-[:PART_OF]-(p2))
	detach delete p2;

	match (p1:Page)-[:PART_OF]->(b:Blog {name:"Armbruster-IT Blog"})<-[:PART_OF]-(p2:Page),
	(p1)-[lt:LINKS_TO]-(p2)
	delete lt;

	match (p1:Page)-[:PART_OF]->(b:Blog {name:"Armbruster-IT Blog"}),
	(p1)-[lt:LINKS_TO]-(p1)
	delete lt;



	//Max De Marzi
	https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1473003649

	//create the blog entries
	load csv with headers from "https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1473003649" as csv
	merge (p:Page {url: csv.URL})
	on match set p.title=csv.Title, p.created=csv.Date
	on create set p.title=csv.Title, p.created=csv.Date
	with p
	match (b:Blog {name:"Max De Marzi's Blog", url:"http://maxdemarzi.com/"})
	create (p)-[:PART_OF]->(b);

	//create the link graph
	load csv with headers from "https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1473003649" as csv
	with csv.URL as URL, csv.Links as row
	unwind row as linklist
	with URL, [l in split(linklist,"****") \| trim(l)] as links
	unwind range(0,size(links)-2) as idx
	MERGE (l:Page {url:links[idx]})
	WITH l, URL
	MATCH (p:Page {url: URL})
	CREATE (p)-[:LINKS_TO]->(l);

	// //remove the redundant links
	// match (b:Blog {name:"Max De Marzi's Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
	// where p2.url starts with "http://maxdemarzi.com"
	// and not ((b)<-[:PART_OF]-(p2))
	// detach delete p2;

	match (b:Blog {name:"Max De Marzi's Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
	where p2.url contains "//maxdemarzi.com/"
	and not ((b)<-[:PART_OF]-(p2))
	detach delete p2;

	match (p1:Page)-[:PART_OF]->(b:Blog {name:"Max De Marzi's Blog"})<-[:PART_OF]-(p2:Page),
	(p1)-[lt:LINKS_TO]-(p2)
	delete lt;

	match (p1:Page)-[:PART_OF]->(b:Blog {name:"Max De Marzi's Blog"}),
	(p1)-[lt:LINKS_TO]-(p1)
	delete lt;




	//Will Lyon
	https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1326206509

	//create the blog entries
	load csv with headers from "https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1326206509" as csv
	merge (p:Page {url: csv.URL})
	on match set p.title=csv.Title, p.created="Unknown"
	on create set p.title=csv.Title, p.created="Unknown"
	with p
	match (b:Blog {name:"Will Lyon's Blog", url:"http://lyonwj.com/"})
	create (p)-[:PART_OF]->(b);

	//create the link graph
	load csv with headers from "https://docs.google.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1326206509" as csv
	with csv.URL as URL, csv.Links as row
	unwind row as linklist
	with URL, [l in split(linklist,"****") \| trim(l)] as links
	unwind range(0,size(links)-2) as idx
	MERGE (l:Page {url:links[idx]})
	WITH l, URL
	MATCH (p:Page {url: URL})
	CREATE (p)-[:LINKS_TO]->(l);

	// //remove the redundant links

	match (b:Blog {name:"Will Lyon's Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
	where p2.url contains "//lyonwj.com/"
	and not ((b)<-[:PART_OF]-(p2))
	detach delete p2;

	match (p1:Page)-[:PART_OF]->(b:Blog {name:"Will Lyon's Blog"})<-[:PART_OF]-(p2:Page),
	(p1)-[lt:LINKS_TO]-(p2)
	delete lt;

	match (p1:Page)-[:PART_OF]->(b:Blog {name:"Will Lyon's Blog"}),
	(p1)-[lt:LINKS_TO]-(p1)
	delete lt;

	//Mark Needham
	https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1346442478

	//create the blog entries
	load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1346442478" as csv
	merge (p:Page {url: csv.URL})
	on match set p.title=csv.Title, p.created="Unknown"
	on create set p.title=csv.Title, p.created="Unknown"
	with p
	match (b:Blog {name:"Mark Needham's Blog"})
	create (p)-[:PART_OF]->(b);

	//create the link graph
	load csv with headers from "https://docs.google.com/a/neotechnology.com/spreadsheets/d/1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w/export?format=csv&id=1LAQarqQ-id74-zxV6R4SdG7mCq_24xACXO5WNOP-2_w&gid=1346442478" as csv
	with csv.URL as URL, csv.Links as row
	unwind row as linklist
	with URL, [l in split(linklist,"****") \| trim(l)] as links
	unwind range(0,size(links)-2) as idx
	MERGE (l:Page {url:links[idx]})
	WITH l, URL
	MATCH (p:Page {url: URL})
	CREATE (p)-[:LINKS_TO]->(l);

	// //remove the redundant links
	match (b:Blog {name:"Mark Needham's Blog"})<-[:PART_OF]-(p1:Page)-[:LINKS_TO]->(p2:Page)
	where p2.url contains "//www.markhneedham.com/"
	and not ((b)<-[:PART_OF]-(p2))
	detach delete p2;

	match (p1:Page)-[:PART_OF]->(b:Blog {name:"Mark Needham's Blog"})<-[:PART_OF]-(p2:Page),
	(p1)-[lt:LINKS_TO]-(p2)
	delete lt;

	match (p1:Page)-[:PART_OF]->(b:Blog {name:"Mark Needham's Blog"}),
	(p1)-[lt:LINKS_TO]-(p1)
	delete lt;

	//clear out the relative and incomplete URLs
	match (p:Page)
	where p.url starts with "/"
	detach delete p;

	match (p:Page)
	where p.url starts with "#"
	detach delete p;

	match (p:Page)
	where p.url starts with "javascript"
	detach delete p;
	//Query the Bloggraph

	//Which pages have the most links
	match (b:Blog)--(p:Page)-[r:LINKS_TO]->(p2:Page)
	return b.name, p.title, count(r)
	order by count(r) desc

	//Which pages are being linked to most
	match ()-[r:LINKS_TO]->(p:Page)
	return p.url, count(r)
	order by count(r) DESC
	limit 10;

	//links between blogposts
	MATCH p=((n1:Blog)--(p1:Page)-[:LINKS_TO]-(p2:Page)--(b2:Blog))
	RETURN p

	//show all the paths between blogs
	match (b1:Blog {name:"Bruggen"}), (b2:Blog {name:"Neo4j Blog"}),
	p1 = allshortestpaths((b1)-[*]-(b2))
	return p1 as paths
	union
	match (b1:Blog {name:"Bruggen"}),(b3:Blog {name:"JEXP Blog"}),
	p2 = allshortestpaths((b1)-[*]-(b3))
	return p2 as paths
	// union
	// match (b2:Blog {name:"Neo4j Blog"}),(b3:Blog {name:"JEXP Blog"}),
	// p3 = allshortestpaths((b2)-[*]-(b3))
	// return p3 as paths;

	//Pagerank ftw
	//1. calculate the PageRank
	UNWIND range(1,50) AS round
	MATCH (n:Page)
	WHERE rand() < 0.1
	MATCH (n:Page)-[:LINKS_TO*..10]->(m:Page)
	SET m.rank = coalesce(m.rank,0) + 1

	//2. Show the PageRank
	match (n:Page)
	where n.rank is not null
	return n.url, n.rank
	order by n.rank desc
	limit 10;