Created
July 16, 2015 14:11
-
-
Save Leward/c93f6e3265a23119934f to your computer and use it in GitHub Desktop.
Vos scripts sous stéroïdes avec Groovy
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Load Jsoup, used to fetch HTML pages and query them
@Grab("org.jsoup:jsoup:1.8.2") | |
import org.jsoup.Jsoup | |
// Load Neo4j JDBC to load data and query a Neo4j database | |
@GrabResolver(name='neo4j-public', root='http://m2.neo4j.org/content/groups/public') | |
@Grab("org.neo4j:neo4j-jdbc:2.1.4") | |
import org.neo4j.jdbc.Driver | |
import groovy.sql.Sql | |
// This script regularly gets the articles on the home page of the slate.com website
// The titles are then persisted into a database | |
// Fetch the Slate home page and collect the text of every <h1> found inside an <article>.
println "Reading articles on Slate"
def document = Jsoup.connect("http://slate.com/").get()
def h1Elements = document.select("article h1")
def titles = h1Elements.collect { it.text() }
println "Found ${titles.size()} articles"

// Cypher statement executed once per title. Positional parameters:
//   {1} website name, {2} day string, {3} property map of the new Article node.
// MERGE reuses the Website/Day nodes when they already exist; CREATE always adds
// a new Article node plus its two relationships.
def cypherQuery = '''
MERGE (w:`Website` {name: {1}})
MERGE (d:`Day` {date: {2}})
CREATE
(a:`Article` {3}),
(a)-[:`PUBLISHED_ON`]->(w),
(a)-[:`PUBLISHED_AT`]->(d)
'''

// Use "yyyy" (calendar year). "YYYY" is the week-based year and yields the wrong
// year for dates around New Year. Computed once so every article inserted by this
// run is attached to the same Day node, even if the run crosses midnight.
def today = new Date().format("yyyy/MM/dd")

// Persist the article titles into the Neo4j database.
def sql = Sql.newInstance('jdbc:neo4j://localhost:7474/')
try {
    titles.each { title ->
        sql.execute(cypherQuery, [
            "slate.com",
            today,
            [title: title]
        ])
        println "Inserted: ${title}"
    }
} finally {
    // Always release the JDBC connection, including when an insert fails.
    sql.close()
}
println "Script completed. "
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment