Skip to content

Instantly share code, notes, and snippets.

@michal-mally
Created March 3, 2017 18:11
Show Gist options
  • Save michal-mally/b567d44d96aabb1ec9ef21f76203734a to your computer and use it in GitHub Desktop.
Save michal-mally/b567d44d96aabb1ec9ef21f76203734a to your computer and use it in GitHub Desktop.
Downloads the MotoGP calendar and produces gcalcli commands that add all the events to Google Calendar.
import org.apache.commons.codec.digest.DigestUtils
import org.apache.commons.lang3.StringUtils
import org.htmlcleaner.HtmlCleaner
import org.htmlcleaner.TagNode
import java.time.ZonedDateTime
import java.util.concurrent.TimeUnit
/**
 * Returns the body of {@code url}, using a file in the current working
 * directory as a cache. The cache file is named after the MD5 hex digest of
 * the URL.
 *
 * Progress is printed to stdout: "from cache" when the cache file already
 * exists, "OK" once a fresh download has been fetched and stored, and
 * "FAILED" when the fetch throws.
 *
 * @param url the address to fetch
 * @return the cached or freshly downloaded text
 * @throws IOException if the URL cannot be read (rethrown after reporting)
 */
String download(String url) {
    def md5 = DigestUtils.md5Hex(url)
    print "Downloading $url ($md5)... "
    def file = new File(md5)
    if (file.exists()) {
        println "from cache"
        return file.text
    }

    String content
    try {
        // Fetch before reporting success so a failed request is never logged as "OK".
        content = new URL(url).text
    } catch (IOException e) {
        println "FAILED"
        throw e
    }
    // Only a fully read response is written, so the cache never holds a partial page.
    file.text = content
    println "OK"
    return content
}
/**
 * Converts a timestamp whose UTC offset is written without a colon
 * (e.g. {@code 2016-04-10T14:00:00-0500}) into epoch milliseconds.
 *
 * A colon is inserted after the 22nd character so the text becomes parseable
 * by {@link ZonedDateTime#parse}.
 *
 * @param date timestamp text; must be at least 22 characters long
 * @return milliseconds since the epoch for that instant
 */
long toMillis(String date) {
    String head = date.substring(0, 22)
    String tail = date.substring(22)
    String withColon = head + ':' + tail
    return ZonedDateTime.parse(withColon).toInstant().toEpochMilli()
}
// Fetch the calendar index page through download().
def indexHtml = download("http://www.motogp.com/en/calendar/")

// Pull every event URL out of the index, drop those containing 'Test', and de-duplicate.
def eventUrlPattern = ~"http://www.motogp.com/en/event/[^\"]+"
def events = indexHtml.findAll(eventUrlPattern)
        .findAll { String eventUrl -> !eventUrl.contains('Test') }
        .unique()

// Map each event URL to the text of its page.
def eventsHtml = events.collectEntries { String eventUrl ->
    [(eventUrl): download(eventUrl)]
}
// Build a map of event URL -> unique list of [sessionName, start, end] triples,
// read from the rows of every element whose class is "schedule_table".
def allEvents = eventsHtml.collectEntries { eventUrl, html ->
    def page = new HtmlCleaner().clean(html)
    def scheduleTables = page.getElementListByAttValue("class", "schedule_table", true, true)
    def rows = scheduleTables*.getElementListByName("tr", true).flatten()

    def sessions = rows.collectMany { TagNode row ->
        // Keep only the cells whose text contains one of the session markers.
        def markers = ["Moto", "Q", "FP", "WUP", "RAC"]
        def cellTexts = row.getElementListByName("td", true).collect { cell ->
            cell.text.toString().trim()
        }
        def sessionName = cellTexts
                .findAll { String text -> markers.any { marker -> text.contains(marker) } }
                .join(" ")

        // Rows without any matching cell contribute nothing.
        if (!sessionName) {
            return []
        }

        // Either attribute may be absent, in which case the value is null.
        def start = row.findElementHavingAttribute("data-ini", true)?.getAttributeByName("data-ini")
        def end = row.findElementHavingAttribute("data-end", true)?.getAttributeByName("data-end")
        return [[sessionName, start, end]]
    }

    [eventUrl, sessions.unique()]
}
// gcalcli --calendar 'TV' --title 'MotoGP RAC' --where 'Americas' --when '2016-04-10T14:00:00-0500' --duration 60 --description '(Americas) MotoGP RAC' --reminder 30 add
// Wraps a value in single quotes for a POSIX shell. Embedded single quotes are
// rewritten as '\'' so scraped text cannot terminate the quoting early.
def shellQuote = { Object value -> "'" + value.toString().replace("'", "'\\''") + "'" }

// Print one gcalcli "add" command per session, ordered by start within each event.
allEvents.each { venue, sessions ->
    // Location is the last path segment of the event URL with '+' shown as a space.
    def where = StringUtils.substringAfterLast(venue, "/").replaceAll("\\+", " ")

    // Sessions without a start cannot be scheduled (they would yield --when 'null'), so skip them.
    sessions.findAll { it[1] }.sort { it[1] }.each { session ->
        def (title, start, end) = session
        // Fall back to 60 minutes when the session has no end.
        def duration = end ? TimeUnit.MILLISECONDS.toMinutes(toMillis(end) - toMillis(start)) : 60
        def description = "($venue) $title"
        println "gcalcli --calendar 'TV' --title ${shellQuote(title)} --where ${shellQuote(where)} --when ${shellQuote(start)} --duration $duration --description ${shellQuote(description)} --reminder 30 add"
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment