Created
September 23, 2014 16:09
-
-
Save michalbcz/0be8d667e1ea825de588 to your computer and use it in GitHub Desktop.
EN -> CZ translation using slovnik.seznam.cz
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* EN -> CZ translation using slovnik.seznam.cz */
@Grapes([
    @Grab(group='org.apache.httpcomponents', module='httpclient', version='4.3.5'),
    @Grab(group='org.jsoup', module='jsoup', version='1.7.3')
])
import org.apache.http.impl.client.*
import org.apache.http.client.methods.*
import org.apache.http.util.EntityUtils
import org.jsoup.*
// Looks up an English word on slovnik.seznam.cz and prints the Czech
// translations found in the page's "#fastMeanings" element, one per line.
//
// Usage: groovy <script> [word]
// The word defaults to "congestion" when no argument is supplied.

// Browser-like User-Agent sent with the request.
def chrome37UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36"

// Take the word from the first CLI argument when one is available; the
// binding check keeps the script usable when evaluated without an `args` variable.
def cliArgs = binding.hasVariable('args') ? (args as List) : []
def word = (cliArgs && cliArgs[0]?.trim()) ? cliArgs[0].trim() : "congestion"

def http = HttpClients.custom().setUserAgent(chrome37UserAgent).build()
def html
try {
    // URL-encode the word so spaces, '&' or non-ASCII characters cannot corrupt the query string.
    def encodedWord = URLEncoder.encode(word, "UTF-8")
    def get = new HttpGet("http://slovnik.seznam.cz/?q=${encodedWord}&lang=en-cz&forceLang=1")
    def response = http.execute(get)
    try {
        html = EntityUtils.toString(response.getEntity(), "UTF-8")
    } finally {
        // Release the connection even if reading the body fails.
        response.close()
    }
} finally {
    http.close()
}

def doc = Jsoup.parse(html)
def translationElements = doc.select("#fastMeanings *")

def translations = []
def parts = []   // words collected for the translation currently being assembled

// Emits the words collected so far as one translation; skips empty runs so
// that consecutive separators do not produce blank entries.
def flush = {
    if (parts) {
        translations << parts.join(" ")
        parts.clear()
    }
}

translationElements.each { element ->
    // Links and elements with class "w" carry the words of a translation.
    if (element.tagName() == "a" || element.className() == "w") {
        parts << element.text()
    }
    // A "comma" element or a line break ends the current translation.
    if (element.className() == "comma" || element.tagName() == "br") {
        flush()
    }
}
// The last translation is not necessarily followed by a separator.
flush()

if (!translations) {
    System.err.println "No translation found for '${word}'"
}
translations.eachWithIndex { translation, index -> println "${index + 1}: ${translation}" }
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment