Created
April 26, 2019 06:18
-
-
Save laiso/128ed617ec4c3b650a44e01065c7caed to your computer and use it in GitHub Desktop.
英会話学習用のテキストを自作したくてQuoraのスクレイピングを書いた。なぜScalaなのかは不明…… #CodePiece
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.{File, PrintWriter} | |
import org.jsoup.Jsoup | |
import scalaj.http.BaseHttp | |
/**
 * Fetches one Quora question page and prints its title, its URL and the text
 * of each answer to standard output in a Markdown-like layout.
 */
object Main extends App {

  /**
   * Writes `text` to the file `dump.md` (path relative to the working directory).
   *
   * @param text content to write to the file
   */
  def dumpToFile(text: String) = {
    val writer = new PrintWriter(new File("dump.md"))
    // PrintWriter buffers its output: unless the writer is closed (also when
    // write fails) the text may never be flushed to disk and the handle leaks.
    try writer.write(text)
    finally writer.close()
  }

  val url = "https://www.quora.com/How-is-your-life-after-moving-to-Thailand-as-a-foreigner"
  val body = LoadFromQuora(url).asString.body
  val html = Jsoup.parse(body)

  // Elements.first() returns null when nothing matches; fall back to an empty
  // title instead of failing with a NullPointerException on a page without <h1>.
  val title = Option(html.getElementsByTag("h1").first()).map(_.text()).getOrElse("")
  println(title)
  println("===============")
  println(url)

  val answers = html.getElementsByClass("pagedlist_item")
  answers.forEach(answer => {
    // Print a "## <author>" heading only for items that carry an author element.
    Option(answer.getElementsByClass("feed_item_answer_user").first())
      .foreach(person => println(s"## ${person.text()}"))

    // Print every paragraph found inside the expanded answer text, one per line.
    answer.getElementsByClass("ui_qtext_expanded").forEach(element => {
      element.getElementsByClass("ui_qtext_para").forEach(para => {
        println(para.text())
      })
    })
  })
}
/**
 * HTTP client used to download Quora pages.
 *
 * Only the User-Agent header value is overridden (it is set to a curl
 * identifier); every other `BaseHttp` setting keeps its default.
 */
object LoadFromQuora extends BaseHttp(userAgent = "curl/7.54.0")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment