Created
October 12, 2017 03:30
-
-
Save Terryhung/261cf925a8bc6cbc855248afaa24f0ed to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// JAVA
public static void parsing() throws exception { | |
string url = "http://www.cna.com.tw/news/aloc/201710030325-1.aspx"; | |
document doc = jsoup.connect(url) | |
.header("user-agent", "mozilla/5.0 (linux; android 7.0; samsung sm-g950u build/nrd90m) applewebkit/537.36 (khtml, like gecko) samsungbrowser/5.2 chrome/51.0.2704.106 mobile safari/537.36") | |
.get(); | |
string target = "div.news_article"; | |
string remove = "script, button"; | |
string image_url = "http://img5.cna.com.tw/www/webphotos/800/20171003/22066085.jpg"; | |
string news_title = "苗栗縣鼓勵青年創業苗栗縣鼓勵青年創業苗栗縣鼓勵青年創業"; | |
elements divs = doc.select(target); | |
elements removed = doc.select(remove); | |
removed.remove(); | |
element div = divs.first().prependelement("div").attr("style", "position: relative; width:100%; height: 600px; overflow: hidden;"); | |
div.append(string.format("<img src=%s style='height:600px;width:%s;opacity: 0.7'>", image_url, "100%")); | |
div.appendelement("h1").attr("style", "position: absolute; top: 400px; left: 0px; width:100%; font-size:50px; word-break: break-all").text(news_title); | |
createhtml(arrays.aslist(divs.html())); | |
system.out.println( divs.html() ); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment