Created
December 20, 2013 13:32
-
-
Save hbprotoss/8054763 to your computer and use it in GitHub Desktop.
抓取58代码片段
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Scrape one listing page into a TPGoods record and collect its description text.
Document doc = Jsoups.getDocByUrl(url);
TPGoods goods = new TPGoods();

// The numeric id is the URL text between the last '/' and the "x.shtml" marker.
int idStart = url.lastIndexOf("/") + 1;
int idEnd = url.lastIndexOf("x.shtml");
goods.setId(Long.valueOf(url.substring(idStart, idEnd)));
goods.setRealUrl(url);

/* main: first "div" match reported for the element with id "main" */
Elements mainDivs = doc.getElementById("main").getElementsByTag("div");
Element main = mainDivs.first();

// Title and time are stored as the inner HTML of the first <h1> and first li.time.
Element title = main.select("h1").first();
String titleHtml = title.html();
goods.setTitle(titleHtml);
Element time = main.select("li.time").first();
String timeHtml = time.html();
goods.setTime(timeHtml);

/* summary: the div.su_con cells inside the first ul.suUl */
Element summaryList = main.select("ul.suUl").first();
Elements summary = summaryList.select("div.su_con");

// Cell 0 carries the price; the unit suffix is appended to the raw span HTML.
Element price = summary.get(0).select("span").first();
goods.setPrice(price.html() + "万元");

// Cell 1 carries the item condition; a missing span becomes an empty string.
Element chengse = summary.get(1).select("span").first();
String chengseText = "";
if (chengse != null) {
    chengseText = chengse.html();
}
goods.setChengse(chengseText);

// Area is left empty here; an earlier lookup via summary.get(2).select("a") is disabled.
goods.setArea("");

/* content: one cleaned line per "item" element inside the first "des_con" element */
Element element = doc.getElementsByClass("des_con").first();
Elements carInfo = element.getElementsByClass("item");
StringBuilder carSb = new StringBuilder();
for (Element carItem : carInfo) {
    carSb.append(Htmls.simpleClean(carItem.html())).append("\n");
}
String descriptionStr = carSb.toString();
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Scrape one listing page into a TPGoods record and start building its description.
Document doc = Jsoups.getDocByUrl(url);
TPGoods goods = new TPGoods();

// The numeric id is the URL text between the last '/' and the "x.shtml" marker.
int idStart = url.lastIndexOf("/") + 1;
int idEnd = url.lastIndexOf("x.shtml");
goods.setId(Long.valueOf(url.substring(idStart, idEnd)));
goods.setRealUrl(url);

/* main: first "div" match reported for the element with id "main" */
Elements mainDivs = doc.getElementById("main").getElementsByTag("div");
Element main = mainDivs.first();

// Title is stored as the inner HTML of the first <h1>.
Element title = main.select("h1").first();
String titleHtml = title.html();
goods.setTitle(titleHtml);

// The page's li.time element is not read here (that lookup is disabled);
// the current date, formatted as yyyy-MM-dd, is stored instead.
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
java.util.Date now = new java.util.Date();
goods.setTime(sdf.format(now));

/* summary: the div.su_con cells inside the first ul.suUl */
Element summaryList = main.select("ul.suUl").first();
Elements summary = summaryList.select("div.su_con");

// Cell 0 carries the price; the unit suffix is appended to the raw span HTML.
Element price = summary.get(0).select("span").first();
goods.setPrice(price.html() + "元/月");

// No condition value is recorded for this listing type.
goods.setChengse("");

// Cell 4 carries the area; the trailing map/street-view link text is stripped
// after cleaning, and a missing match becomes an empty string.
Element area = summary.get(4).select("div.su_con").first();
String areaText = "";
if (area != null) {
    areaText = Htmls.simpleClean(area.html()).replace("\n ( 地图▪街景 )", "");
}
goods.setArea(areaText);

// Some of the house details shown in the summary are added to the description:
// the <li> rows at indexes 1 through 3, each flattened onto a single line.
Elements houseInfo = main.select("ul.suUl").first().select("li");
StringBuilder descBuilder = new StringBuilder();
for (int row = 1; row < 4; row++) {
    String rowHtml = houseInfo.get(row).html().replace("\n", "");
    descBuilder.append(rowHtml).append("\n");
}
String descriptionStr = Htmls.simpleClean(descBuilder.toString()) + "\n";

/* content: the second element carrying the class "cur" */
Elements curElements = doc.getElementsByClass("cur");
Element element = curElements.get(1);
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Scrape one listing page into a TPGoods record and collect its description text.
Document doc = Jsoups.getDocByUrl(url);
TPGoods goods = new TPGoods();

// The numeric id is the URL text between the last '/' and the "x.shtml" marker.
int idStart = url.lastIndexOf("/") + 1;
int idEnd = url.lastIndexOf("x.shtml");
goods.setId(Long.valueOf(url.substring(idStart, idEnd)));
goods.setRealUrl(url);

/* main: first "div" match reported for the element with id "main" */
Elements mainDivs = doc.getElementById("main").getElementsByTag("div");
Element main = mainDivs.first();

// Title and time are stored as the inner HTML of the first <h1> and first li.time.
Element title = main.select("h1").first();
String titleHtml = title.html();
goods.setTitle(titleHtml);
Element time = main.select("li.time").first();
String timeHtml = time.html();
goods.setTime(timeHtml);

/* summary: the div.su_con cells inside the first ul.suUl */
Element summaryList = main.select("ul.suUl").first();
Elements summary = summaryList.select("div.su_con");

// Cell 0 carries the price; the raw span HTML is stored without a unit suffix.
Element price = summary.get(0).select("span").first();
goods.setPrice(price.html());

// Cell 1 carries the item condition; a missing span becomes an empty string.
Element chengse = summary.get(1).select("span").first();
String chengseText = "";
if (chengse != null) {
    chengseText = chengse.html();
}
goods.setChengse(chengseText);

// Cell 2 carries the area in a span.c_25d; a missing span becomes an empty string.
Element area = summary.get(2).select("span.c_25d").first();
String areaText = "";
if (area != null) {
    areaText = Htmls.simpleClean(area.html());
}
goods.setArea(areaText);

/* content: the element with id "sub_1" */
Element element = doc.getElementById("sub_1");

// Vehicle info: every '|' in the first div.sub_1_c becomes a line break before cleaning.
Element bikeInfo = element.select("div.sub_1_c").first();
String bikeHtml = bikeInfo.html().replace("|", "\n");
String descriptionStr = Htmls.simpleClean(bikeHtml) + "\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment