Created
May 1, 2017 06:31
-
-
Save shellus/978f7729531d7e5cf4731838ecabbcb3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"github.com/PuerkitoBio/goquery" | |
"fmt" | |
"path" | |
"net/http" | |
"os" | |
"io" | |
"path/filepath" | |
"os/user" | |
"time" | |
) | |
// MingXingItem describes one celebrity ("mingxing") entry scraped from the
// list page: the display name and the href of its detail page.
type MingXingItem struct {
	name string // text of the entry's "a.name" link
	url  string // href of the detail page; callers prepend baseUrl before fetching
}
// 相册结构 | |
type XiangCeItem struct { | |
mingXingItem MingXingItem | |
name string | |
url string | |
} | |
// 图片结构 | |
type TuPianItem struct { | |
xiangCeItem XiangCeItem | |
url string | |
} | |
// Package-level configuration shared by the scraper functions.
var (
	// userPath is the current user's home directory; it is filled in by main.
	userPath string

	// storePath starts out as the download directory relative to the home
	// directory and is rewritten by main into an absolute path.
	storePath = `/Pictures/明星图片`

	// baseUrl is the site root that relative hrefs scraped from pages are
	// appended to.
	baseUrl = "https://www.houyuantuan.com"
)
func main() { | |
usr, err := user.Current() | |
if err != nil { | |
panic(err) | |
} | |
userPath = usr.HomeDir | |
storePath = userPath + storePath | |
cMingXingList(baseUrl + "/mingxing/2/") | |
} | |
// 采集明星列表,返回详情页url列表 | |
func cMingXingList(url string) { | |
doc, _ := goquery.NewDocument(url) | |
_ = doc.Find("body > div.wrapper > div.container > div > div.mod-list > div.hot > ul > li").Map(func(i int, s *goquery.Selection) string { | |
name := s.Find("a.name").Text() | |
href, _ := s.Find("a.name").Attr("href") | |
go cMingXing(MingXingItem{ | |
name: name, | |
url: href, | |
}) | |
return "" | |
}) | |
time.Sleep(time.Hour) | |
} | |
// 采集明星详情页面, 返回相册url列表 | |
func cMingXing(mingXingItem MingXingItem) { | |
doc, _ := goquery.NewDocument(baseUrl + mingXingItem.url) | |
_ = doc.Find("body > div.wrapper > div.container > div > div.mod-main > div.modules.pic > ul > li").Map(func(i int, s *goquery.Selection) string { | |
href, _ := s.Find("div.cover > a").Attr("href") | |
name := s.Find("div.cover-title > p > a").Text() | |
go cXiangCe(XiangCeItem{ | |
mingXingItem: mingXingItem, | |
name: name, | |
url: href, | |
}) | |
return "" | |
}) | |
} | |
// 获取相册图片url列表 | |
func cXiangCe(xiangCeItem XiangCeItem) { | |
doc, _ := goquery.NewDocument(baseUrl + xiangCeItem.url) | |
doc.Find("body > div.wrapper > div.container > div > div.mod-atlas > div.bd > div > div > ul:nth-child(1) > li").Map(func(i int, s *goquery.Selection) string { | |
href, _ := s.Find("div.pic > img").Attr("src") | |
go downloader(TuPianItem{ | |
xiangCeItem: xiangCeItem, | |
url: href, | |
}) | |
return "" | |
}) | |
} | |
func downloader(tuPianItem TuPianItem) { | |
fn := storePath + "\\" + tuPianItem.xiangCeItem.mingXingItem.name + "\\" + tuPianItem.xiangCeItem.name + "\\" + path.Base(tuPianItem.url) | |
fmt.Println("http:" + tuPianItem.url) | |
fmt.Println(fn) | |
res, _ := http.Get("http:" + tuPianItem.url) | |
os.MkdirAll(filepath.Dir(fn), os.FileMode(777)) | |
file, _ := os.Create(fn) | |
io.Copy(file, res.Body) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment