Last active
July 13, 2020 11:30
-
-
Save songtianyi/c484d3eff24b2910c4b64d81951c8fb4 to your computer and use it in GitHub Desktop.
A CLI tool to crawl available e-books from your wish list (e.g. douban)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"fmt" | |
"os" | |
"strconv" | |
"github.com/songtianyi/laosj/spider" | |
"github.com/urfave/cli" | |
) | |
func doubanHandler(c *cli.Context) error { | |
uid := c.String("uid") | |
uri := "https://book.douban.com/people/" + uid + "/wish" | |
ns1, err := spider.CreateSpiderFromUrl(uri) | |
if err != nil { | |
return err | |
} | |
t1, err := ns1.GetText("div>div>div.grid-16-8.clearfix>div.article>div.paginator>a") | |
if err != nil { | |
return err | |
} | |
fmt.Print(`<DOCTYPE! html> | |
<html lang="zh_CN"> | |
<head> | |
<meta charset="utf-8"> | |
<title>e-book crab - by songtianyi</title> | |
</head> | |
<style> | |
ul li { | |
list-style: none; | |
padding: 1px; | |
margin: 4px; | |
} | |
</style> | |
<body> | |
<ul> | |
`) | |
maxx := spider.FindMaxFromSliceString(1, t1) | |
for j := 0; j < maxx; j++ { | |
page := uri + "?start=" + strconv.Itoa(j*15) + "&sort=time&rating=all&filter=all&mode=grid" | |
ns2, err := spider.CreateSpiderFromUrl(page) | |
if err != nil { | |
fmt.Println(err) | |
continue | |
} | |
books, err := ns2.GetAttr("div>div>div.grid-16-8.clearfix>div.article>ul.interest-list>li.subject-item>div.info>h2>a", "href") | |
if err != nil { | |
fmt.Println(err) | |
continue | |
} | |
for _, book := range books { | |
ns3, err := spider.CreateSpiderFromUrl(book) | |
if err != nil { | |
fmt.Println(err) | |
continue | |
} | |
ebooks, err := ns3.GetHtml("div>div>div.grid-16-8.clearfix>div.aside>div.gray_ad>div#buyinfo-ebook>ul.bs.noline.more-after") | |
if err != nil { | |
fmt.Println(err) | |
continue | |
} | |
titles, err := ns3.GetText("div>h1>span") | |
if err != nil { | |
fmt.Println(err) | |
continue | |
} | |
if len(ebooks) > 0 { | |
fmt.Print(`<li><ul>《` + titles[0] + `》` + ebooks[0] + `</ul></li>`) | |
} | |
} | |
} | |
fmt.Print(` | |
</ul> | |
</body> | |
</html>`) | |
return nil | |
} | |
func main() { | |
app := cli.NewApp() | |
app.Usage = "A cli tool to crawl available e-books from your wish list(eg. douban)" | |
app.Version = "1.0.0" | |
app.Commands = []cli.Command{ | |
{ | |
Name: "douban", | |
Aliases: []string{"douban"}, | |
Usage: "start crawling douban wish list", | |
Action: doubanHandler, | |
Flags: []cli.Flag{ | |
cli.StringFlag{ | |
Name: "user, uid", | |
Value: "64692178", | |
Usage: "douban user id", | |
}, | |
}, | |
}, | |
} | |
err := app.Run(os.Args) | |
if err != nil { | |
fmt.Println(err) | |
os.Exit(1) | |
} | |
return | |
} |
Author
songtianyi
commented
Jun 25, 2019
•
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment