Skip to content

Instantly share code, notes, and snippets.

@syhily
Last active October 5, 2022 06:04
Show Gist options
  • Save syhily/31d7b97168289a64369d052a7032fcf1 to your computer and use it in GitHub Desktop.
Save syhily/31d7b97168289a64369d052a7032fcf1 to your computer and use it in GitHub Desktop.
抓取天浪书屋的蓝奏云下载链接
# Request every book page (IDs 1 through 10302), posting the fixed unlock key,
# and append each response line that mentions "蓝奏云盘" to download.txt.
page=1
last_page=10302
while ((page <= last_page)); do
    # Two blank lines, the progress message, then two more blank lines.
    printf '\n\nStart download book from %s\n\n\n' "$page"
    curl "https://www.tianlangbooks.com/$page.html" \
        -H 'authority: www.tianlangbooks.com' \
        --data-raw 'secret_key=359198&Submit=%E6%8F%90%E4%BA%A4' \
        --compressed |
        grep "蓝奏云盘" >>download.txt
    page=$((page + 1))
done
package main
import (
"bufio"
"fmt"
"github.com/go-resty/resty/v2"
"io"
"mime"
"net/http"
"net/url"
"os"
"path/filepath"
"regexp"
"strings"
"sync"
"time"
)
const (
// Threads is the number of ResolveFile worker goroutines started by main.
// It is also used as the buffer capacity of the channel that feeds them.
Threads = 5
// UserAgent is the User-Agent header value sent with the requests to the
// https://tenapi.cn/lanzou/ link-resolving API.
UserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53"
)
// main reads share links from links.txt, one per line, and hands every line
// that starts with "http" to a pool of Threads ResolveFile workers. It returns
// only after all workers have finished, then prints the elapsed time.
func main() {
	started := time.Now()

	// MkdirAll succeeds when the directory already exists, so any error here
	// is a real problem (permissions, a regular file in the way, ...).
	if err := os.MkdirAll("downloads", os.ModePerm); err != nil {
		fmt.Fprintln(os.Stderr, "create downloads directory:", err)
		os.Exit(1)
	}

	f, err := os.Open("links.txt")
	if err != nil {
		fmt.Fprintln(os.Stderr, "open links.txt:", err)
		os.Exit(1)
	}
	defer func() { _ = f.Close() }()

	c := make(chan string, Threads)

	// Create download workers. The WaitGroup lets main wait until every
	// worker has finished its last download; an empty channel alone does not
	// mean the work taken from it is done.
	var wg sync.WaitGroup
	for i := 0; i < Threads; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			ResolveFile(c)
		}()
	}

	// Start reading.
	r := bufio.NewReader(f)
	for {
		s, e := Readln(r)
		if e != nil {
			if e != io.EOF {
				fmt.Fprintln(os.Stderr, "read links.txt:", e)
			}
			break
		}
		if strings.HasPrefix(s, "http") {
			fmt.Println()
			fmt.Println("Start download from:", s)
			c <- s
		}
	}

	// Closing the channel ends each worker's range loop once the queue is
	// drained; Wait then blocks until the in-flight downloads complete.
	close(c)
	wg.Wait()

	fmt.Println()
	fmt.Println("All links processed in", time.Since(started).Round(time.Second))
}
// Response is the JSON document that ResolveFileLink decodes from the
// https://tenapi.cn/lanzou/ API.
type Response struct {
Code int64 `json:"code"`
// Data carries the resolved file name and download URL. ResolveFileLink
// treats a nil Data as "no link available".
Data *struct {
Name string `json:"name"`
Url string `json:"url"`
} `json:"data"`
// Msg is the API's status text. ResolveFileLink compares it against
// "请输入密码" and "文件取消分享了" to decide how to retry.
Msg string `json:"msg"`
}
// ResolveFile is the worker loop: it receives share links from c until the
// channel is closed, resolves each one to a direct download link and saves
// the file. Links that cannot be resolved, and links whose download fails,
// are appended to the broken-link log via BrokenLink.
func ResolveFile(c chan string) {
	// One resty client per worker, reused for every link it handles.
	client := resty.New()
	for s := range c {
		name, link := ResolveFileLink(client, s)
		if link == "" {
			BrokenLink(s)
			continue
		}
		fmt.Println("Get file from link:", link)
		if err := DownloadFile(name, link); err != nil {
			// A failed download is recorded like an unresolved link so it
			// can be retried later instead of being lost silently.
			fmt.Printf("Download failed for %s: %v\n", s, err)
			BrokenLink(s)
		}
	}
}
// ResolveFileLink asks the tenapi.cn Lanzou API to turn the share link s into
// a file name and a direct download URL. When the API asks for a password the
// request is repeated with the password "tlsw"; when it reports the share as
// cancelled the lookup is delegated to PatchResolveFileLink. Two empty strings
// are returned when the response carries no data.
func ResolveFileLink(client *resty.Client, s string) (string, string) {
	const endpoint = "https://tenapi.cn/lanzou/"

	parsed := &Response{}
	_, _ = client.R().
		SetHeader("User-Agent", UserAgent).
		SetQueryParam("url", s).
		SetResult(parsed).
		Get(endpoint)

	switch parsed.Msg {
	case "文件取消分享了":
		return PatchResolveFileLink(client, s)
	case "请输入密码":
		// Same request again, this time with the password attached.
		_, _ = client.R().
			SetHeader("User-Agent", UserAgent).
			SetQueryParam("url", s).
			SetQueryParam("pwd", "tlsw").
			SetResult(parsed).
			Get(endpoint)
	}

	if parsed.Data == nil {
		return "", ""
	}
	return parsed.Data.Name, parsed.Data.Url
}
// FileList is the JSON document that PatchResolveFileLink decodes from the
// filemoreajax.php folder-listing endpoint.
type FileList struct {
Zt int `json:"zt"`
Info string `json:"info"`
// Text holds one entry per listed file. This program reads only ID and
// NameAll from each entry.
Text []struct {
Icon string `json:"icon"`
T int `json:"t"`
// ID is appended to the share host to form the file's own share link.
ID string `json:"id"`
// NameAll is checked for the substring "epub" when choosing an entry.
NameAll string `json:"name_all"`
Size string `json:"size"`
Time string `json:"time"`
Duan string `json:"duan"`
PIco int `json:"p_ico"`
} `json:"text"`
}
// Patterns that pull the folder-listing form parameters out of the share
// page's inline JavaScript. Each has exactly one capture group; the comment
// above a pattern shows a sample of the text it is meant to match.
var (
	// 'lx':2,
	lxReg = regexp.MustCompile(`'lx':(\d+),`)
	// 'fid':4368476,
	fidReg = regexp.MustCompile(`'fid':(\d+),`)
	// 'uid':'1645133',
	uidReg = regexp.MustCompile(`'uid':'(\d+)',`)
	// 'rep':'0',
	repReg = regexp.MustCompile(`'rep':'(\d+)',`)
	// 'up':1,
	upReg = regexp.MustCompile(`'up':(\d+),`)
	// 'ls':1,
	lsReg = regexp.MustCompile(`'ls':(\d+),`)
	// var ib4rtb = '1653509951';
	tReg = regexp.MustCompile(`var ib4rtb = '(\d+)';`)
	// var ih33v4 = '42e785f2f96bc41d9693b60b23bab788';
	kReg = regexp.MustCompile(`var ih33v4 = '(\S+)';`)
)
// extractRegex returns the text captured by the first group of reg in str,
// or an empty string when reg does not match or has no capture group.
func extractRegex(reg *regexp.Regexp, str string) string {
	if groups := reg.FindStringSubmatch(str); len(groups) > 1 {
		return groups[1]
	}
	return ""
}
// lanzouHost is the share host used for the folder-listing endpoint and for
// building the share link of an individual folder entry.
const lanzouHost = "https://tianlangbooks.lanzoue.com/"

var (
	// 't':ib4rtb,  -- captures the name of the JavaScript variable that holds
	// the value of the "t" form parameter.
	tVarReg = regexp.MustCompile(`'t':\s*([A-Za-z_$][\w$]*)\s*,`)
	// 'k':ih33v4,  -- same for the "k" form parameter.
	kVarReg = regexp.MustCompile(`'k':\s*([A-Za-z_$][\w$]*)\s*,`)
)

// extractPageVar returns the quoted value assigned to the JavaScript variable
// that the page passes as a form parameter. nameReg captures the variable
// name; valuePattern is the capture group for its value. The variable names
// look generated (ib4rtb, ih33v4) and have been reported to differ between
// page versions, so the name is looked up first and the fixed-name fallback
// pattern is only used when that lookup yields nothing.
func extractPageVar(nameReg, fallback *regexp.Regexp, valuePattern, str string) string {
	if name := extractRegex(nameReg, str); name != "" {
		reg, err := regexp.Compile("var " + regexp.QuoteMeta(name) + " = '" + valuePattern + "';")
		if err == nil {
			if value := extractRegex(reg, str); value != "" {
				return value
			}
		}
	}
	return extractRegex(fallback, str)
}

// PatchResolveFileLink handles a share link s that the resolving API reports
// as cancelled. It fetches the share page itself, extracts the parameters of
// the page's folder-listing request, posts them (with the password "tlsw") to
// filemoreajax.php, picks one entry from the returned list and resolves that
// entry through ResolveFileLink. It returns two empty strings when the page
// or the listing cannot be fetched or the listing is empty.
func PatchResolveFileLink(client *resty.Client, s string) (string, string) {
	resp, err := client.R().Get(s)
	if err != nil || resp == nil {
		// Without the page there is nothing to extract; resp may be nil here.
		return "", ""
	}
	str := resp.String()
	formData := map[string]string{
		"lx":  extractRegex(lxReg, str),
		"fid": extractRegex(fidReg, str),
		"uid": extractRegex(uidReg, str),
		"pg":  "1",
		"rep": extractRegex(repReg, str),
		"t":   extractPageVar(tVarReg, tReg, `(\d+)`, str),
		"k":   extractPageVar(kVarReg, kReg, `(\S+)`, str),
		"up":  extractRegex(upReg, str),
		"ls":  extractRegex(lsReg, str),
		"pwd": "tlsw",
	}
	result := &FileList{}
	if _, err := client.R().SetFormData(formData).SetResult(result).Post(lanzouHost + "filemoreajax.php"); err != nil {
		return "", ""
	}
	if len(result.Text) == 0 {
		return "", ""
	}
	// Prefer an entry whose name mentions "epub" (the last such entry wins);
	// otherwise fall back to the first entry of the listing.
	picked := result.Text[0].ID
	for _, file := range result.Text {
		if strings.Contains(file.NameAll, "epub") {
			picked = file.ID
		}
	}
	return ResolveFileLink(client, lanzouHost+picked)
}
// localFilename reduces a file name received from a remote server to its
// final path element, so that it cannot point outside the download directory.
// It returns "" when nothing usable is left (empty input, ".", ".." or a bare
// path separator).
func localFilename(name string) string {
	base := filepath.Base(filepath.FromSlash(name))
	switch base {
	case ".", "..", string(filepath.Separator):
		return ""
	}
	return base
}

// DownloadFile fetches url and stores the body in the "downloads" directory.
// The file is named after the response's Content-Disposition header when it
// provides a name, and after name otherwise.
//
// It returns an error when the request fails, the status is not 200 OK, no
// usable file name is available, or the file cannot be created, written or
// closed. A file that could not be written completely is removed again.
func DownloadFile(name, url string) (err error) {
	// Get the data
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer func() { _ = resp.Body.Close() }()

	// Check server response
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("bad status: %s", resp.Status)
	}

	// Both candidate names come from remote servers, so strip any directory
	// components before using them in a local path.
	filename := localFilename(Filename(resp))
	if filename == "" {
		filename = localFilename(name)
	}
	if filename == "" {
		return fmt.Errorf("no usable file name for %s", url)
	}

	// Create the file
	target := filepath.Join("downloads", filename)
	out, err := os.Create(target)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean buffered data never reached the disk, so
		// it counts as a download error unless one was already recorded.
		if cerr := out.Close(); err == nil {
			err = cerr
		}
		if err != nil {
			// Do not leave a truncated file that looks like a finished one.
			_ = os.Remove(target)
		}
	}()

	// Write the body to file
	_, err = io.Copy(out, resp.Body)
	return err
}
// Filename parses the file name from the Content-Disposition header of resp.
// It returns an empty string when the header is missing, cannot be parsed or
// carries no "filename" parameter.
//
// The parameter value is URL-unescaped. When it is not valid URL encoding
// (for example a name containing a literal '%'), the value is returned as
// received instead of being discarded.
func Filename(resp *http.Response) (name string) {
	disposition := resp.Header.Get("Content-Disposition")
	if disposition == "" {
		return ""
	}
	_, params, err := mime.ParseMediaType(disposition)
	if err != nil {
		return ""
	}
	filename := params["filename"]
	if decoded, err := url.QueryUnescape(filename); err == nil {
		return decoded
	}
	return filename
}
// mutex serialises BrokenLink calls, which are made from the worker
// goroutines running ResolveFile.
var mutex sync.Mutex

// BrokenLink appends link, followed by a newline, to ./links-broken.txt,
// creating the file with mode 0600 when it does not exist. It panics when
// the file cannot be opened or written.
func BrokenLink(link string) {
	mutex.Lock()
	defer mutex.Unlock()

	const openFlags = os.O_APPEND | os.O_WRONLY | os.O_CREATE
	logFile, openErr := os.OpenFile("./links-broken.txt", openFlags, 0600)
	if openErr != nil {
		panic(openErr)
	}
	defer func() { _ = logFile.Close() }()

	_, writeErr := logFile.WriteString(link + "\n")
	if writeErr != nil {
		panic(writeErr)
	}
}
// Readln reads one complete line from r and returns it without the line
// terminator. Lines longer than the reader's buffer arrive from ReadLine in
// several pieces; the pieces are joined before returning.
// The error is whatever r.ReadLine reported last (nil on success); any bytes
// collected before the error are returned alongside it.
func Readln(r *bufio.Reader) (string, error) {
	var collected []byte
	for {
		chunk, more, err := r.ReadLine()
		collected = append(collected, chunk...)
		if err != nil || !more {
			return string(collected), err
		}
	}
}
module github.com/syhily/download

go 1.18

// resty is imported directly by the program, so it is a direct requirement
// and must not carry the "// indirect" marker.
require github.com/go-resty/resty/v2 v2.7.0

require golang.org/x/net v0.0.0-20211029224645-99673261e6eb // indirect
@syhily
Copy link
Author

syhily commented May 25, 2022

抓取截止日期为 2022/05/18,蓝奏云提取密码均为:tlsw。

蓝奏云解析服务:https://tenapi.cn/lanzou/
蓝奏云解析服务文档:https://docs.tenapi.cn/lanzou.html
自建蓝奏云解析服务:https://github.com/5ime/Lanzou_API

@syhily
Copy link
Author

syhily commented May 25, 2022

使用方法:

需要本机安装 Go 环境。将 Gist 里面的 4 个文件下载到本地,放在同一个文件夹内,然后在该文件夹中执行 `go build .`,再运行生成的可执行文件即可。

@myzle
Copy link

myzle commented May 26, 2022

赞👍

@ihipop
Copy link

ihipop commented May 29, 2022

diff --git a/download.go b/download.go
index de3987c..2503f27 100644
--- a/download.go
+++ b/download.go
@@ -13,12 +13,11 @@ import (
 	"regexp"
 	"strings"
 	"sync"
	"time"
 )
 
 const (
 	Threads   = 5
-	UserAgent = "User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53"
+	UserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53"
 )
 
 func main() {
@@ -118,10 +124,12 @@ var (
 	upReg = regexp.MustCompile("'up':(\\d+),")
 	//'ls':1,
 	lsReg = regexp.MustCompile("'ls':(\\d+),")
-	// var ib4rtb = '1653509951';
-	tReg = regexp.MustCompile("var ib4rtb = '(\\d+)';")
-	// var ih33v4 = '42e785f2f96bc41d9693b60b23bab788';
-	kReg = regexp.MustCompile("var ih33v4 = '(\\S+)';")
+	//// var ib4rtb = '1653509951';
+	//tReg = regexp.MustCompile("var iayenb = '(\\d+)';")
+	//// var ih33v4 = '42e785f2f96bc41d9693b60b23bab788';
+	//kReg = regexp.MustCompile("var igwqp4 = '(\\S+)';")
+	tVar = regexp.MustCompile("'t':(\\S+),")
+	kVar = regexp.MustCompile("'k':(\\S+),")
 )
 
 func extractRegex(reg *regexp.Regexp, str string) string {
@@ -142,8 +150,8 @@ func PatchResolveFileLink(client *resty.Client, s string) (string, string) {
 		"uid": extractRegex(uidReg, str),
 		"pg":  "1",
 		"rep": extractRegex(repReg, str),
-		"t":   extractRegex(tReg, str),
-		"k":   extractRegex(kReg, str),
+		"t":   extractRegex(regexp.MustCompile("var "+extractRegex(tVar, str)+" = '(\\d+)';"), str),
+		"k":   extractRegex(regexp.MustCompile("var "+extractRegex(kVar, str)+" = '(\\S+)';"), str),
 		"up":  extractRegex(upReg, str),
 		"ls":  extractRegex(lsReg, str),
 		"pwd": "tlsw",
diff --git a/go.mod b/go.mod
index d41245a..1ff00fb 100644
--- a/go.mod
+++ b/go.mod
@@ -1,8 +1,8 @@
 module github.com/syhily/download
 
-go 1.18
+go 1.16
 
 require (
-	github.com/go-resty/resty/v2 v2.7.0 // indirect
+	github.com/go-resty/resty/v2 v2.7.0
 	golang.org/x/net v0.0.0-20211029224645-99673261e6eb // indirect
 )

@ihipop
Copy link

ihipop commented May 29, 2022

.*(https://www.tianlangbooks.com/redirect/.*?)">(.*)</a>(.*[:](\S{4}))?.*

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment