Last active
January 29, 2025 21:50
-
-
Save nirlanka/1e2718b20c5d407a84812d14c04ae3b2 to your computer and use it in GitHub Desktop.
webnovel downloader - golang
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## DEV SH | |
mkdir golang_one | |
cd golang_one | |
go mod init golang_one | |
# go run golang_one | |
go get -u github.com/chromedp/chromedp | |
## Program the app | |
mkdir output | |
go run golang_one "DC新氪星 - DC New Krypton" '.content_read>.box_con>.bookname>h1' '.content_read>.box_con>#content' '#pager_next' https://www.shuhaige.net/130382/78521659.html | |
cd output | |
zip -r dc_new_krypton.zip . | |
## SSH | |
cd ~/Servers/reader/public/books | |
mkdir dc_new_krypton | |
## DEV SH | |
scp -rp -P 2222 dc_new_krypton.zip evildino@SERVER_HOST:/home/evildino/Servers/reader/public/books/dc_new_krypton
## SSH | |
cd dc_new_krypton | |
unzip dc_new_krypton.zip | |
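## To resume after stopping in the middle, pass the URL of the next chapter to fetch and, as the last argument, the chapter index to continue numbering from: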
go run golang_one "DC新氪星 - DC New Krypton" '.content_read>.box_con>.bookname>h1' '.content_read>.box_con>#content' '#pager_next' https://www.shuhaige.net/130382/78521679.html 5 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"context" | |
"encoding/json" | |
"fmt" | |
"os" | |
"strconv" | |
"time" | |
"github.com/chromedp/chromedp" | |
) | |
// Fixed names used by the downloader. They are never reassigned, so they are
// declared as constants rather than mutable package-level variables.
const (
	// cmdName is the program name shown in the usage message.
	cmdName = `golang_one`
	// outpDirname is the directory that receives index.json and the
	// per-chapter <index>.json files.
	outpDirname = `output`
)
func main() { | |
args := os.Args[1:] | |
if len(args) < 4 { | |
fmt.Printf(`Error: arguments required: | |
%s | |
bookTitle | |
chapterTitleCssSelector | |
chapterBodyCssSelector | |
nextChapterLinkCssSelector | |
startingChapterUrl | |
[chapterIndex]\n`, cmdName) | |
return | |
} | |
bookTitle := args[0] | |
// document.querySelector('.book.reader>.content>h1') | |
// document.querySelector('.content_read>.box_con>.bookname>h1') | |
jsGetTitle := fmt.Sprintf(` | |
( | |
document.querySelector("%s") | |
||{} | |
).textContent | |
||'' | |
`, args[1]) | |
// document.querySelector('.book.reader>.content>#content') | |
// document.querySelector('.content_read>.box_con>#content') | |
jsGetBodyHtml := fmt.Sprintf(` | |
( | |
document.querySelector("%s") | |
||{} | |
).innerHTML | |
||'' | |
`, args[2]) | |
// document.querySelector('.page_chapter>ul>:nth-child(3)>a') | |
// document.querySelector('.content_read>.box_con>.bottem2>:nth-child(5)') | |
// document.querySelector('.content_read>.box_con>.bottem2>#A3') | |
// window.pager_next | |
jsGetNextUrl := fmt.Sprintf(` | |
( | |
document.querySelector("%s") | |
||{} | |
).href | |
||'' | |
`, args[3]) | |
firstChapterUrl := args[4] | |
indexFname := fmt.Sprintf("%s/index.json", outpDirname) | |
var chapters [][]interface{} | |
idx := 1 | |
//// start from middle: | |
if len(args) > 5 { | |
//// idx: | |
var err error | |
idx, err = strconv.Atoi(args[5]) | |
onError(err) | |
//// prevChapters: | |
indexJsonReadBytes, err := os.ReadFile(indexFname) | |
onError(err) | |
indexReadObj := jsonIndex{} | |
json.Unmarshal([]byte(indexJsonReadBytes), &indexReadObj) | |
chapters = indexReadObj.Chapters | |
fmt.Printf("Previously saved chapters: %d\n", len(chapters)) | |
} | |
fmt.Printf("Downloading [%s]\n", bookTitle) | |
ctx, cancel := chromedp.NewContext( | |
context.Background(), | |
) | |
defer cancel() | |
nextUrl := firstChapterUrl | |
for len(nextUrl) > 0 { | |
var title string | |
var bodyHtml string | |
fmt.Printf("Reading %s...\n", nextUrl) | |
err := chromedp.Run(ctx, | |
chromedp.Navigate(nextUrl), | |
chromedp.Evaluate(jsGetTitle, &title), | |
chromedp.Evaluate(jsGetBodyHtml, &bodyHtml), | |
chromedp.Evaluate(jsGetNextUrl, &nextUrl), | |
) | |
onError(err) | |
//// if last chapter has both idx and title, and title is same as current title: | |
if len(chapters) > 0 && | |
len(chapters[len(chapters)-1]) == 2 && | |
title == chapters[len(chapters)-1][1] { | |
//// append with cahce: | |
title = fmt.Sprintf("%s (part)", title) | |
} | |
chapterMetadata := []interface{}{idx, title} | |
chapters = append(chapters, chapterMetadata) | |
//// Convert chapter metadata and write file: | |
fname := fmt.Sprintf("%s/%d.json", outpDirname, idx) | |
f, err := os.OpenFile( | |
fname, | |
// os.O_APPEND| | |
os.O_CREATE|os.O_WRONLY, | |
0644, | |
) | |
onError(err) | |
defer f.Close() | |
chapterObj := jsonChapter{ | |
Title: title, | |
FullText: bodyHtml, | |
} | |
chapterJsonBytes, err := json.Marshal(chapterObj) | |
onError(err) | |
_, err = f.WriteString(string(chapterJsonBytes)) | |
onError(err) | |
fmt.Println(chapterMetadata) | |
//// Convert index values []interface->[num, string] and write file: | |
f, err = os.OpenFile( | |
indexFname, | |
// os.O_APPEND| | |
os.O_CREATE|os.O_WRONLY, | |
0644, | |
) | |
onError(err) | |
defer f.Close() | |
indexObj := jsonIndex{ | |
Title: bookTitle, | |
Chapters: chapters, | |
} | |
indexJsonBytes, err := json.Marshal(indexObj) | |
onError(err) | |
_, err = f.WriteString(string(indexJsonBytes)) | |
onError(err) | |
idx++ | |
time.Sleep(4 * time.Second) | |
} | |
} | |
// onError aborts the program by panicking with err when err is non-nil.
// A nil err is a no-op, so the result of a fallible call can be passed
// straight in.
func onError(err error) {
	if err == nil {
		return
	}
	panic(err)
}
// jsonChapter is the JSON document stored for a single chapter.
type jsonChapter struct {
	// Title is the chapter title, serialized under the "title" key.
	Title string `json:"title"`
	// FullText is the chapter body, serialized under the "fullText" key.
	FullText string `json:"fullText"`
}
// jsonIndex is the JSON document that lists every chapter of a book.
type jsonIndex struct {
	// Title is the book title, serialized under the "title" key.
	Title string `json:"title"`
	// Chapters holds one [index, title] pair per chapter, e.g.
	// [[1, "chapter 1"], [2, "chapter 2"]]. When decoded by encoding/json
	// into interface{} values, the numeric index arrives as a float64.
	Chapters [][]interface{} `json:"chapters"`
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment