Skip to content

Instantly share code, notes, and snippets.

@nirlanka
Last active January 29, 2025 21:50
Show Gist options
  • Save nirlanka/1e2718b20c5d407a84812d14c04ae3b2 to your computer and use it in GitHub Desktop.
Save nirlanka/1e2718b20c5d407a84812d14c04ae3b2 to your computer and use it in GitHub Desktop.
webnovel downloader - golang
## DEV SH
mkdir golang_one
cd golang_one
go mod init golang_one
# go run golang_one
go get -u github.com/chromedp/chromedp
## Program the app
mkdir output
go run golang_one "DC新氪星 - DC New Krypton" '.content_read>.box_con>.bookname>h1' '.content_read>.box_con>#content' '#pager_next' https://www.shuhaige.net/130382/78521659.html
cd output
zip -r dc_new_krypton.zip .
## SSH
cd ~/Servers/reader/public/books
mkdir dc_new_krypton
## DEV SH
scp -rp -P 2222 dc_new_krypton.zip [email protected]:/home/evildino/Servers/reader/public/books/dc_new_krypton
## SSH
cd dc_new_krypton
unzip dc_new_krypton.zip
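## To resume after a run stopped midway: pass the URL of the next chapter to fetch and, as a final argument, the chapter index to continue numbering from: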
go run golang_one "DC新氪星 - DC New Krypton" '.content_read>.box_con>.bookname>h1' '.content_read>.box_con>#content' '#pager_next' https://www.shuhaige.net/130382/78521679.html 5
package main
import (
"context"
"encoding/json"
"fmt"
"os"
"strconv"
"time"
"github.com/chromedp/chromedp"
)
// Package-level settings.
var (
	// cmdName is the program name printed in the usage message.
	cmdName = "golang_one"
	// outpDirname is the directory that receives index.json and the
	// per-chapter <idx>.json files.
	outpDirname = "output"
)
// main downloads a web novel chapter by chapter through a chromedp browser
// context and stores it as JSON files under outpDirname.
//
// Positional command-line arguments:
//
//	bookTitle                   title stored in index.json
//	chapterTitleCssSelector     selector of the element holding the chapter title
//	chapterBodyCssSelector      selector of the element holding the chapter body
//	nextChapterLinkCssSelector  selector of the link to the next chapter
//	startingChapterUrl          URL of the first chapter to fetch
//	[chapterIndex]              optional; continue numbering from this index and
//	                            reuse the chapter list already in index.json
//
// For every page the chapter is written to <outpDirname>/<idx>.json and
// <outpDirname>/index.json is rewritten with the full chapter list. The loop
// follows the "next chapter" link until it evaluates to an empty string.
// Any error aborts the program via onError.
func main() {
	args := os.Args[1:]
	// Five arguments are mandatory: args[4] is the starting URL.
	if len(args) < 5 {
		fmt.Printf(`Error: arguments required:
%s
bookTitle
chapterTitleCssSelector
chapterBodyCssSelector
nextChapterLinkCssSelector
startingChapterUrl
[chapterIndex]
`, cmdName)
		return
	}

	bookTitle := args[0]

	// Example title selectors:
	//   .book.reader>.content>h1
	//   .content_read>.box_con>.bookname>h1
	jsGetTitle := jsSelectProp(args[1], "textContent")

	// Example body selectors:
	//   .book.reader>.content>#content
	//   .content_read>.box_con>#content
	jsGetBodyHtml := jsSelectProp(args[2], "innerHTML")

	// Example next-link selectors:
	//   .page_chapter>ul>:nth-child(3)>a
	//   .content_read>.box_con>.bottem2>:nth-child(5)
	//   .content_read>.box_con>.bottem2>#A3
	//   #pager_next
	jsGetNextUrl := jsSelectProp(args[3], "href")

	firstChapterUrl := args[4]

	indexFname := fmt.Sprintf("%s/index.json", outpDirname)

	var chapters [][]interface{}
	idx := 1

	// Resume from the middle: take the starting index from the command line
	// and the already-downloaded chapter list from the existing index file.
	if len(args) > 5 {
		var err error
		idx, err = strconv.Atoi(args[5])
		onError(err)

		indexJsonReadBytes, err := os.ReadFile(indexFname)
		onError(err)

		indexReadObj := jsonIndex{}
		// A corrupt index must not be silently treated as "no chapters".
		onError(json.Unmarshal(indexJsonReadBytes, &indexReadObj))
		chapters = indexReadObj.Chapters

		fmt.Printf("Previously saved chapters: %d\n", len(chapters))
	}

	// Make sure the output directory exists before the first write.
	onError(os.MkdirAll(outpDirname, 0755))

	fmt.Printf("Downloading [%s]\n", bookTitle)

	ctx, cancel := chromedp.NewContext(
		context.Background(),
	)
	defer cancel()

	nextUrl := firstChapterUrl
	for len(nextUrl) > 0 {
		var title string
		var bodyHtml string

		fmt.Printf("Reading %s...\n", nextUrl)

		err := chromedp.Run(ctx,
			chromedp.Navigate(nextUrl),
			chromedp.Evaluate(jsGetTitle, &title),
			chromedp.Evaluate(jsGetBodyHtml, &bodyHtml),
			chromedp.Evaluate(jsGetNextUrl, &nextUrl),
		)
		onError(err)

		// If the previous entry is a complete [idx, title] pair with the same
		// title, mark this page as a continuation of that chapter.
		if len(chapters) > 0 {
			last := chapters[len(chapters)-1]
			if len(last) == 2 && title == last[1] {
				title = fmt.Sprintf("%s (part)", title)
			}
		}

		chapterMetadata := []interface{}{idx, title}
		chapters = append(chapters, chapterMetadata)

		// Write the chapter file. os.WriteFile truncates an existing file and
		// closes the handle immediately, so nothing accumulates across
		// iterations and no stale bytes are left behind a shorter payload.
		fname := fmt.Sprintf("%s/%d.json", outpDirname, idx)
		chapterJsonBytes, err := json.Marshal(jsonChapter{
			Title:    title,
			FullText: bodyHtml,
		})
		onError(err)
		onError(os.WriteFile(fname, chapterJsonBytes, 0644))

		fmt.Println(chapterMetadata)

		// Rewrite the index after every chapter so an interrupted run can be
		// resumed from the last saved state.
		indexJsonBytes, err := json.Marshal(jsonIndex{
			Title:    bookTitle,
			Chapters: chapters,
		})
		onError(err)
		onError(os.WriteFile(indexFname, indexJsonBytes, 0644))

		idx++

		// Pause between page loads.
		time.Sleep(4 * time.Second)
	}
}

// jsSelectProp returns a JavaScript expression that evaluates to property
// prop of the first element matching the CSS selector, or to ” when no
// element matches or the property is empty. The selector is embedded as a
// JSON string literal so quotes and backslashes inside it cannot break the
// generated script.
func jsSelectProp(selector, prop string) string {
	quoted, err := json.Marshal(selector)
	onError(err)
	return fmt.Sprintf(`
(
document.querySelector(%s)
||{}
).%s
||''
`, quoted, prop)
}
// onError aborts the program by panicking with err when err is non-nil;
// a nil err is a no-op.
func onError(err error) {
	if err == nil {
		return
	}
	panic(err)
}
// jsonChapter is the JSON document written for a single chapter
// (one <idx>.json file per chapter).
type jsonChapter struct {
	// Title is the chapter title text.
	Title string `json:"title"`
	// FullText is the inner HTML of the chapter body element.
	FullText string `json:"fullText"`
}
// jsonIndex is the JSON document stored in index.json: the book title plus
// the list of chapters saved so far.
type jsonIndex struct {
	// Title is the book title.
	Title string `json:"title"`
	// Chapters holds one [index, title] pair per chapter,
	// e.g. [[1, "chapter 1"], [2, "chapter 2"]].
	Chapters [][]interface{} `json:"chapters"`
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment