Last active
November 7, 2022 12:27
-
-
Save mashingan/6f83c936f9621a40a434074462ced935 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"encoding/xml" | |
"flag" | |
"fmt" | |
"io" | |
"net/http" | |
"net/url" | |
"os" | |
"strconv" | |
"strings" | |
"sync" | |
"time" | |
) | |
func toString(name xml.Name) string { | |
return name.Space + ":" + name.Local | |
} | |
func isPageClass(t xml.StartElement) bool { | |
return t.Name == xml.Name{Space: "", Local: "div"} && | |
len(t.Attr) > 0 && | |
t.Attr[0].Value == "page" | |
} | |
func nextToken(d *xml.Decoder) (token xml.Token, err error) { | |
token, err = d.Token() | |
if err != nil { | |
return | |
} | |
switch v := token.(type) { | |
default: | |
fmt.Printf("the type: %T\n", v) | |
} | |
return | |
} | |
func pageExtract(linkurl string) (nextlink string, imglink string, err error) { | |
resp, err := http.Get(linkurl) | |
if err != nil { | |
return "", "", err | |
} | |
decoder := xml.NewDecoder(resp.Body) | |
decoder.AutoClose = []string{"link", "meta"} | |
decoder.Strict = false | |
searching: | |
for { | |
token, err := decoder.Token() | |
if err != nil { | |
fmt.Println(err) | |
if err == io.EOF { | |
break searching | |
} | |
} else { | |
switch v := token.(type) { | |
case xml.StartElement: | |
if isPageClass(v) { | |
fmt.Println("found!") | |
break searching | |
} | |
default: | |
} | |
} | |
} | |
_, err = nextToken(decoder) | |
if err != nil { | |
fmt.Println(err) | |
} | |
stelm, err := nextToken(decoder) | |
nstelm, ok := stelm.(xml.StartElement) | |
if !ok { | |
fmt.Println("cannot convert to start element") | |
return "", "", fmt.Errorf("Error pageExtract: cannot convert to start element") | |
} | |
fmt.Println("next link:", nstelm.Attr[0].Value) | |
img, err := nextToken(decoder) | |
nimg, ok := img.(xml.StartElement) | |
if !ok { | |
fmt.Println("cannot convert to start element") | |
return "", "", fmt.Errorf("Error pageExtract: cannot convert to start element") | |
} | |
base, _ := url.Parse(linkurl) | |
nexturl, _ := url.Parse(nstelm.Attr[0].Value) | |
nextlink = base.ResolveReference(nexturl).String() | |
imglink = "https:" + nimg.Attr[1].Value | |
err = nil | |
return | |
} | |
func downloadFile(imglink string) error { | |
lenImgName := strings.Split(imglink, "/") | |
fname := lenImgName[len(lenImgName)-1] | |
file, err := os.Create(fname) | |
defer file.Close() | |
if err != nil { | |
fmt.Println(err) | |
return err | |
} | |
imgresp, err := http.Get(imglink) | |
if err != nil { | |
fmt.Println(err) | |
return err | |
} | |
defer imgresp.Body.Close() | |
size, err := io.Copy(file, imgresp.Body) | |
if err != nil { | |
fmt.Println(err) | |
return err | |
} | |
fmt.Printf("downloaded a file %s with size %d\n", fname, size) | |
return nil | |
} | |
// chapterExtract parses the chapter number from a reader URL of the form
// .../r/<title>/<chapter>/... . It returns (-1, nil) when the URL has no
// "/r/" segment (the caller treats that as "stop"), and (-1, err) when the
// chapter segment is missing or not numeric.
//
// The original indexed the string without checking strings.Index results,
// so a URL like ".../r/" panicked with a slice-bounds error.
func chapterExtract(link string) (int, error) {
	rpos := strings.Index(link, "/r/")
	if rpos == -1 {
		return -1, nil
	}
	// rest is "<title>/<chapter>/..." — drop the title segment first.
	rest := link[rpos+3:]
	slash := strings.Index(rest, "/")
	if slash == -1 {
		return -1, fmt.Errorf("chapterExtract: no chapter segment in %q", link)
	}
	chapter := rest[slash+1:]
	if end := strings.Index(chapter, "/"); end != -1 {
		chapter = chapter[:end]
	}
	result, err := strconv.Atoi(chapter)
	if err != nil {
		return -1, err
	}
	return result, nil
}
func main() { | |
mangaurl := "" | |
flag.StringVar(&mangaurl, "url", "", "Please specify the url") | |
flag.Parse() | |
if mangaurl == "" { | |
fmt.Println("url:", mangaurl) | |
panic("No url specified") | |
} | |
var wg sync.WaitGroup | |
thisChapter, err := chapterExtract(mangaurl) | |
if err != nil { | |
fmt.Println(err) | |
return | |
} | |
fmt.Println("This chapter:", thisChapter) | |
var nextlink, imglink string | |
nextlink = mangaurl | |
start := time.Now() | |
fetchingpage: | |
for { | |
nextlink, imglink, err = pageExtract(nextlink) | |
if err != nil { | |
fmt.Println(err) | |
return | |
} | |
fmt.Println("Next link:", nextlink) | |
fmt.Println("img link:", imglink) | |
if imglink == "" { | |
break fetchingpage | |
} | |
wg.Add(1) | |
go func(w *sync.WaitGroup, link string) { | |
defer w.Done() | |
if err := downloadFile(link); err != nil { | |
fmt.Println(err) | |
} | |
}(&wg, imglink) | |
nextChapter, err := chapterExtract(nextlink) | |
if err != nil { | |
fmt.Println(err) | |
return | |
} else if nextChapter == -1 { | |
break fetchingpage | |
} | |
fmt.Printf("old %d and current %d\n", thisChapter, nextChapter) | |
if nextChapter != thisChapter { | |
break fetchingpage | |
} | |
} | |
wg.Wait() | |
fmt.Println("ended after:", time.Now().Sub(start)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment