Last active
December 16, 2015 23:59
-
-
Save scturtle/5517282 to your computer and use it in GitHub Desktop.
bengou comic downloader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package main | |
| import ( | |
| "io" | |
| "os" | |
| "fmt" | |
| "flag" | |
| "time" | |
| "errors" | |
| "regexp" | |
| "strings" | |
| "net/http" | |
| "io/ioutil" | |
| "./httpclient" | |
| ) | |
// WORKERS is the number of download_worker goroutines main starts.
const WORKERS = 5

// TIMEOUT is the number of seconds main multiplies by time.Second to set the
// shared client's connect and read/write timeouts.
const TIMEOUT = 10
// Job is one image download travelling between main and the workers. The same
// value carries the request (filename, url) on the way in and the outcome
// (msg, err) on the way back.
type Job struct {
	// filename is the local file the image is written to; url is where it
	// is fetched from.
	filename, url string

	// msg is set by the worker on success: "done" after a download, "exist"
	// when the file was already present.
	msg string
	// err is set by the worker when the download failed; main clears it
	// before re-queueing the job.
	err error
}
| var client = httpclient.New() | |
| func download_worker(jobs chan Job, results chan Job) { | |
| for t := range jobs { | |
| // test if file exists | |
| _, err := os.Stat(t.filename) | |
| if err!=nil || os.IsNotExist(err) { | |
| // make request | |
| req, _ := http.NewRequest("GET", t.url, nil) | |
| resp, err := client.Do(req) | |
| // request timeout | |
| if err != nil { | |
| t.err = errors.New("request timeout") | |
| results <- t | |
| continue | |
| } | |
| // save to file | |
| fout, _ := os.Create(t.filename) | |
| defer resp.Body.Close() | |
| _, err = io.Copy(fout, resp.Body) | |
| // download timeout | |
| if err != nil { | |
| // remove unfinished file | |
| fout.Close() | |
| os.Remove(t.filename) | |
| t.err = errors.New("download timeout") | |
| results <- t | |
| continue | |
| } | |
| fout.Close() | |
| t.msg = "done" | |
| results <- t | |
| } else { | |
| t.msg = "exist" | |
| results <- t | |
| } | |
| } | |
| } | |
| func main() { | |
| flag.Parse() | |
| if flag.NArg()!=1 { | |
| fmt.Println("Error: Need url.") | |
| os.Exit(1) | |
| } | |
| bengou_url := flag.Args()[0] | |
| // set timeout | |
| client.ConnectTimeout = TIMEOUT * time.Second | |
| client.ReadWriteTimeout = TIMEOUT * time.Second | |
| resp, _ := http.Get(bengou_url) | |
| defer resp.Body.Close() | |
| body, _ := ioutil.ReadAll(resp.Body) | |
| html := string(body) | |
| // comic title | |
| title_p, _ := regexp.Compile(`title>(.*)</title`) | |
| title := title_p.FindStringSubmatch(html)[1] | |
| // base url | |
| base_p, _ := regexp.Compile(`pic_base\s=\s'([^']*)'`) | |
| base := base_p.FindStringSubmatch(html)[1] | |
| // pic names | |
| pics_p, _ := regexp.Compile(`picTree\s=\s\[([^\]]*)\]`) | |
| pics := strings.Split(pics_p.FindStringSubmatch(html)[1], ",") | |
| for i := range pics { | |
| pics[i] = pics[i][1:len(pics[i])-1] | |
| } | |
| // make dir | |
| _, err := os.Stat(title) | |
| if os.IsNotExist(err) { | |
| os.Mkdir(title, 0777) | |
| } | |
| os.Chdir(title) | |
| jobs := make(chan Job) | |
| results := make(chan Job) | |
| // run workers | |
| for i := 0; i < WORKERS; i++ { | |
| go download_worker(jobs, results) | |
| } | |
| // distribute jobs | |
| total := len(pics) | |
| go func() { | |
| for i := 0; i < total; i++ { | |
| pic := pics[i] | |
| filename := fmt.Sprint(i) + pic[strings.Index(pic, "."):] | |
| jobs <- Job{filename: filename, url: base + pic} | |
| } | |
| }() | |
| // check | |
| done_cnt := 0 | |
| for done_cnt < total { | |
| r := <-results | |
| if r.err!=nil { // redo | |
| fmt.Printf("%s %v \n", r.filename, r.err) | |
| r.err = nil | |
| go func() { jobs <- r }() | |
| } else { | |
| done_cnt ++ | |
| fmt.Printf("%d / %d %s %s \n", done_cnt, total, r.filename, r.msg) | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment