Skip to content

Instantly share code, notes, and snippets.

@scturtle
Last active December 16, 2015 23:59
Show Gist options
  • Select an option

  • Save scturtle/5517282 to your computer and use it in GitHub Desktop.

Select an option

Save scturtle/5517282 to your computer and use it in GitHub Desktop.
bengou comic downloader
package main
import (
"io"
"os"
"fmt"
"flag"
"time"
"errors"
"regexp"
"strings"
"net/http"
"io/ioutil"
"./httpclient"
)
// WORKERS is the number of download_worker goroutines started by main.
const WORKERS = 5

// TIMEOUT is the number of seconds used for both the connect timeout and the
// read/write timeout of the shared download client (main multiplies it by
// time.Second).
const TIMEOUT = 10
// Job describes one picture to download. The same value travels from main to
// a worker over the jobs channel and back over the results channel.
type Job struct {
	// filename is the local path the picture is stored under; url is the
	// address it is fetched from; msg is the status set by the worker on
	// success ("done" after a download, "exist" when the file was already
	// present).
	filename, url, msg string

	// err is set by the worker when the attempt failed; main re-queues jobs
	// that come back with a non-nil err.
	err error
}
// client is the HTTP client used by every download worker to fetch pictures.
// Its ConnectTimeout and ReadWriteTimeout are set in main before any worker
// is started.
var client = httpclient.New()
// download_worker receives jobs from the jobs channel until it is closed and
// sends each job back on results after one attempt.
//
// On success the job's msg is "exist" (the target file was already present)
// or "done" (the file was downloaded). On failure the job's err is set and no
// partial file is left behind, so the caller can safely re-queue the job.
func download_worker(jobs chan Job, results chan Job) {
	for t := range jobs {
		// A file that can be stat'ed is already on disk. Any Stat error, not
		// only "does not exist", is treated as missing and triggers a
		// download.
		if _, err := os.Stat(t.filename); err == nil {
			t.msg = "exist"
			results <- t
			continue
		}
		if err := fetchToFile(t.url, t.filename); err != nil {
			t.err = err
		} else {
			t.msg = "done"
		}
		results <- t
	}
}

// fetchToFile downloads url with the shared client and stores the response
// body in filename. It returns nil only when the whole body was written and
// the file was closed without error; on any failure after the file has been
// created, the unfinished file is removed.
func fetchToFile(url, filename string) error {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return errors.New("bad request: " + err.Error())
	}
	resp, err := client.Do(req)
	if err != nil {
		return errors.New("request failed: " + err.Error())
	}
	// Deferred in this helper rather than in the worker loop: a defer inside
	// the loop would only run when the worker returns, which it never does
	// while the jobs channel stays open, so bodies would pile up unclosed.
	defer resp.Body.Close()

	// Do not store an error page under a picture's file name.
	if resp.StatusCode != http.StatusOK {
		return errors.New("request failed: " + resp.Status)
	}

	fout, err := os.Create(filename)
	if err != nil {
		return errors.New("create failed: " + err.Error())
	}
	_, err = io.Copy(fout, resp.Body)
	// Close can report a delayed write error; keep the first error seen.
	if cerr := fout.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		// Best effort: a leftover partial file would be mistaken for a
		// finished download on the next attempt.
		os.Remove(filename)
		return errors.New("download failed: " + err.Error())
	}
	return nil
}
// Patterns used to scrape the comic page, compiled once at start-up.
var (
	titlePattern = regexp.MustCompile(`title>(.*)</title`)
	basePattern  = regexp.MustCompile(`pic_base\s=\s'([^']*)'`)
	picsPattern  = regexp.MustCompile(`picTree\s=\s\[([^\]]*)\]`)
)

// fatal prints err and terminates the program with exit status 1.
func fatal(err error) {
	fmt.Println("Error:", err)
	os.Exit(1)
}

// fetchPage downloads url and returns the response body as a string. A
// non-200 response is reported as an error.
func fetchPage(url string) (string, error) {
	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("fetching %s: unexpected status %s", url, resp.Status)
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", fmt.Errorf("reading %s: %v", url, err)
	}
	return string(body), nil
}

// parseComicPage extracts the comic title, the picture base URL and the list
// of picture names from the page source. It returns an error, instead of
// panicking, when any of the three pieces is missing.
func parseComicPage(html string) (title, base string, pics []string, err error) {
	m := titlePattern.FindStringSubmatch(html)
	if m == nil {
		return "", "", nil, fmt.Errorf("comic title not found in page")
	}
	title = m[1]

	if m = basePattern.FindStringSubmatch(html); m == nil {
		return "", "", nil, fmt.Errorf("pic_base not found in page")
	}
	base = m[1]

	if m = picsPattern.FindStringSubmatch(html); m == nil {
		return "", "", nil, fmt.Errorf("picTree not found in page")
	}
	for _, raw := range strings.Split(m[1], ",") {
		// Entries are quoted strings; tolerate surrounding whitespace and
		// skip empty entries (e.g. an empty list or a trailing comma).
		name := strings.Trim(strings.TrimSpace(raw), `'"`)
		if name != "" {
			pics = append(pics, name)
		}
	}
	if len(pics) == 0 {
		return "", "", nil, fmt.Errorf("picTree contains no pictures")
	}
	return title, base, pics, nil
}

// dirName turns the page title into a single, safe directory name: path
// separators are replaced so the title cannot point outside the current
// directory, and empty or dot-only names are rejected.
func dirName(title string) (string, error) {
	name := strings.TrimSpace(strings.NewReplacer("/", "_", "\\", "_").Replace(title))
	if name == "" || name == "." || name == ".." {
		return "", fmt.Errorf("unusable comic title %q", title)
	}
	return name, nil
}

// picExt returns the file extension of pic including the leading dot, or ""
// when the last path element has no extension.
func picExt(pic string) string {
	i := strings.LastIndex(pic, ".")
	if i < 0 || strings.Contains(pic[i:], "/") {
		return ""
	}
	return pic[i:]
}

// main downloads every picture of the comic page given as the single command
// line argument into a directory named after the page title. Pictures are
// stored as <index><ext>, existing files are skipped by the workers, and a
// failed picture is retried a bounded number of times. The exit status is 1
// on a usage or setup error, or when at least one picture could not be
// downloaded.
func main() {
	flag.Parse()
	if flag.NArg() != 1 {
		fmt.Println("Error: Need url.")
		os.Exit(1)
	}
	pageURL := flag.Arg(0)

	// set timeout
	client.ConnectTimeout = TIMEOUT * time.Second
	client.ReadWriteTimeout = TIMEOUT * time.Second

	html, err := fetchPage(pageURL)
	if err != nil {
		fatal(err)
	}
	title, base, pics, err := parseComicPage(html)
	if err != nil {
		fatal(err)
	}

	// make dir and work inside it; a failure here must stop the program,
	// otherwise pictures would be written into the wrong directory.
	dir, err := dirName(title)
	if err != nil {
		fatal(err)
	}
	if err := os.MkdirAll(dir, 0777); err != nil {
		fatal(err)
	}
	if err := os.Chdir(dir); err != nil {
		fatal(err)
	}

	jobs := make(chan Job)
	results := make(chan Job)
	// run workers; they live until the process exits.
	for i := 0; i < WORKERS; i++ {
		go download_worker(jobs, results)
	}

	// distribute jobs
	total := len(pics)
	go func() {
		for i, pic := range pics {
			jobs <- Job{filename: fmt.Sprint(i) + picExt(pic), url: base + pic}
		}
	}()

	// check: collect one final outcome per picture. A failed job is re-queued
	// until it has been attempted maxAttempts times, so a permanently broken
	// picture cannot keep the program running forever.
	const maxAttempts = 5
	attempts := make(map[string]int, total)
	finished, failed := 0, 0
	for finished < total {
		r := <-results
		if r.err == nil {
			finished++
			fmt.Printf("%d / %d %s %s \n", finished, total, r.filename, r.msg)
			continue
		}
		fmt.Printf("%s %v \n", r.filename, r.err)
		attempts[r.filename]++
		if attempts[r.filename] >= maxAttempts {
			finished++
			failed++
			fmt.Printf("%d / %d %s giving up after %d attempts \n", finished, total, r.filename, maxAttempts)
			continue
		}
		// redo; sent from a goroutine because all workers may currently be
		// blocked sending results to this loop.
		r.err = nil
		go func(j Job) { jobs <- j }(r)
	}
	if failed > 0 {
		fmt.Printf("%d of %d pictures could not be downloaded\n", failed, total)
		os.Exit(1)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment