Skip to content

Instantly share code, notes, and snippets.

@lerry
Created October 30, 2017 07:55
Show Gist options
  • Save lerry/77d272ff49b8a664904fcb6453830679 to your computer and use it in GitHub Desktop.
Save lerry/77d272ff49b8a664904fcb6453830679 to your computer and use it in GitHub Desktop.
package main
import (
"io"
"os"
"fmt"
"sync"
"strconv"
"strings"
"io/ioutil"
"regexp"
"net/http"
"path/filepath"
)
// URL is the base address of the listing pages; parsePage appends the page
// number to it to form each page's address.
var URL = "http://www.yanshuo.me/r/meizi/rank/"
// main creates the ./imgs output directory, starts one parsePage goroutine
// that feeds image URLs into a buffered channel, starts three downloader
// goroutines that drain it, and waits for all of them to finish.
func main() {
	// Create the output directory before any goroutine starts, so that a
	// failure here stops the program before any network work is done.
	rootpath := filepath.Join(".", "imgs")
	if err := os.MkdirAll(rootpath, os.ModePerm); err != nil {
		panic(err)
	}

	c := make(chan string, 5)
	var wg sync.WaitGroup

	wg.Add(1)
	go parsePage(c, &wg)

	for i := 0; i < 3; i++ {
		wg.Add(1)
		go downloader(c, &wg, rootpath)
	}
	wg.Wait()
}
// imgSrcRe matches an attribute of the form data-original="<address>" that is
// followed by a whitespace character, and captures the address. The [^"]*
// class keeps every match inside a single quoted value, so several attributes
// on one line are never merged into one match.
var imgSrcRe = regexp.MustCompile(`data-original="([^"]*)"\s`)

// parsePage walks the numbered listing pages starting at URL + "1", sends
// every image address found on a page into c, and stops at the first page
// that contains no match. It closes c and calls wg.Done when it returns.
//
// Each captured address is prefixed with "http:", which assumes the pages use
// protocol-relative addresses ("//host/path") — NOTE(review): confirm.
//
// Request and read failures are passed to checkErr.
func parsePage(c chan string, wg *sync.WaitGroup) {
	// Deferred calls run last-in first-out: the channel is closed first so
	// the downloaders can finish, then the wait group is released.
	defer wg.Done()
	defer close(c)

	for pageNo := 1; ; pageNo++ {
		pageURL := URL + strconv.Itoa(pageNo)
		fmt.Println(pageURL)

		matches := imgSrcRe.FindAllStringSubmatch(fetchPage(pageURL), -1)
		if len(matches) == 0 {
			return
		}
		for _, m := range matches {
			c <- "http:" + m[1]
		}
	}
}

// fetchPage downloads pageURL and returns its body as a string. The response
// body is closed before returning, so the connection is not held open while
// the caller blocks on channel sends.
func fetchPage(pageURL string) string {
	res, err := http.Get(pageURL)
	checkErr(err)
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	checkErr(err)
	return string(body)
}
// downloader receives image URLs from c until the channel is closed and saves
// each one under rootpath. It calls wg.Done when it returns.
//
// The file name is the fifth "/"-separated piece of the URL, i.e. the second
// path segment of "http://host/dir/name". URLs without that piece are
// reported and skipped. A URL whose target path already exists (or cannot be
// stat'ed) is skipped silently.
//
// A failed download is reported on standard error and the worker moves on to
// the next URL, so one bad image does not abort the other workers mid-write.
func downloader(c chan string, wg *sync.WaitGroup, rootpath string) {
	defer wg.Done()

	for url := range c {
		parts := strings.Split(url, "/")
		if len(parts) < 5 || parts[4] == "" {
			fmt.Println(url + " skipped: no file name in URL")
			continue
		}
		path := filepath.Join(rootpath, parts[4])

		if _, err := os.Stat(path); !os.IsNotExist(err) {
			continue
		}
		if err := fetchToFile(url, path); err != nil {
			fmt.Fprintln(os.Stderr, path+" failed: "+err.Error())
			continue
		}
		fmt.Println(path + " Ok")
	}
}

// fetchToFile downloads url into a new file at path. It returns an error when
// the request fails, the server answers with a status other than 200 OK, or
// the file cannot be written completely. A partially written file is removed,
// because the existence check in downloader would otherwise skip it forever.
func fetchToFile(url, path string) error {
	response, err := http.Get(url)
	if err != nil {
		return err
	}
	defer response.Body.Close()

	// Do not store an error page under an image name.
	if response.StatusCode != http.StatusOK {
		return fmt.Errorf("GET %s: unexpected status %s", url, response.Status)
	}

	file, err := os.Create(path)
	if err != nil {
		return err
	}
	if _, err = io.Copy(file, response.Body); err != nil {
		file.Close()
		os.Remove(path)
		return err
	}
	if err = file.Close(); err != nil {
		os.Remove(path)
		return err
	}
	return nil
}
// checkErr panics with err when err is non-nil and does nothing otherwise.
func checkErr(err error) {
	if err == nil {
		return
	}
	panic(err)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment