Skip to content

Instantly share code, notes, and snippets.

@SilverCory
Last active November 6, 2017 04:47
Show Gist options
  • Save SilverCory/339d11a49f9bb699271ca53aae67cfec to your computer and use it in GitHub Desktop.
Save SilverCory/339d11a49f9bb699271ca53aae67cfec to your computer and use it in GitHub Desktop.
Scrape the information for every video on a YouTube channel and store it in JSON files.
package main
import (
"encoding/json"
"flag"
"fmt"
"io/ioutil"
"net/http"
"os"
"strconv"
)
// Page holds the parts of one search result page that this program reads.
type Page struct {
	// Token is decoded from "nextPageToken"; main appends it to the request
	// as pageToken to fetch the following page and stops when it is empty.
	Token string `json:"nextPageToken"`

	// Videos is decoded from the "items" array of the page.
	Videos []Video `json:"items"`
}
// Video is a single entry of a Page's "items" array. Only the "id" object is
// decoded; every other member of the item is ignored.
type Video struct {
	// ID is decoded from the item's "id" object.
	ID ID `json:"id"`
}
// ID is the "id" object of a search result item; only its "videoId" member
// is decoded.
type ID struct {
	// VideoID is decoded from "videoId" and is used to build both the
	// per-video request URL and the per-video output file name.
	VideoID string `json:"videoId"`
}
func main() {
apikey := flag.String("apikey", "", "Your youtube data enabled API key")
channelId := flag.String("channelId", "", "The channel id of the person you're scraping.")
flag.Parse()
if err := os.Mkdir(*channelId, 0644); err != nil {
fmt.Println("Error making channel directory!")
return
}
pageNumber := 1
var nextPage *Page
request := "https://www.googleapis.com/youtube/v3/search?key=" + *apikey + "&channelId=" + *channelId + "&part=snippet,id&order=date&maxResults=50"
for {
req := request
if nextPage == nil || nextPage.Token == "" {
if pageNumber != 1 {
fmt.Println("Unexpected end!")
return
}
} else {
req += "&pageToken=" + nextPage.Token
}
fmt.Println("Fetching page: ", strconv.Itoa(pageNumber))
data, page, err := getPage(req)
if err != nil {
return
}
if err := writeData(*channelId, data, pageNumber); err != nil {
fmt.Println("Error writing file!", err)
return
}
traverseVideos(*channelId, *apikey, page.Videos)
if page.Token == "" {
return
}
nextPage = page
pageNumber += 1
}
}
// traverseVideos stores the videos endpoint response for every entry of
// videos as <channelId>/<videoId>.json.
//
// Failures are reported on stdout and do not stop the traversal: the next
// video is attempted regardless.
func traverseVideos(channelId, apikey string, videos []Video) {
	for _, v := range videos {
		// An item that decoded without a videoId has nothing to look up, and
		// fetching it would only produce a file named ".json".
		if v.ID.VideoID == "" {
			fmt.Println(" - Skipping item without a video id")
			continue
		}
		fetchVideo(channelId, apikey, v.ID.VideoID)
	}
}

// fetchVideo downloads the videos endpoint response for videoID and writes
// the raw body to <channelId>/<videoID>.json, reporting any failure on stdout.
//
// It is a separate function so that the deferred Close runs once per video
// rather than piling up until the whole traversal has finished.
func fetchVideo(channelId, apikey, videoID string) {
	endpoint := "https://www.googleapis.com/youtube/v3/videos?part=id%2C+snippet&id=" + videoID + "&key=" + apikey

	fmt.Println(" - Fetching video id: " + videoID)
	fmt.Println(" - URL: " + endpoint)

	resp, err := http.Get(endpoint)
	if err != nil {
		fmt.Println(" - Error getting video page!", err)
		return
	}
	// The body must be closed on every path, including the non-20x one,
	// otherwise each video leaks a connection.
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode > 209 {
		fmt.Println(" - Non 20x response status!", resp.Status)
		return
	}

	data, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println(" - Error reading video page!", err)
		return
	}

	if err := ioutil.WriteFile(channelId+"/"+videoID+".json", data, 0644); err != nil {
		fmt.Println(" - Error writing video page!", err)
	}
}
// writeData saves the raw bytes of one result page to the file
// "<channelId>_page<number>.json" with mode 0644 and returns the error from
// the write, if any. The name is built by plain concatenation, so the file is
// created next to, not inside, a directory called channelId.
func writeData(channelId string, data []byte, number int) error {
	suffix := "_page" + strconv.Itoa(number) + ".json"
	target := channelId + suffix
	return ioutil.WriteFile(target, data, 0644)
}
// getPage fetches url and decodes the response body into a Page.
//
// On success it returns the raw body bytes, so the caller can store them
// unmodified, together with the decoded page. On any failure (request error,
// status outside 200-209, read error, or JSON decode error) it prints a
// message to stdout and returns a nil slice, a nil page and the error.
func getPage(url string) ([]byte, *Page, error) {
	fmt.Println("URL: ", url)

	resp, err := http.Get(url)
	if err != nil {
		fmt.Println("Error getting page!", err)
		return nil, nil, err
	}
	// Registered before the status check so the body is also closed when the
	// response is rejected.
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode > 209 {
		fmt.Println("Non 20x response status!", resp.Status)
		return nil, nil, fmt.Errorf("non 20x response status %q", resp.Status)
	}

	pageContents, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("Error reading page!", err)
		return nil, nil, err
	}

	page := &Page{}
	if err := json.Unmarshal(pageContents, page); err != nil {
		fmt.Println("Error decoding page!", err)
		return nil, nil, err
	}
	return pageContents, page, nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment