Skip to content

Instantly share code, notes, and snippets.

@rif
Created February 12, 2018 20:06
Show Gist options
  • Save rif/d3958ee14758052976203b391d86208e to your computer and use it in GitHub Desktop.
Save rif/d3958ee14758052976203b391d86208e to your computer and use it in GitHub Desktop.
package main
import (
"encoding/json"
"fmt"
"log"
"sort"
"time"
"github.com/gocolly/colly"
)
// Gist is a single entry collected from a gist search-results page.
//
// The fields are exported without JSON tags, so they are serialized under
// their Go names ("User", "Date", "Text") in declaration order.
type Gist struct {
	// User is the creator name, read from the avatar image's alt attribute.
	User string
	// Date is the timestamp parsed (RFC 3339) from the snippet's time-ago
	// element; it is the zero time when parsing failed.
	Date time.Time
	// Text is the concatenated text of the snippet's code cells.
	Text string
}
// main scrapes the gist.github.com search results for "genesis_public_key",
// follows the pagination links, and prints every collected gist to stdout as
// indented JSON ordered by ascending date.
//
// A failure to fetch the first page or to encode the result terminates the
// program with a non-zero exit status; failures on later pages are logged and
// whatever was collected so far is still printed.
func main() {
	// colly.UserAgent takes the header *value*; the header name must not be
	// part of the string or the server receives a malformed User-Agent.
	c := colly.NewCollector(colly.UserAgent("Mozilla/5.0 (X11; Linux x86_64; rv:58.0) Gecko/20100101 Firefox/58.0"))

	// Start from an empty, non-nil slice so a run that finds nothing prints
	// "[]" instead of "null".
	gists := []*Gist{}

	// Collect one Gist per search-result snippet.
	c.OnHTML("div.gist-snippet", func(e *colly.HTMLElement) {
		user := e.ChildAttr("span.creator>img", "alt")
		date := e.ChildAttr("div.extra-info>time-ago", "datetime")
		t, err := time.Parse(time.RFC3339, date)
		if err != nil {
			// Best effort: the gist is still recorded, with a zero Date,
			// which makes it sort before every dated entry.
			log.Print("ERR: ", err)
		}
		gists = append(gists, &Gist{
			User: user,
			Date: t,
			Text: e.ChildText("td.blob-code"),
		})
	})

	// Visit next page
	c.OnHTML("a.next_page", func(e *colly.HTMLElement) {
		// Pause between pages (presumably to stay under the site's rate
		// limit — the original gives no reason for the 10s value).
		time.Sleep(10 * time.Second)
		if err := e.Request.Visit(e.Attr("href")); err != nil {
			log.Print("Visit ERR: ", err)
		}
	})

	c.OnRequest(func(r *colly.Request) {
		log.Printf("\n ========================= Visiting: %s ============================= \n", r.URL)
	})

	if err := c.Visit("https://gist.github.com/search?p=1&q=genesis_public_key"); err != nil {
		// Without the first page there is nothing to report.
		log.Fatal("Initial page ERR: ", err)
	}

	// Stable sort keeps scrape order among gists with equal (or unparsed)
	// dates, so repeated runs over the same pages give the same output.
	sort.SliceStable(gists, func(i, j int) bool { return gists[i].Date.Before(gists[j].Date) })

	b, err := json.MarshalIndent(gists, "", " ")
	if err != nil {
		log.Fatal("error formatting result json: ", err)
	}
	// Print the JSON verbatim. It must never be used as a format string:
	// scraped text routinely contains '%' and Printf would mangle it.
	fmt.Println(string(b))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment