Skip to content

Instantly share code, notes, and snippets.

@mpfund
Created February 13, 2015 15:42
Show Gist options
  • Save mpfund/d2d15d6aa3ddc9b54ee5 to your computer and use it in GitHub Desktop.
Save mpfund/d2d15d6aa3ddc9b54ee5 to your computer and use it in GitHub Desktop.
golang get links from url
package main
import (
"fmt"
"github.com/PuerkitoBio/goquery"
"log"
"net/http"
"net/url"
"strings"
)
// main is the program entry point; it runs the scrape once and returns.
func main() { ExampleScrape() }
// ExampleScrape fetches the Yahoo front page, prints every anchor target that
// points at yahoo.com or one of its subdomains (URL followed by host), and
// finally prints the number of distinct such URLs.
//
// Relative hrefs are resolved against the page URL. Anchors that have no href
// attribute, or whose href cannot be parsed as a URL, are skipped. A failure
// to fetch or parse the page terminates the program via log.Fatal.
func ExampleScrape() {
	const (
		base   = "http://www.yahoo.com/"
		domain = "yahoo.com"
	)

	// The base URL is the same for every anchor, so parse it once up front.
	baseURL, err := url.Parse(base)
	if err != nil {
		log.Fatal(err)
	}

	resp, err := http.Get(base)
	if err != nil {
		log.Fatal(err)
	}

	// Per goquery's documentation, NewDocumentFromResponse closes resp.Body
	// before returning, so no explicit Close is needed here.
	doc, err := goquery.NewDocumentFromResponse(resp)
	if err != nil {
		log.Fatal(err)
	}

	// Set of distinct in-domain URLs, keyed by their string form.
	links := make(map[string]struct{})
	doc.Find("a").Each(func(_ int, s *goquery.Selection) {
		href, ok := s.Attr("href")
		if !ok {
			// An anchor without href (e.g. a named anchor) is not a link.
			return
		}
		link, ok := resolveLink(baseURL, href, domain)
		if !ok {
			return
		}
		fmt.Printf("%s %s\n", link.String(), link.Host)
		links[link.String()] = struct{}{}
	})
	fmt.Println("links", len(links))
}

// resolveLink parses href, resolves it against base when it is relative, and
// reports whether the resulting URL's host is domain or a subdomain of it.
// It returns (nil, false) when href is not a valid URL or is out of domain.
func resolveLink(base *url.URL, href, domain string) (*url.URL, bool) {
	link, err := url.Parse(href)
	if err != nil {
		// url.Parse returns a nil URL on error; skip instead of dereferencing.
		return nil, false
	}
	if !link.IsAbs() {
		link = base.ResolveReference(link)
	}

	// Compare on the port-less, lower-cased host and require a label
	// boundary so that e.g. "notyahoo.com" does not match "yahoo.com".
	host := strings.ToLower(link.Hostname())
	if host != domain && !strings.HasSuffix(host, "."+domain) {
		return nil, false
	}
	return link, true
}
// test writes the word "test" followed by a newline to standard output.
func test() {
	const msg = "test"
	fmt.Println(msg)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment