Skip to content

Instantly share code, notes, and snippets.

@EdmundMartin
Created November 11, 2017 15:44
Show Gist options
  • Save EdmundMartin/eaea4aaa5d231078cb433b89878dbecf to your computer and use it in GitHub Desktop.
Save EdmundMartin/eaea4aaa5d231078cb433b89878dbecf to your computer and use it in GitHub Desktop.
package googlescraper
import (
	"fmt"
	"net/http"
	"net/url"
	"strings"

	"github.com/PuerkitoBio/goquery"
)
// GoogleResult is a single entry extracted from a Google results page.
type GoogleResult struct {
	// ResultRank is the 1-based position of the entry among the kept results.
	ResultRank int
	// ResultURL is the href of the first anchor found inside the result node.
	ResultURL string
	// ResultTitle is the text of the result's "h3.r" heading.
	ResultTitle string
	// ResultDesc is the text of the result's "span.st" snippet.
	ResultDesc string
}
// googleDomains maps a short country code to the search endpoint of the
// matching Google domain. Every value ends in "?q=" so that the escaped
// search term can be appended directly.
var googleDomains = map[string]string{
	"com": "https://www.google.com/search?q=",
	"uk":  "https://www.google.co.uk/search?q=",
	"ru":  "https://www.google.ru/search?q=",
	"fr":  "https://www.google.fr/search?q=",
}

// buildGoogleUrl returns the search URL for searchTerm on the Google domain
// registered for countryCode, requesting 100 results with the interface
// language set to languageCode.
//
// Surrounding whitespace is trimmed from searchTerm. Both searchTerm and
// languageCode are query-escaped, so characters such as '&', '#', '+' and '%'
// are sent as part of the value instead of corrupting the query string.
// Spaces are encoded as '+'. An unknown countryCode falls back to the "com"
// domain.
func buildGoogleUrl(searchTerm string, countryCode string, languageCode string) string {
	googleBase, found := googleDomains[countryCode]
	if !found {
		googleBase = googleDomains["com"]
	}
	query := url.QueryEscape(strings.TrimSpace(searchTerm))
	return fmt.Sprintf("%s%s&num=100&hl=%s", googleBase, query, url.QueryEscape(languageCode))
}
// googleRequest performs a GET request for searchURL with a desktop Chrome
// User-Agent header and returns the response.
//
// An error is returned when searchURL cannot be turned into a request, when
// the transport fails, or when the server answers with a status other than
// 200 OK (the body is closed in that case). On success the caller owns the
// response body and is responsible for closing it.
//
// NOTE(review): http.DefaultClient has no timeout, so a stalled connection
// blocks indefinitely; consider a client with an explicit Timeout.
func googleRequest(searchURL string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, searchURL, nil)
	if err != nil {
		// Previously this error was discarded and the nil request caused a
		// panic on the Header.Set call below.
		return nil, fmt.Errorf("building request for %q: %w", searchURL, err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36")

	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	if res.StatusCode != http.StatusOK {
		// A non-200 answer is not a result page; surface it instead of
		// letting the parser silently produce an empty result list.
		res.Body.Close()
		return nil, fmt.Errorf("requesting %q: unexpected status %s", searchURL, res.Status)
	}
	return res, nil
}
// googleResultParser extracts the search results from a Google results page.
//
// Every "div.g" node is treated as one candidate result: its URL is the href
// of the first anchor inside the node, its title the text of "h3.r" and its
// description the text of "span.st". Candidates whose link is empty or "#"
// are skipped. Ranks are assigned consecutively, starting at 1, to the
// results that are kept.
//
// The response body is always closed before returning. A nil response (or a
// response without a body) and any HTML parsing failure are reported as
// errors. When no result matches, an empty non-nil slice is returned.
//
// NOTE(review): the CSS selectors are tied to Google's markup and may need
// adjusting when the page structure changes.
func googleResultParser(response *http.Response) ([]GoogleResult, error) {
	if response == nil || response.Body == nil {
		return nil, fmt.Errorf("google result parser: response is nil")
	}
	defer response.Body.Close()

	// NewDocumentFromResponse is deprecated; read from the body directly.
	doc, err := goquery.NewDocumentFromReader(response.Body)
	if err != nil {
		return nil, err
	}

	results := []GoogleResult{}
	doc.Find("div.g").Each(func(_ int, item *goquery.Selection) {
		link, _ := item.Find("a").Attr("href") // a missing href yields "" and is skipped below
		link = strings.Trim(link, " ")
		if link == "" || link == "#" {
			return
		}
		results = append(results, GoogleResult{
			ResultRank:  len(results) + 1,
			ResultURL:   link,
			ResultTitle: item.Find("h3.r").Text(),
			ResultDesc:  item.Find("span.st").Text(),
		})
	})
	return results, nil
}
// GoogleScrape searches Google for searchTerm and returns the parsed results.
//
// countryCode selects the Google domain to query and languageCode is passed
// as the interface language of the search. Any error from fetching the page
// or from parsing it is returned unchanged together with a nil slice.
func GoogleScrape(searchTerm string, countryCode string, languageCode string) ([]GoogleResult, error) {
	response, err := googleRequest(buildGoogleUrl(searchTerm, countryCode, languageCode))
	if err != nil {
		return nil, err
	}

	results, err := googleResultParser(response)
	if err != nil {
		return nil, err
	}
	return results, nil
}
@fedir
Copy link

fedir commented Sep 12, 2019

Nice tiny script, thanks for sharing.
It seems that the title tag now has a different HTML structure, so the selector should be adjusted.

@pjebs
Copy link

pjebs commented Mar 11, 2020

Yes, the structure has now changed.
I've updated it here: https://github.com/rocketlaunchr/google-search

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment