Skip to content

Instantly share code, notes, and snippets.

@EdmundMartin
Created November 11, 2017 15:44
Show Gist options
  • Select an option

  • Save EdmundMartin/eaea4aaa5d231078cb433b89878dbecf to your computer and use it in GitHub Desktop.

Select an option

Save EdmundMartin/eaea4aaa5d231078cb433b89878dbecf to your computer and use it in GitHub Desktop.
package googlescraper
import (
	"fmt"
	"net/http"
	"net/url"
	"strings"

	"github.com/PuerkitoBio/goquery"
)
// GoogleResult describes a single entry scraped from a Google results page.
type GoogleResult struct {
	// ResultRank is the position of the entry among the results that were
	// kept by the parser, counted from 1.
	ResultRank int

	// ResultURL is the link target of the entry, ResultTitle its heading
	// text and ResultDesc its snippet text.
	ResultURL, ResultTitle, ResultDesc string
}
// googleDomains maps a short country code to the Google search URL prefix
// for that country. Every value ends in "?q=" so that an escaped query can be
// appended directly.
var googleDomains = map[string]string{
	"com": "https://www.google.com/search?q=",
	"uk":  "https://www.google.co.uk/search?q=",
	"ru":  "https://www.google.ru/search?q=",
	"fr":  "https://www.google.fr/search?q=",
}

// buildGoogleUrl returns the search URL for searchTerm.
//
// countryCode selects the prefix from googleDomains; an unknown code falls
// back to the "com" entry. languageCode is sent as the hl parameter, and the
// URL always requests 100 results (num=100).
//
// Leading and trailing spaces are stripped from searchTerm, which is then
// query-escaped: spaces become "+", and reserved characters such as "&", "=",
// "#" and "+" are percent-encoded so they cannot truncate the query or inject
// extra parameters.
func buildGoogleUrl(searchTerm string, countryCode string, languageCode string) string {
	googleBase, found := googleDomains[countryCode]
	if !found {
		googleBase = googleDomains["com"]
	}

	// QueryEscape encodes spaces as "+", matching the previous output for
	// plain terms while also protecting reserved and non-ASCII characters.
	query := url.QueryEscape(strings.Trim(searchTerm, " "))

	return fmt.Sprintf("%s%s&num=100&hl=%s", googleBase, query, url.QueryEscape(languageCode))
}
// googleRequest issues a GET request for searchURL, sending a desktop Chrome
// User-Agent header, and returns the raw HTTP response.
//
// An error is returned when the request cannot be constructed (for example
// because searchURL is malformed) or when the HTTP round trip fails; in both
// cases the returned response is nil. On success the caller owns the
// response body and is responsible for closing it.
func googleRequest(searchURL string) (*http.Response, error) {
	req, err := http.NewRequest("GET", searchURL, nil)
	if err != nil {
		// This error must not be ignored: req is nil here, and setting a
		// header on it would panic.
		return nil, err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36")

	baseClient := &http.Client{}
	res, err := baseClient.Do(req)
	if err != nil {
		return nil, err
	}
	return res, nil
}
// googleResultParser turns a Google results page into a list of results.
//
// The response is parsed as an HTML document and every "div.g" element is
// examined in document order. For each one, the href attribute is read from
// its anchor selection, the title from "h3.r" and the description from
// "span.st". Elements whose href is empty or "#" once surrounding spaces are
// trimmed are skipped; the remaining ones are ranked consecutively from 1.
//
// If the document cannot be built, the error is returned with nil results.
// Otherwise the returned slice is non-nil, even when nothing matched.
func googleResultParser(response *http.Response) ([]GoogleResult, error) {
	doc, err := goquery.NewDocumentFromResponse(response)
	if err != nil {
		return nil, err
	}

	results := []GoogleResult{}
	doc.Find("div.g").Each(func(_ int, item *goquery.Selection) {
		href, _ := item.Find("a").Attr("href")
		href = strings.Trim(href, " ")
		if href == "" || href == "#" {
			// Not a usable result link; leave the rank counter untouched.
			return
		}

		results = append(results, GoogleResult{
			// Ranks are dense: one more than the number of results kept so far.
			ResultRank:  len(results) + 1,
			ResultURL:   href,
			ResultTitle: item.Find("h3.r").Text(),
			ResultDesc:  item.Find("span.st").Text(),
		})
	})

	return results, nil
}
// GoogleScrape runs a Google search for searchTerm and returns the scraped
// results.
//
// countryCode and languageCode are forwarded to buildGoogleUrl to choose the
// Google domain and the hl parameter. The resulting URL is fetched with
// googleRequest and the response is handed to googleResultParser. An error
// from either step is returned as-is together with nil results.
func GoogleScrape(searchTerm string, countryCode string, languageCode string) ([]GoogleResult, error) {
	response, err := googleRequest(buildGoogleUrl(searchTerm, countryCode, languageCode))
	if err != nil {
		return nil, err
	}

	results, err := googleResultParser(response)
	if err != nil {
		return nil, err
	}
	return results, nil
}
@fedir
Copy link
Copy Markdown

fedir commented Sep 12, 2019

Nice tiny script, thanks for sharing.
It seems that the title tag now has a different HTML structure, so the selector should be adjusted.

@pjebs
Copy link
Copy Markdown

pjebs commented Mar 11, 2020

Yes, the structure has now changed.
I've updated it here: https://github.com/rocketlaunchr/google-search

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment