Last active
March 14, 2023 21:21
-
-
Save akiyoshi83/1fce0aae0f70c92a3d526fc20406d5f7 to your computer and use it in GitHub Desktop.
Scraping with Go using chromedp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"context" | |
"fmt" | |
"io/ioutil" | |
"log" | |
"time" | |
"github.com/chromedp/cdproto/cdp" | |
"github.com/chromedp/chromedp" | |
"github.com/chromedp/chromedp/runner" | |
) | |
// chromePath is the location of the Google Chrome executable that gets
// launched for scraping. The value matches a Chrome installed through
// Homebrew Cask on macOS; change it when Chrome lives somewhere else.
const chromePath = "/usr/local/Caskroom/google-chrome/latest/Google Chrome.app/Contents/MacOS/Google Chrome"
func main() { | |
var err error | |
// create context | |
ctxt, cancel := context.WithCancel(context.Background()) | |
defer cancel() | |
// prepare options | |
opts := chromedp.WithRunnerOptions( | |
runner.ExecPath(chromePath), | |
runner.Port(9222), | |
runner.Flag("headless", true), | |
runner.Flag("disable-gpu", true), | |
) | |
// create chrome instance | |
c, err := chromedp.New( | |
ctxt, | |
chromedp.WithLog(log.Printf), | |
opts, | |
) | |
if err != nil { | |
log.Fatal(err) | |
} | |
// run task list | |
var site, res string | |
err = c.Run(ctxt, googleSearch("site:brank.as", "Home", &site, &res)) | |
if err != nil { | |
log.Fatal(err) | |
} | |
// shutdown chrome | |
err = c.Shutdown(ctxt) | |
if err != nil { | |
log.Fatal(err) | |
} | |
// wait for chrome to finish | |
err = c.Wait() | |
if err != nil { | |
log.Fatal(err) | |
} | |
log.Printf("saved screenshot from search result listing `%s` (%s)", res, site) | |
} | |
func googleSearch(q, text string, site, res *string) chromedp.Tasks { | |
var buf []byte | |
sel := fmt.Sprintf(`//a[text()[contains(., '%s')]]`, text) | |
return chromedp.Tasks{ | |
chromedp.Navigate(`https://www.google.com`), | |
chromedp.WaitVisible(`#hplogo`, chromedp.ByID), | |
chromedp.SendKeys(`#lst-ib`, q+"\n", chromedp.ByID), | |
chromedp.WaitVisible(`#res`, chromedp.ByID), | |
chromedp.Text(sel, res), | |
chromedp.Click(sel), | |
chromedp.WaitNotVisible(`.preloader-content`, chromedp.ByQuery), | |
chromedp.WaitVisible(`a[href*="twitter"]`, chromedp.ByQuery), | |
chromedp.Location(site), | |
chromedp.ScrollIntoView(`.banner-section.third-section`, chromedp.ByQuery), | |
chromedp.Sleep(2 * time.Second), // wait for animation to finish | |
chromedp.Screenshot(`.banner-section.third-section`, &buf, chromedp.ByQuery), | |
chromedp.ActionFunc(func(context.Context, cdp.Executor) error { | |
return ioutil.WriteFile("screenshot.png", buf, 0644) | |
}), | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment