Last active
August 16, 2023 06:47
-
-
Save notsobad/5ffcd24adc47c65a5c81bc11065deffa to your computer and use it in GitHub Desktop.
使用 chromedp,加载一个网页,获取网页加载的所有资源地址,获取页面里的所有链接地址
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This command is a chromedp example that loads a web page and prints every
// resource URL the page loaded together with every link found in the page.
package main | |
import ( | |
"context" | |
"fmt" | |
"log" | |
"strings" | |
"time" | |
"github.com/PuerkitoBio/goquery" | |
"github.com/chromedp/chromedp" | |
) | |
func main() { | |
// create chrome instance | |
ctx, cancel := chromedp.NewContext( | |
context.Background(), | |
//chromedp.WithDebugf(log.Printf), | |
) | |
defer cancel() | |
// create a timeout | |
ctx, cancel = context.WithTimeout(ctx, 15*time.Second) | |
defer cancel() | |
jsCode := `(() => { | |
let ret = {}; | |
ret['resource'] = performance.getEntriesByType("resource").map((r) => r.name); | |
ret['links'] = Array.from(document.getElementsByTagName('a')).map(link => link.href).filter(href => href); | |
return JSON.stringify(ret); })();` | |
var urls string | |
// 定义用于存储HTML内容的字符串变量 | |
var htmlContent string | |
// navigate到目标网站并等待domcontentloaded(即网站首屏加载完全) | |
err := chromedp.Run(ctx, | |
chromedp.Navigate("http://127.0.0.1:8888/hi.html"), | |
chromedp.WaitReady("body", chromedp.ByQuery), | |
// 获取整个页面DOM树并且赋值给htmlContent | |
chromedp.InnerHTML(`html`, &htmlContent, chromedp.NodeVisible), | |
chromedp.EvaluateAsDevTools(jsCode, &urls), | |
) | |
if err != nil { | |
log.Fatal(err) | |
} | |
fmt.Println(htmlContent) | |
// 从渲染后的 dom 中获取链接地址的方式,不推荐 | |
urls1 := getLinksFromHtml(htmlContent) | |
fmt.Println(urls1) | |
// 使用 插入 js 代码来获取链接地址和加载的资源,推荐 | |
fmt.Println("js get all page loaded urls:") | |
fmt.Println(urls) | |
} | |
func getLinksFromHtml(html string) []string { | |
var urls []string | |
parsedHtmlReader := strings.NewReader(html) | |
doc, _ := goquery.NewDocumentFromReader(parsedHtmlReader) | |
doc.Find("a").Each(func(index int, link *goquery.Selection) { | |
url, _ := link.Attr("href") | |
if len(url) > 0 { | |
urls = append(urls, url) | |
} | |
}) | |
return urls | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<dir>
  <a href="/abc.html">assss</a>
</dir>
<script crossorigin="anonymous" defer="defer" type="application/javascript" src="/ssssss0.js"></script>
<script crossorigin="anonymous" defer="defer" type="application/javascript" src="/test.js"></script>
<img src="/xxxx.jpg" />
<iframe src="/iframelink"></iframe>
<script>
  // Builds an "https://<10 random chars>.com" URL from lowercase letters and
  // digits. Note: nothing in this script calls it; the elements below use
  // fixed paths instead.
  function randomUrl() {
    const chars = 'abcdefghijklmnopqrstuvwxyz0123456789';
    let url = '';
    for (let i = 0; i < 10; i++) {
      url += chars.charAt(Math.floor(Math.random() * chars.length));
    }
    return `https://${url}.com`;
  }

  // Create an <a> element with a fixed href and link text.
  const aTag = document.createElement('a');
  aTag.href = "/randomlink";
  aTag.innerText = "test url";

  // Create an <img> element with a fixed src.
  const imgTag = document.createElement('img');
  imgTag.src = "/randomimg";

  // Create an <iframe> element with a fixed src.
  const iframeTag = document.createElement('iframe');
  iframeTag.src = "/randomiframe";

  // Append all three elements to the body.
  document.body.appendChild(aTag);
  document.body.appendChild(imgTag);
  document.body.appendChild(iframeTag);
</script>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Creates an <a> element and an <img> element with fixed URLs and appends
// both to the document body. Wrapped in an IIFE so the local constants do not
// leak into the global scope.
(function () {
  // Anchor with a fixed href and link text.
  const aTag = document.createElement('a');
  aTag.href = "/randomlink222222";
  aTag.innerText = "test url222";

  // Image with a fixed src.
  const imgTag = document.createElement('img');
  imgTag.src = "/randomimg2222";

  // Append both elements to the body.
  document.body.appendChild(aTag);
  document.body.appendChild(imgTag);
})();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment