Skip to content

Instantly share code, notes, and snippets.

@ksasao
Last active February 18, 2022 00:23
Show Gist options
  • Save ksasao/12ac985d3d9094fd2da40080aa863c56 to your computer and use it in GitHub Desktop.
Save ksasao/12ac985d3d9094fd2da40080aa863c56 to your computer and use it in GitHub Desktop.
PuppeteerSharp と AngleSharp を使って C#でヘッドレスブラウザなスクレイピング
using AngleSharp.Dom.Html;
using AngleSharp.Parser.Html;
using PuppeteerSharp;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace PuppeteerTest
{
    /// <summary>
    /// Sample console program that loads a page in headless Chromium via PuppeteerSharp,
    /// saves a screenshot and the rendered HTML, then inspects the HTML with AngleSharp.
    /// </summary>
    /// <remarks>
    /// Add the PuppeteerSharp and AngleSharp packages via NuGet before building.
    /// </remarks>
    class Program
    {
        /// <summary>
        /// Entry point. Runs the scraping sample to completion, reports any failure,
        /// and then waits for a key press so the console window stays open.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            try
            {
                // Block until the async work has finished. A console app has no
                // synchronization context, so this cannot deadlock, and unlike a
                // fire-and-forget Task.Run it surfaces exceptions instead of losing them.
                PuppeteersharpTest().GetAwaiter().GetResult();
            }
            catch (Exception ex)
            {
                // Top-level handler: report the failure and signal it via the exit code.
                Console.Error.WriteLine(ex);
                Environment.ExitCode = 1;
            }

            Console.ReadKey();
        }

        /// <summary>
        /// Opens the target URL in a headless browser, writes a screenshot and the page
        /// HTML to disk, then prints the text of the first item of the element with id
        /// <c>stream-items-id</c> if that item exists and is a list item.
        /// </summary>
        /// <returns>A task that completes when the page has been captured and parsed.</returns>
        static async Task PuppeteersharpTest()
        {
            string url = "https://twitter.com/ksasao";
            string imagePath = @".\screenshot.png";
            string htmlPath = @".\index.html";

            // Download the default browser revision used by PuppeteerSharp.
            await new BrowserFetcher().DownloadAsync(BrowserFetcher.DefaultRevision);

            string html;

            // 'using' guarantees the page and the browser are disposed even when
            // navigation, the screenshot or the file write throws.
            using (var browser = await Puppeteer.LaunchAsync(new LaunchOptions
            {
                Headless = true
            }))
            using (var page = await browser.NewPageAsync())
            {
                // Use a tall (portrait) viewport.
                await page.SetViewportAsync(new ViewPortOptions { Width = 1080, Height = 1920 });
                await page.GoToAsync(url);

                // Save a screenshot.
                await page.ScreenshotAsync(imagePath);

                // Save the page content as HTML.
                html = await page.GetContentAsync();
                File.WriteAllText(htmlPath, html);
            }

            // From here on, work with the retrieved HTML using AngleSharp.
            var parser = new HtmlParser();
            using (IHtmlDocument doc = parser.Parse(html))
            {
                // Print the first tweet as text. The container may be missing or empty
                // (e.g. the page markup changed), so look it up defensively instead of
                // dereferencing/indexing blindly.
                var firstTweet = doc.GetElementById("stream-items-id")?.Children.FirstOrDefault() as IHtmlListItemElement;
                if (firstTweet != null)
                {
                    Console.WriteLine(firstTweet.InnerText);
                }
                else
                {
                    Console.Error.WriteLine("No tweet list item found under element 'stream-items-id'.");
                }
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment