katz · August 22, 2016 00:58
diff --git a/page.html b/page.html
 <!DOCTYPE html>
 <html>
 <head>
 <meta charset="utf-8">
 <title>title</title>
 </head>
 <body>
  <!-- Sample list: the scraping example queries these items with the CSS selector "ul li". -->
  <ul>
    <li>hoge 1</li>
    <li>fuga 2</li>
    <li>piyo 3</li>
  </ul>

  <!-- Sample table: each row is a label cell followed by a value cell.
       The XPath example finds the cell whose text contains "location" and reads its following sibling. -->
  <table>
    <tr>
      <td>weather</td>
      <td>sunny</td>
    </tr>
    <tr>
      <td>my location is</td>
      <td>Tokyo</td>
    </tr>
  </table>
 </body>
 </html>
diff --git a/ScrapingByScrapySharp.cs b/ScrapingByScrapySharp.cs
 using System;
 using HtmlAgilityPack;
 using ScrapySharp.Extensions;
 using ScrapySharp.Network;
 using System.Linq;
 using System.Text.RegularExpressions;

 namespace ScrapySharpTest
 {
    /// <summary>
    /// Minimal ScrapySharp sample: downloads one page and extracts text from it
    /// with a CSS selector, a CSS selector combined with LINQ, and an XPath query.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Fetches the sample page and writes the three extracted values to standard output.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            var browser = new ScrapingBrowser();
            browser.AllowAutoRedirect = true;
            browser.AllowMetaRedirect = true;

            // Fetch the target web page.
            var pageResult = browser.NavigateToPage(new Uri("http://example.com/page.html"));

            // Apply a CSS selector and take the first matching node.
            // For the sample page this yields "hoge 1".
            // The value is stored and printed: a bare property access is not a valid C# statement.
            var firstItemText = pageResult.Html.CssSelect("ul li").First().InnerText;
            Console.WriteLine(firstItemText);

            // Apply the same CSS selector, then use LINQ to pick the first node whose
            // inner text contains "fuga". For the sample page this yields "fuga 2".
            var fugaItemText = pageResult.Html.CssSelect("ul li").First(elem => elem.InnerText.Contains("fuga")).InnerText;
            Console.WriteLine(fugaItemText);

            // Use XPath to find the <td> whose text contains "location" and take the <td> next to it.
            // For the sample page this yields "Tokyo".
            // SelectSingleNode returns the first match or null, which avoids calling First()
            // on the null that SelectNodes returns when nothing matches.
            var locationValueNode = pageResult.Html.SelectSingleNode("//td[contains(text(),'location')]/following-sibling::td");
            if (locationValueNode != null)
            {
                Console.WriteLine(locationValueNode.InnerText);
            }
            else
            {
                Console.Error.WriteLine("No <td> following a cell containing 'location' was found.");
            }
        }
    }
 }
	<!DOCTYPE html>
	<html>
	<head>
	<meta charset="utf-8">
	<title>title</title>
	</head>
	<body>
	<!-- Sample list: the scraping example queries these items with the CSS selector "ul li". -->
	<ul>
	<li>hoge 1</li>
	<li>fuga 2</li>
	<li>piyo 3</li>
	</ul>

	<!-- Sample table: each row is a label cell followed by a value cell. -->
	<!-- The XPath example finds the cell whose text contains "location" and reads its following sibling. -->
	<table>
	<tr>
	<td>weather</td>
	<td>sunny</td>
	</tr>
	<tr>
	<td>my location is</td>
	<td>Tokyo</td>
	</tr>
	</table>
	</body>
	</html>
	using System;
	using HtmlAgilityPack;
	using ScrapySharp.Extensions;
	using ScrapySharp.Network;
	using System.Linq;
	using System.Text.RegularExpressions;

	namespace ScrapySharpTest
	{
	    /// <summary>
	    /// Minimal ScrapySharp sample: downloads one page and extracts text from it
	    /// with a CSS selector, a CSS selector combined with LINQ, and an XPath query.
	    /// </summary>
	    class Program
	    {
	        /// <summary>
	        /// Fetches the sample page and writes the three extracted values to standard output.
	        /// </summary>
	        /// <param name="args">Command-line arguments; not used.</param>
	        static void Main(string[] args)
	        {
	            var browser = new ScrapingBrowser();
	            browser.AllowAutoRedirect = true;
	            browser.AllowMetaRedirect = true;

	            // Fetch the target web page.
	            var pageResult = browser.NavigateToPage(new Uri("http://example.com/page.html"));

	            // Apply a CSS selector and take the first matching node.
	            // For the sample page this yields "hoge 1".
	            // The value is stored and printed: a bare property access is not a valid C# statement.
	            var firstItemText = pageResult.Html.CssSelect("ul li").First().InnerText;
	            Console.WriteLine(firstItemText);

	            // Apply the same CSS selector, then use LINQ to pick the first node whose
	            // inner text contains "fuga". For the sample page this yields "fuga 2".
	            var fugaItemText = pageResult.Html.CssSelect("ul li").First(elem => elem.InnerText.Contains("fuga")).InnerText;
	            Console.WriteLine(fugaItemText);

	            // Use XPath to find the <td> whose text contains "location" and take the <td> next to it.
	            // For the sample page this yields "Tokyo".
	            // SelectSingleNode returns the first match or null, which avoids calling First()
	            // on the null that SelectNodes returns when nothing matches.
	            var locationValueNode = pageResult.Html.SelectSingleNode("//td[contains(text(),'location')]/following-sibling::td");
	            if (locationValueNode != null)
	            {
	                Console.WriteLine(locationValueNode.InnerText);
	            }
	            else
	            {
	                Console.Error.WriteLine("No <td> following a cell containing 'location' was found.");
	            }
	        }
	    }
	}