Created
January 22, 2014 16:33
-
-
Save neuecc/8561992 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Gumbo.Wrappers; | |
using Sgml; | |
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.IO; | |
using System.Linq; | |
using System.Net; | |
using System.Xml.Linq; | |
namespace ConsoleApplication55 | |
{ | |
class Program
{
    /// <summary>Page whose HTML is fed to every parser under test.</summary>
    private const string SampleUrl = "https://github.com/google/gumbo-parser";

    /// <summary>Number of timed invocations per benchmark.</summary>
    private const int Iterations = 100;

    // Downloaded on first use and then shared, so every parser is measured
    // against exactly the same input and the network is hit only once.
    private static readonly Lazy<string> SampleHtml = new Lazy<string>(DownloadSample);

    /// <summary>
    /// Downloads the sample page as a string, disposing the
    /// <see cref="WebClient"/> once the download has finished.
    /// </summary>
    /// <returns>The HTML text of <see cref="SampleUrl"/>.</returns>
    private static string DownloadSample()
    {
        using (var client = new WebClient())
        {
            return client.DownloadString(SampleUrl);
        }
    }

    /// <summary>
    /// Runs <paramref name="action"/> against the sample HTML
    /// <see cref="Iterations"/> times and writes the total elapsed time to
    /// the console as <c>label:elapsed</c>.
    /// </summary>
    /// <param name="label">Name printed in front of the measurement.</param>
    /// <param name="action">Parser workload; receives the sample HTML.</param>
    private static void Bench(string label, Action<string> action)
    {
        var html = SampleHtml.Value;

        // One untimed warm-up call so one-time costs (JIT compilation,
        // assembly loading) are not charged to the measurement.
        action(html);

        var sw = Stopwatch.StartNew();
        for (int i = 0; i < Iterations; i++)
        {
            action(html);
        }
        sw.Stop();

        Console.WriteLine(label + ":" + sw.Elapsed);
    }

    /// <summary>
    /// Benchmarks three HTML parsers (SgmlReader, HtmlAgilityPack, Gumbo) on
    /// the same task: parse the page and collect the text of every
    /// <c>li</c> element into an array.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    private static void Main(string[] args)
    {
        Bench("SgmlReader", html =>
        {
            XDocument xdoc;
            using (var tr = new StringReader(html))
            using (var sgml = new SgmlReader() { InputStream = tr })
            {
                xdoc = XDocument.Load(sgml);
            }

            // ToArray() forces the lazy query to run; the result itself is not needed.
            xdoc.Descendants("li")
                .Select(x => x.Value)
                .ToArray();
        });

        Bench("HtmlAgilityPack", html =>
        {
            var doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            doc.DocumentNode.Descendants("li")
                .Select(x => x.InnerText)
                .ToArray();
        });

        Bench("Gumbo", html =>
        {
            // NOTE(review): if GumboWrapper implements IDisposable, wrap it in
            // a using block so it is released each iteration - confirm
            // against the Gumbo bindings.
            var gumboParse = new Gumbo.Wrappers.GumboWrapper(html);

            gumboParse.Document.Children
                .TraverseDepthFirst(x => x.Children) // (Ix's Expand is breadth-first, so I figured it was unusable here)
                .OfType<ElementWrapper>()
                .Where(x => x.NormalizedTagName == "li")
                .Select(x => x.Value)
                .ToArray();
        });
    }
}
public static class Extensions
{
    /// <summary>
    /// Lazily flattens a forest in depth-first pre-order: each item is
    /// yielded before its descendants, and all descendants of an item are
    /// yielded before its next sibling.
    /// </summary>
    /// <typeparam name="T">Node type.</typeparam>
    /// <param name="source">Root nodes of the traversal.</param>
    /// <param name="selector">
    /// Returns the children of a node. It is invoked once per yielded node,
    /// only after the consumer asks for the element that follows that node.
    /// A <c>null</c> result is treated as "no children".
    /// </param>
    /// <returns>A deferred sequence of all nodes in pre-order.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="selector"/> is <c>null</c>.
    /// Thrown at call time rather than on first enumeration.
    /// </exception>
    public static IEnumerable<T> TraverseDepthFirst<T>(this IEnumerable<T> source, Func<T, IEnumerable<T>> selector)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }
        if (selector == null)
        {
            throw new ArgumentNullException("selector");
        }

        // Validation lives in this non-iterator wrapper so it runs eagerly;
        // an iterator body would not execute until the first MoveNext().
        return TraverseDepthFirstIterator(source, selector);
    }

    // Iterator behind TraverseDepthFirst: walks the forest with an explicit stack of enumerators.
    private static IEnumerable<T> TraverseDepthFirstIterator<T>(IEnumerable<T> source, Func<T, IEnumerable<T>> selector)
    {
        // One open enumerator per level of the current path. An explicit
        // stack avoids nesting one iterator per tree level, which would
        // re-yield every element through each of its ancestors and recurse
        // as deep as the tree.
        var pending = new Stack<IEnumerator<T>>();
        try
        {
            pending.Push(source.GetEnumerator());
            while (pending.Count > 0)
            {
                var level = pending.Peek();
                if (!level.MoveNext())
                {
                    // This level is exhausted: resume the parent level.
                    pending.Pop().Dispose();
                    continue;
                }

                var item = level.Current;
                yield return item;

                // Descend into the children before visiting the next sibling.
                var children = selector(item);
                if (children != null)
                {
                    pending.Push(children.GetEnumerator());
                }
            }
        }
        finally
        {
            // Runs on early termination as well (e.g. Take/First or an
            // exception) so no enumerator on the path is left undisposed.
            while (pending.Count > 0)
            {
                pending.Pop().Dispose();
            }
        }
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment