Skip to content

Instantly share code, notes, and snippets.

@seanho
Created May 20, 2010 08:21
Show Gist options
  • Save seanho/407335 to your computer and use it in GitHub Desktop.
Save seanho/407335 to your computer and use it in GitHub Desktop.
using System;
using System.Linq;
using System.Net;
using System.Text.RegularExpressions;
using HtmlAgilityPack;
namespace EmbedHtml
{
/// <summary>
/// Downloads an HTML page and inlines the images referenced by its
/// <c>&lt;img src="..."&gt;</c> tags as base64 <c>data:</c> URIs.
/// </summary>
public class Embeder
{
    /// <summary>Maximum number of images downloaded at the same time.</summary>
    private const int MAX_CONCURRENCY = 5;

    /// <summary>Captures the trailing file extension (without the dot) of a URI path.</summary>
    private static readonly Regex ExtensionPattern =
        new Regex(@"\.([A-Za-z0-9]+)$", RegexOptions.Compiled);

    /// <summary>
    /// Absolute address of the page to process. It is also the base against
    /// which relative image sources are resolved.
    /// </summary>
    public string Url { get; set; }

    /// <summary>Creates an embedder for the page at <paramref name="url"/>.</summary>
    /// <param name="url">Absolute address of the HTML page.</param>
    public Embeder(string url)
    {
        Url = url;
    }

    /// <summary>
    /// Downloads the page at <see cref="Url"/>, replaces the <c>src</c> of every
    /// image that can be embedded with a base64 data URI and returns the
    /// resulting markup.
    /// </summary>
    /// <remarks>
    /// Images are left untouched when they have no <c>src</c>, are already
    /// <c>data:</c> URIs, have a source that cannot be resolved to a URI, or
    /// have no file extension from which an image type can be derived.
    /// </remarks>
    /// <returns>The inner HTML of the document after embedding.</returns>
    /// <exception cref="WebException">The page itself could not be downloaded.</exception>
    /// <exception cref="AggregateException">
    /// One or more image downloads failed; the parallel query wraps the
    /// underlying exceptions.
    /// </exception>
    public string Process()
    {
        string html;
        using (var client = new WebClient())
        {
            html = client.DownloadString(Url);
        }

        var doc = new HtmlDocument();
        doc.LoadHtml(html);

        // SelectNodes returns null (not an empty collection) when nothing matches.
        var images = doc.DocumentNode.SelectNodes("//img");
        if (images == null)
        {
            return doc.DocumentNode.InnerHtml;
        }

        var baseUri = new Uri(Url);

        // Snapshot the attributes on the calling thread so that the document
        // is never read or written from the worker threads.
        var pending = images.Cast<HtmlNode>()
            .Select(img => img.Attributes["src"])
            .Where(attr => attr != null)
            .Select(attr => new { Attribute = attr, Source = attr.Value })
            .ToList();

        // Only the network transfers run in parallel.
        var downloaded = pending
            .AsParallel()
            .WithDegreeOfParallelism(MAX_CONCURRENCY)
            .Select(item => new { item.Attribute, DataUri = BuildDataUri(baseUri, item.Source) })
            .ToList();

        // Apply the results sequentially, back on the calling thread.
        foreach (var item in downloaded)
        {
            if (item.DataUri != null)
            {
                item.Attribute.Value = item.DataUri;
            }
        }

        return doc.DocumentNode.InnerHtml;
    }

    /// <summary>
    /// Downloads the image referenced by <paramref name="source"/> and encodes
    /// it as a data URI.
    /// </summary>
    /// <param name="baseUri">Page address used to resolve relative sources.</param>
    /// <param name="source">Raw value of the image's <c>src</c> attribute.</param>
    /// <returns>The data URI, or <c>null</c> when the image should be left as is.</returns>
    private static string BuildDataUri(Uri baseUri, string source)
    {
        if (string.IsNullOrEmpty(source) ||
            source.StartsWith("data:", StringComparison.OrdinalIgnoreCase))
        {
            return null; // nothing to fetch, or already embedded
        }

        Uri imageUri;
        if (!Uri.TryCreate(baseUri, source, out imageUri))
        {
            return null;
        }

        // Decide on the media type before downloading so that images which
        // cannot be embedded do not cost a network round trip.
        var subtype = GetImageSubtype(imageUri);
        if (subtype == null)
        {
            return null;
        }

        byte[] data;
        using (var client = new WebClient())
        {
            data = client.DownloadData(imageUri);
        }

        if (data == null || data.Length == 0)
        {
            return null;
        }

        return string.Format("data:image/{0};base64,{1}", subtype, Convert.ToBase64String(data));
    }

    /// <summary>
    /// Derives the <c>image/*</c> subtype from the file extension of the URI
    /// path (query string and fragment are ignored).
    /// </summary>
    /// <param name="imageUri">Absolute address of the image.</param>
    /// <returns>The subtype, or <c>null</c> when the path has no extension.</returns>
    private static string GetImageSubtype(Uri imageUri)
    {
        var match = ExtensionPattern.Match(imageUri.AbsolutePath);
        if (!match.Success)
        {
            return null;
        }

        var extension = match.Groups[1].Value.ToLowerInvariant();
        switch (extension)
        {
            // Extensions whose registered media subtype differs from the extension.
            case "jpg":
            case "jpe":
                return "jpeg";
            case "svg":
                return "svg+xml";
            case "ico":
                return "x-icon";
            case "tif":
                return "tiff";
            default:
                return extension;
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment