Skip to content

Instantly share code, notes, and snippets.

@azyobuzin
Created November 16, 2017 10:47
Show Gist options
  • Save azyobuzin/0ea20253e1d0e065a81e1b60fd6be9ee to your computer and use it in GitHub Desktop.
Save azyobuzin/0ea20253e1d0e065a81e1b60fd6be9ee to your computer and use it in GitHub Desktop.
using System;
using System.IO;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text;
using System.Threading.Tasks;
using System.Xml;
namespace HatenaBlogDownloader
{
    /// <summary>
    /// Console tool that downloads every page of the Atom entry feed located at
    /// <c>https://blog.hatena.ne.jp/{id}/{blogId}/atom/entry</c> and stores each
    /// page in the current directory as <c>001.xml</c>, <c>002.xml</c>, ...
    /// </summary>
    class Program
    {
        /// <summary>XML namespace of the Atom elements inspected in the feed.</summary>
        private const string AtomNamespace = "http://www.w3.org/2005/Atom";

        /// <summary>
        /// Prompts for the ID, blog ID and API key, then follows the feed's
        /// <c>rel="next"</c> links until a page without one is reached, saving
        /// every page to a numbered file.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <exception cref="InvalidDataException">
        /// A downloaded page contains no XML element at all.
        /// </exception>
        static async Task Main(string[] args)
        {
            Console.Write("ID: ");
            var id = Console.ReadLine();
            Console.Write("Blog ID: ");
            var blogId = Console.ReadLine();
            Console.Write("API Key: ");
            var apiKey = Console.ReadLine();

            using (var client = new HttpClient())
            {
                // Every request is sent with HTTP Basic credentials "id:apiKey".
                client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue(
                    "Basic", Convert.ToBase64String(Encoding.UTF8.GetBytes(id + ":" + apiKey)));

                var requestUri = "https://blog.hatena.ne.jp/" + id + "/" + blogId + "/atom/entry";
                for (var i = 1; requestUri != null; i++)
                {
                    var outputName = $"{i:D03}.xml";
                    Console.WriteLine(requestUri + " -> " + outputName);

                    // The request is issued before the file is created, so a failed
                    // request does not leave an empty output file behind.
                    using (var res = await client.GetStreamAsync(requestUri))
                    using (var fs = new FileStream(outputName, FileMode.Create, FileAccess.ReadWrite))
                    {
                        // Copy asynchronously instead of blocking inside an async method.
                        await res.CopyToAsync(fs);

                        // Release the response stream before parsing; the enclosing
                        // using disposes it a second time when the block is left.
                        res.Dispose();

                        // Re-read the page that was just written to find the next one.
                        fs.Seek(0, SeekOrigin.Begin);
                        requestUri = FindNextPageUri(fs);
                    }
                }
            }
        }

        /// <summary>
        /// Scans the direct children of the root <c>feed</c> element for an Atom
        /// <c>link</c> element whose <c>rel</c> attribute is <c>"next"</c>.
        /// </summary>
        /// <param name="feed">Stream positioned at the start of the feed document.</param>
        /// <returns>
        /// The <c>href</c> attribute of the first such link, or <c>null</c> when the
        /// feed has no <c>rel="next"</c> link (or that link has no <c>href</c>).
        /// </returns>
        /// <exception cref="InvalidDataException">The document contains no element.</exception>
        private static string FindNextPageUri(Stream feed)
        {
            using (var reader = XmlReader.Create(feed))
            {
                // Advance to the root element.
                do
                {
                    if (!reader.Read())
                    {
                        throw new InvalidDataException("The downloaded feed does not contain an XML element.");
                    }
                } while (reader.NodeType != XmlNodeType.Element);

                // Throws XmlException when the root element is not an Atom <feed>.
                reader.ReadStartElement("feed", AtomNamespace);

                // The reader is now on the first child of <feed>. Walk the children
                // one by one so that a <link> sitting directly at this position is
                // inspected as well; ReadToNextSibling would only look at the
                // siblings that follow the current node.
                while (!reader.EOF && reader.NodeType != XmlNodeType.EndElement)
                {
                    if (reader.NodeType != XmlNodeType.Element)
                    {
                        // Whitespace, comments, etc. between the children.
                        reader.Read();
                        continue;
                    }

                    if (reader.LocalName == "link"
                        && reader.NamespaceURI == AtomNamespace
                        && reader.GetAttribute("rel") == "next")
                    {
                        return reader.GetAttribute("href");
                    }

                    // Jump over this child and its whole subtree, so links nested
                    // inside other elements are never considered.
                    reader.Skip();
                }

                return null;
            }
        }
    }
}
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Xml.Linq;
using AngleSharp.Dom;
using AngleSharp.Extensions;
using AngleSharp.Parser.Html;
namespace HttpLinkChecker
{
    /// <summary>
    /// Console tool that loads every <c>*.xml</c> file in the current directory,
    /// looks at each Atom <c>entry</c> element and lists the <c>src</c> attribute
    /// values in its formatted content that start with <c>http://</c>.
    /// </summary>
    class Program
    {
        private static readonly XNamespace s_atomNs = "http://www.w3.org/2005/Atom";
        private static readonly XNamespace s_hatenaNs = "http://www.hatena.ne.jp/info/xmlns#";
        private static readonly HtmlParser s_parser = new HtmlParser();

        /// <summary>
        /// Reports, for every entry of every feed file, the plain-HTTP sources it references.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args)
        {
            foreach (var path in Directory.GetFiles(".", "*.xml"))
            {
                var feed = XElement.Load(path);
                foreach (var entry in feed.Elements(s_atomNs + "entry"))
                {
                    ReportEntry(entry);
                }
            }
        }

        /// <summary>
        /// Writes one report section for <paramref name="entry"/>: its title, the
        /// <c>href</c> of its <c>rel="alternate"</c> link, a blank line, each matching
        /// source on its own line, and two trailing blank lines. Nothing is written
        /// when the entry has no matching source.
        /// </summary>
        /// <param name="entry">An Atom <c>entry</c> element.</param>
        private static void ReportEntry(XElement entry)
        {
            var html = entry.Element(s_hatenaNs + "formatted-content").Value;

            using (IEnumerator<string> sources = FindHttpSrc(html).GetEnumerator())
            {
                // No matching source: the entry produces no output at all.
                if (!sources.MoveNext())
                {
                    return;
                }

                // The header is emitted once, just before the first source.
                var title = entry.Element(s_atomNs + "title").Value;
                var alternateLink = entry.Elements(s_atomNs + "link")
                    .Single(link => (string)link.Attribute("rel") == "alternate");

                Console.WriteLine(title);
                Console.WriteLine(alternateLink.Attribute("href").Value);
                Console.WriteLine();

                do
                {
                    Console.WriteLine(sources.Current);
                } while (sources.MoveNext());

                // Two blank lines separate this entry from the next reported one.
                Console.WriteLine();
                Console.WriteLine();
            }
        }

        /// <summary>
        /// Parses <paramref name="html"/> and returns the <c>src</c> attribute values
        /// that start with <c>http://</c> (compared case-insensitively).
        /// </summary>
        /// <param name="html">HTML markup to inspect.</param>
        /// <returns>A lazily evaluated sequence of the matching attribute values.</returns>
        private static IEnumerable<string> FindHttpSrc(string html)
        {
            // Parsing happens right away; only the filtering below is deferred.
            var document = s_parser.Parse(html);

            return from element in document.Descendents<IElement>()
                   let src = element.GetAttribute("src")
                   where src != null && src.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                   select src;
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment