Created
November 16, 2017 10:47
-
-
Save azyobuzin/0ea20253e1d0e065a81e1b60fd6be9ee to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.IO; | |
using System.Net.Http; | |
using System.Net.Http.Headers; | |
using System.Text; | |
using System.Threading.Tasks; | |
using System.Xml; | |
namespace HatenaBlogDownloader
{
    /// <summary>
    /// Console tool that downloads every page of a Hatena Blog Atom entry feed and
    /// stores each page as a numbered XML file in the current directory.
    /// </summary>
    class Program
    {
        /// <summary>XML namespace of the Atom elements inspected in each feed page.</summary>
        private const string AtomNamespace = "http://www.w3.org/2005/Atom";

        /// <summary>
        /// Prompts for the Hatena ID, blog ID and API key, then downloads the entry
        /// collection page by page, following each page's <c>rel="next"</c> link until
        /// a page has none. Page <c>i</c> is written to a file named with the
        /// zero-padded page number (<c>001.xml</c>, <c>002.xml</c>, ...).
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        /// <exception cref="XmlException">A downloaded page is not an Atom <c>feed</c> document.</exception>
        static async Task Main(string[] args)
        {
            Console.Write("ID: ");
            var id = Console.ReadLine();
            Console.Write("Blog ID: ");
            var blogId = Console.ReadLine();
            Console.Write("API Key: ");
            var apiKey = Console.ReadLine();

            using (var client = new HttpClient())
            {
                // HTTP Basic authentication with "<id>:<apiKey>" as the credentials.
                client.DefaultRequestHeaders.Authorization = new AuthenticationHeaderValue(
                    "Basic", Convert.ToBase64String(Encoding.UTF8.GetBytes(id + ":" + apiKey)));

                var requestUri = "https://blog.hatena.ne.jp/" + id + "/" + blogId + "/atom/entry";

                for (var i = 1; requestUri != null; i++)
                {
                    var outputName = $"{i:D03}.xml";
                    Console.WriteLine(requestUri + " -> " + outputName);

                    // The request is issued before the file is created, so a failed
                    // request does not leave an empty output file behind.
                    using (var res = await client.GetStreamAsync(requestUri))
                    using (var fs = new FileStream(outputName, FileMode.Create, FileAccess.ReadWrite))
                    {
                        // Copy asynchronously instead of blocking inside an async method.
                        await res.CopyToAsync(fs);

                        // Re-read the page that was just saved to discover the next one.
                        fs.Seek(0, SeekOrigin.Begin);
                        requestUri = FindNextPageUri(fs);
                    }
                }
            }
        }

        /// <summary>
        /// Scans the direct children of the root Atom <c>feed</c> element for a
        /// <c>link</c> element whose <c>rel</c> attribute is <c>next</c>.
        /// </summary>
        /// <param name="feed">Readable stream positioned at the start of the feed XML.</param>
        /// <returns>
        /// The <c>href</c> attribute of the first matching link, or <c>null</c> when the
        /// feed has no such link.
        /// </returns>
        /// <exception cref="XmlException">The document's root element is not an Atom <c>feed</c>.</exception>
        private static string FindNextPageUri(Stream feed)
        {
            using (var reader = XmlReader.Create(feed))
            {
                // Advance to the root element, skipping the XML declaration and any
                // other nodes that precede it.
                do
                {
                    if (!reader.Read())
                    {
                        throw new XmlException("The downloaded document contains no root element.");
                    }
                } while (reader.NodeType != XmlNodeType.Element);

                reader.ReadStartElement("feed", AtomNamespace);

                // The reader now sits on the first child node of <feed>. Inspect the
                // current node before moving on: XmlReader.ReadToNextSibling always
                // skips the current node first, so it would miss a <link> that is the
                // very first child with no whitespace in front of it.
                while (!reader.EOF && reader.NodeType != XmlNodeType.EndElement)
                {
                    if (reader.NodeType == XmlNodeType.Element
                        && reader.LocalName == "link"
                        && reader.NamespaceURI == AtomNamespace
                        && reader.GetAttribute("rel") == "next")
                    {
                        return reader.GetAttribute("href");
                    }

                    // Skip() steps over the whole subtree of an element, so only the
                    // direct children of <feed> are ever examined.
                    reader.Skip();
                }

                return null;
            }
        }
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.IO; | |
using System.Linq; | |
using System.Xml.Linq; | |
using AngleSharp.Dom; | |
using AngleSharp.Extensions; | |
using AngleSharp.Parser.Html; | |
namespace HttpLinkChecker
{
    /// <summary>
    /// Console tool that scans Atom feed XML files in the current directory and
    /// reports, per entry, every <c>src</c> attribute in the entry's formatted HTML
    /// content that points at a plain <c>http://</c> URL.
    /// </summary>
    class Program
    {
        private static readonly XNamespace s_atomNs = "http://www.w3.org/2005/Atom";
        private static readonly XNamespace s_hatenaNs = "http://www.hatena.ne.jp/info/xmlns#";
        private static readonly HtmlParser s_parser = new HtmlParser();

        /// <summary>
        /// Loads every <c>*.xml</c> file in the current directory and, for each Atom
        /// <c>entry</c> that contains at least one <c>http://</c> source, prints the
        /// entry title, its alternate link, a blank line, the offending URLs and two
        /// trailing blank lines.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args)
        {
            // Directory.GetFiles does not guarantee an order; sort so that the report
            // is stable between runs.
            var paths = Directory.GetFiles(".", "*.xml").OrderBy(p => p, StringComparer.Ordinal);

            foreach (var path in paths)
            {
                foreach (var entry in XElement.Load(path).Elements(s_atomNs + "entry"))
                {
                    // The explicit string conversion yields null for a missing element
                    // instead of throwing NullReferenceException like `.Value` would.
                    var html = (string)entry.Element(s_hatenaNs + "formatted-content");
                    if (html == null)
                    {
                        continue;
                    }

                    var sources = FindHttpSrc(html).ToList();
                    if (sources.Count == 0)
                    {
                        continue;
                    }

                    PrintEntryHeader(entry);
                    foreach (var src in sources)
                    {
                        Console.WriteLine(src);
                    }

                    Console.WriteLine();
                    Console.WriteLine();
                }
            }
        }

        /// <summary>
        /// Prints the entry's title, the <c>href</c> of its <c>rel="alternate"</c> link
        /// and a separating blank line. A missing title or link is printed as an empty
        /// line rather than aborting the whole run.
        /// </summary>
        /// <param name="entry">The Atom <c>entry</c> element being reported.</param>
        private static void PrintEntryHeader(XElement entry)
        {
            var title = (string)entry.Element(s_atomNs + "title");
            var alternate = entry.Elements(s_atomNs + "link")
                .FirstOrDefault(x => (string)x.Attribute("rel") == "alternate");
            var href = (string)alternate?.Attribute("href");

            Console.WriteLine(title);
            Console.WriteLine(href);
            Console.WriteLine();
        }

        /// <summary>
        /// Parses an HTML fragment and returns the <c>src</c> attribute values that
        /// start with <c>http://</c> (compared case-insensitively).
        /// </summary>
        /// <param name="html">HTML markup to inspect.</param>
        /// <returns>
        /// A lazily evaluated sequence of matching <c>src</c> values, in document order;
        /// elements without a <c>src</c> attribute are ignored.
        /// </returns>
        private static IEnumerable<string> FindHttpSrc(string html)
        {
            var document = s_parser.Parse(html);
            // "Descendents" is the spelling used by the AngleSharp extension method.
            return document.Descendents<IElement>()
                .Select(x => x.GetAttribute("src"))
                .Where(x => x != null && x.StartsWith("http://", StringComparison.OrdinalIgnoreCase));
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment