Created
November 24, 2013 12:11
-
-
Save svick/7626527 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using HtmlAgilityPack; | |
namespace nuget_tmp | |
{ | |
/// <summary>
/// Console tool that downloads two "unanswered" question listings from
/// codereview.stackexchange.com and prints how many questions each listing
/// contains, how many of those are distinct, and which questions appear in
/// one listing but not in the other.
/// </summary>
static class Program
{
    /// <summary>
    /// Upper bound on the number of listing pages requested per URL template.
    /// </summary>
    private const int MaxPages = 20;

    /// <summary>
    /// Entry point: fetches both listings, then writes the counts and the two
    /// set differences to the console.
    /// </summary>
    private static void Main()
    {
        var questionsUnanswered = GetQuestions("http://codereview.stackexchange.com/questions?page={0}&sort=unanswered&pagesize=50").ToList();
        var unansweredVotes = GetQuestions("http://codereview.stackexchange.com/unanswered/tagged/?page={0}&tab=votes&pagesize=50").ToList();

        Console.WriteLine("QU: {0}", questionsUnanswered.Count);
        Console.WriteLine("UV: {0}", unansweredVotes.Count);
        Console.WriteLine("QU.D: {0}", questionsUnanswered.Distinct().Count());
        Console.WriteLine("UV.D: {0}", unansweredVotes.Distinct().Count());
        Console.WriteLine();

        // Except() relies on Question's equality, so questions are matched by URL.
        var firstExceptSecond = questionsUnanswered.Except(unansweredVotes).ToList();
        var secondExceptFirst = unansweredVotes.Except(questionsUnanswered).ToList();

        Console.WriteLine("QU \\ UV: {0}", firstExceptSecond.Count);
        foreach (var question in firstExceptSecond)
        {
            Console.WriteLine(question);
        }

        Console.WriteLine();

        Console.WriteLine("UV \\ QU: {0}", secondExceptFirst.Count);
        foreach (var question in secondExceptFirst)
        {
            Console.WriteLine(question);
        }
    }

    /// <summary>
    /// Lazily downloads listing pages 1 through <see cref="MaxPages"/> and yields
    /// one <see cref="Question"/> per question link found on each page.
    /// </summary>
    /// <param name="urlTemplate">
    /// Composite format string for the listing URL; <c>{0}</c> is replaced by
    /// the 1-based page number.
    /// </param>
    /// <returns>
    /// The questions in page order, each tagged with the page it was found on.
    /// Enumeration ends early at the first page that contains no question links.
    /// </returns>
    private static IEnumerable<Question> GetQuestions(string urlTemplate)
    {
        var htmlWeb = new HtmlWeb();
        htmlWeb.UserAgent = "Svick's scraper for http://meta.codereview.stackexchange.com/q/1042/2041";

        for (int page = 1; page <= MaxPages; page++)
        {
            string url = string.Format(urlTemplate, page);
            var document = htmlWeb.Load(url);
            var nodes = document.DocumentNode.SelectNodes("//a[@class='question-hyperlink']");

            // SelectNodes returns null (not an empty collection) when nothing
            // matches; that happens once we page past the end of the listing,
            // so there is no point in requesting further pages.
            if (nodes == null)
            {
                yield break;
            }

            foreach (var node in nodes)
            {
                string questionUrl = node.GetAttributeValue("href", null);

                // An anchor without an href cannot identify a question; skip it
                // rather than emit a Question whose Url is null.
                if (questionUrl == null)
                {
                    continue;
                }

                yield return new Question(questionUrl, page);
            }
        }
    }
}
/// <summary>
/// A question link found on a listing page. Two instances are considered equal
/// when their <see cref="Url"/> values are equal; <see cref="Page"/> does not
/// take part in equality or hashing.
/// </summary>
sealed class Question : IEquatable<Question>
{
    /// <summary>
    /// Creates a question record.
    /// </summary>
    /// <param name="url">The question's URL as found in the link's href; may be null.</param>
    /// <param name="page">The listing page number the link was found on.</param>
    public Question(string url, int page)
    {
        Url = url;
        Page = page;
    }

    /// <summary>The question's URL; this is the identity used for equality.</summary>
    public string Url { get; private set; }

    /// <summary>The listing page number the question was found on.</summary>
    public int Page { get; private set; }

    /// <summary>
    /// Compares two questions by <see cref="Url"/> using an ordinal comparison.
    /// </summary>
    /// <param name="other">The question to compare with; may be null.</param>
    /// <returns>
    /// <c>true</c> when <paramref name="other"/> is non-null and has the same URL.
    /// </returns>
    public bool Equals(Question other)
    {
        if (ReferenceEquals(null, other))
        {
            return false;
        }

        if (ReferenceEquals(this, other))
        {
            return true;
        }

        return string.Equals(Url, other.Url, StringComparison.Ordinal);
    }

    /// <summary>
    /// Compares with an arbitrary object; anything that is not a
    /// <see cref="Question"/> is unequal.
    /// </summary>
    public override bool Equals(object obj)
    {
        return Equals(obj as Question);
    }

    /// <summary>
    /// Hash code derived from <see cref="Url"/> only, consistent with
    /// <see cref="Equals(Question)"/>.
    /// </summary>
    public override int GetHashCode()
    {
        // Equals treats two null URLs as equal, so hashing must not throw on a
        // null Url; otherwise Distinct()/Except() would crash on such items.
        return Url == null ? 0 : Url.GetHashCode();
    }

    /// <summary>
    /// Formats the question as "<c>Url; Page</c>".
    /// </summary>
    public override string ToString()
    {
        return string.Format("{0}; {1}", Url, Page);
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment