Skip to content

Instantly share code, notes, and snippets.

@EgorBo
Last active November 6, 2022 20:03
Show Gist options
  • Save EgorBo/917287255448d1e018306be7be7c0968 to your computer and use it in GitHub Desktop.
Save EgorBo/917287255448d1e018306be7be7c0968 to your computer and use it in GitHub Desktop.
blog-parser.cs
using System.Text.RegularExpressions;
/*
Top 25 authors, by PR count, of all 512 PRs linked from
https://devblogs.microsoft.com/dotnet/performance_improvements_in_net_7/:
stephentoub -- 148
EgorBo -- 45
tannergooding -- 26
vcsjones -- 23
adamsitnik -- 17
elinor-fung -- 14
AndyAyersMS -- 10
wfurt -- 9
eiriktsarpalis -- 9
TIHan -- 8
jkoritzinsky -- 8
AaronRobinsonMSFT -- 7
bartonjs -- 7
kunalspathak -- 6
simonrozsival -- 6
BruceForstall -- 5
jkotas -- 5
kouvel -- 5
teo-tsirpanis -- 5
olsaarik -- 5
joperezr -- 5
tmds -- 5
SingleAccretion -- 4
MichalStrehovsky -- 4
vargaz -- 4
*/
// Collects the author nickname of every pull request linked from the
// "Performance Improvements in .NET 7" blog post by scraping the <title>
// of each linked GitHub page.

// Markers found in a GitHub pull request page title, which looks like
// "<PR title> by <nick> · Pull Request #<n> · <owner>/<repo>".
const string PullRequestMarker = " · Pull Request #";
const string AuthorMarker = " by ";

// One HttpClient is shared by every request: creating a new client per
// request leaves sockets lingering and can exhaust them over hundreds of calls.
using HttpClient httpClient = new();

string file = await httpClient.GetStringAsync(
    "https://devblogs.microsoft.com/dotnet/performance_improvements_in_net_7/");

MatchCollection pullRequestUrls =
    Regex.Matches(file, @"https:\/\/github.com\/[a-zA-Z-]+\/[a-zA-Z-]+\/pull\/[0-9]+");

// The same URL can be matched more than once, so de-duplicate before counting;
// otherwise the progress denominator would not match the number of pages fetched.
List<string> distinctUrls = pullRequestUrls
    .Select(m => m.Value.Trim())
    .Distinct()
    .ToList();

int total = distinctUrls.Count;
int position = 0;
List<string> authors = new();

foreach (string pullRequestUrl in distinctUrls)
{
    // Count every fetched URL (including skipped ones) so progress ends at total/total.
    position++;

    string content = await httpClient.GetStringAsync(pullRequestUrl);

    // Group 1 is the text between <title> and </title>; it is empty when no title is found.
    string title = Regex.Match(content, @"<title>(.*?)<\/title>").Groups[1].Value.Trim();

    // Ordinal search: these markers are fixed machine-generated text, not linguistic data.
    int markerIndex = title.IndexOf(PullRequestMarker, StringComparison.Ordinal);
    if (markerIndex < 0)
    {
        // there are 6 links which are github issues rather than PRs despite /pull/ in the url
        continue;
    }

    title = title.Substring(0, markerIndex);

    // The author is whatever follows the LAST " by ", since the PR title itself may contain " by ".
    int authorIndex = title.LastIndexOf(AuthorMarker, StringComparison.Ordinal);
    if (authorIndex < 0)
    {
        // Without this guard a title lacking " by " would yield a garbage nickname
        // (the title minus its first three characters) instead of being reported.
        Console.Error.WriteLine(
            $"{position}/{total}: no author found in \"{title}\" ({pullRequestUrl})");
        continue;
    }

    string nick = title.Substring(authorIndex + AuthorMarker.Length).Trim();
    Console.WriteLine($"{position}/{total}: {title}");
    authors.Add(nick);
}
// Print the 25 authors with the most PRs, one per line, as "<nick> -- <count>".
// Nicknames are right-padded to a common width so the counts line up in a column.

// The column width depends only on the full author list, so compute it once
// instead of once per printed row. DefaultIfEmpty keeps Max() from throwing
// when no authors were collected.
int nickColumnWidth = authors.Select(a => a.Length).DefaultIfEmpty(0).Max() + 1;

// Count each group once and carry the count along for both sorting and printing.
// OrderByDescending is a stable sort, so tied authors keep first-seen order.
var topAuthors = authors
    .GroupBy(a => a)
    .Select(g => (Nick: g.Key, PullRequests: g.Count()))
    .OrderByDescending(entry => entry.PullRequests)
    .Take(25);

foreach (var (authorNick, pullRequests) in topAuthors)
{
    Console.WriteLine($"{authorNick.PadRight(nickColumnWidth)} -- {pullRequests}");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment