Skip to content

Instantly share code, notes, and snippets.

@alikrc
Created February 4, 2017 11:37
Show Gist options
  • Save alikrc/876f858a648d3baae391301af561a96d to your computer and use it in GitHub Desktop.
Save alikrc/876f858a648d3baae391301af561a96d to your computer and use it in GitHub Desktop.
C#: extract the links from a web site
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net.Http;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace FindWebLinks
{
    /// <summary>
    /// Console program that downloads the root page of a fixed web site and
    /// prints every distinct double-quoted <c>href</c> value found in its HTML,
    /// followed by the number of distinct values.
    /// </summary>
    public class Program
    {
        /// <summary>
        /// Entry point: runs the download-and-print routine to completion.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            // GetAwaiter().GetResult() rethrows the original exception, whereas
            // Wait() would wrap it in an AggregateException.
            SendRequest().GetAwaiter().GetResult();
        }

        /// <summary>
        /// Requests "/" from the configured base address, extracts the links
        /// from the response body and writes them to the console.
        /// </summary>
        /// <returns>A task that completes once all links have been printed.</returns>
        /// <remarks>
        /// Any exception is written to the console and then rethrown, so the
        /// caller still observes the failure.
        /// </remarks>
        private static async Task SendRequest()
        {
            try
            {
                using (var client = new HttpClient())
                {
                    client.BaseAddress = new Uri("http://www.vatanbilgisayar.com");

                    // The response owns the content stream, so dispose it too.
                    using (var response = await client.GetAsync("/"))
                    {
                        // Throws HttpRequestException for non-success status codes.
                        response.EnsureSuccessStatusCode();

                        var html = await response.Content.ReadAsStringAsync();
                        var linkList = ParseLinksFromHtmlString(html);

                        foreach (var link in linkList)
                        {
                            Console.WriteLine(link);
                        }

                        Console.WriteLine("Total links count: " + linkList.Count);
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
                throw;
            }
        }

        /// <summary>
        /// Extracts the distinct values of all double-quoted <c>href</c>
        /// attributes from an HTML string, in order of first appearance.
        /// </summary>
        /// <param name="html">The HTML text to scan. Must not be null.</param>
        /// <returns>
        /// The attribute values without the surrounding <c>href="…"</c> text;
        /// an empty list when nothing matches.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown by <see cref="Regex.Matches(string, string)"/> when
        /// <paramref name="html"/> is null.
        /// </exception>
        private static List<string> ParseLinksFromHtmlString(string html)
        {
            var matches = Regex.Matches(html, "href=\"(.*?)\"");

            // Group 1 is the text between the quotes; Match.Value would be the
            // whole attribute (href="...") rather than the link itself.
            return matches.Cast<Match>()
                .Select(match => match.Groups[1].Value)
                .Distinct()
                .ToList();
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment