Created
May 24, 2012 13:50
-
-
Save jbubriski/2781650 to your computer and use it in GitHub Desktop.
Find all links reachable from a base URL, then check that the same relative paths also resolve under a new base URL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.IO; | |
using System.Linq; | |
using System.Net; | |
using System.Text; | |
using System.Text.RegularExpressions; | |
using System.Windows; | |
using System.Windows.Controls; | |
using System.Windows.Data; | |
using System.Windows.Documents; | |
using System.Windows.Input; | |
using System.Windows.Media; | |
using System.Windows.Media.Imaging; | |
using System.Windows.Navigation; | |
using System.Windows.Shapes; | |
namespace MigrationBrokenLinkChecker | |
{ | |
/// <summary>
/// Records the outcome of requesting one URL.
/// </summary>
public class LinkCheckResult
{
    /// <summary>
    /// Gets or sets the absolute URL that was requested.
    /// </summary>
    public string Url { get; set; }

    /// <summary>
    /// Gets or sets the part of <see cref="Url"/> that remains once the base URL is removed.
    /// </summary>
    public string RelativeUrl { get; set; }

    /// <summary>
    /// Gets or sets a value indicating whether the request completed without an error.
    /// </summary>
    public bool Successful { get; set; }
}
/// <summary>
/// Interaction logic for MainWindow.xaml.
/// Crawls every page reachable from a source base URL, then requests the same
/// relative paths under a target base URL and reports which ones fail.
/// </summary>
public partial class MainWindow : Window
{
    // Captures the value of every double-quoted href attribute in a page.
    // Built once instead of on every crawled page.
    private static readonly Regex HrefPattern = new Regex("href=\"(.*?)\"");

    // Results of crawling the source site, in crawl order.
    private List<LinkCheckResult> _links;

    // Results of requesting each crawled path against the target site.
    private List<LinkCheckResult> _links2;

    // Every URL already handed to GetLinks, so a page is never requested twice.
    private HashSet<string> _visitedUrls;

    public string _baseSourceUrl { get; set; }
    public string _baseTargetUrl { get; set; }

    public MainWindow()
    {
        InitializeComponent();
    }

    /// <summary>
    /// Runs a full check: crawl the source site, then test each discovered path
    /// against the target site. All requests are made synchronously inside this handler.
    /// </summary>
    private void uxGo_Click(object sender, RoutedEventArgs e)
    {
        _baseSourceUrl = (uxBaseSourceUrl.Text ?? "").Trim();
        _baseTargetUrl = (uxBaseTargetUrl.Text ?? "").Trim();

        // An empty base URL cannot be crawled, and string.Replace/prefix stripping
        // on an empty base previously threw from inside the error handler.
        if (_baseSourceUrl.Length == 0 || _baseTargetUrl.Length == 0)
        {
            Out("Enter both a source and a target base URL.");
            return;
        }

        _links = new List<LinkCheckResult>();
        _links2 = new List<LinkCheckResult>();
        _visitedUrls = new HashSet<string>(StringComparer.Ordinal);

        Out("Gathering links...");
        GetLinks(_baseSourceUrl);
        Out("Done.");
        Out("");
        Out("");
        Out("Checking links against new URL...");
        CheckLinks(_baseTargetUrl);
    }

    /// <summary>
    /// Appends a line to the on-screen log and mirrors it to the debug output.
    /// </summary>
    /// <param name="text">The line to write, without a trailing newline.</param>
    private void Out(string text)
    {
        uxDebug.Text += text + "\r\n";
        Debug.WriteLine(text);
    }

    /// <summary>
    /// Requests <paramref name="url"/>, records exactly one result for it in the
    /// source list, and recursively crawls every eligible link found in the page.
    /// </summary>
    /// <param name="url">Absolute URL of the page to fetch.</param>
    private void GetLinks(string url)
    {
        // Mark before fetching so self-links and cycles are never requested again,
        // whether this request succeeds or fails.
        _visitedUrls.Add(url);

        string content;
        try
        {
            var httpWebRequest = (HttpWebRequest)WebRequest.Create(url);

            // Dispose the response itself (not only its stream) so the connection
            // is released before any child page is requested.
            using (var webResponse = httpWebRequest.GetResponse())
            using (var responseStream = webResponse.GetResponseStream())
            using (var streamReader = new StreamReader(responseStream))
            {
                content = streamReader.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            // Best effort: any failure (bad URI, network error, HTTP error status)
            // marks the link as broken. Keep the reason in the debug output.
            Debug.WriteLine("Request failed for " + url + ": " + ex.Message);
            RecordSourceResult(url, false);
            return;
        }

        // Recorded only after the body was read completely, so a failure while
        // reading can no longer produce both a success and a failure entry.
        RecordSourceResult(url, true);

        foreach (Match match in HrefPattern.Matches(content))
        {
            var childUrl = StripFragment(match.Groups[1].Value);
            if (ShouldCrawl(childUrl))
            {
                GetLinks(childUrl);
            }
        }
    }

    /// <summary>
    /// Requests every crawled relative path under <paramref name="baseUrl"/> and
    /// records one result per path in the target list.
    /// </summary>
    /// <param name="baseUrl">Base URL of the site the links are checked against.</param>
    private void CheckLinks(string baseUrl)
    {
        foreach (var link in _links)
        {
            var newLink = baseUrl + link.RelativeUrl;
            bool successful;

            try
            {
                var httpWebRequest = (HttpWebRequest)WebRequest.Create(newLink);

                // Only whether the request succeeds matters; the body is not read.
                using (httpWebRequest.GetResponse())
                {
                    successful = true;
                }
            }
            catch (Exception ex)
            {
                Debug.WriteLine("Request failed for " + newLink + ": " + ex.Message);
                successful = false;
            }

            _links2.Add(new LinkCheckResult
            {
                Url = newLink,
                RelativeUrl = link.RelativeUrl,
                Successful = successful
            });
            Out((successful ? "1" : "0") + " - " + link.RelativeUrl);
        }
    }

    // Adds one crawl result for the source site and logs it as "1 - path" or "0 - path".
    private void RecordSourceResult(string url, bool successful)
    {
        var relativeUrl = ToRelativeUrl(url, _baseSourceUrl);
        _links.Add(new LinkCheckResult
        {
            Url = url,
            RelativeUrl = relativeUrl,
            Successful = successful
        });
        Out((successful ? "1" : "0") + " - " + relativeUrl);
    }

    // Decides whether a link found in a page belongs to the source site, has not
    // been seen yet, and is not one of the excluded asset/feed/archive URLs.
    private bool ShouldCrawl(string childUrl)
    {
        return !_visitedUrls.Contains(childUrl)
            && childUrl.StartsWith(_baseSourceUrl, StringComparison.Ordinal)
            && !childUrl.EndsWith(".css", StringComparison.Ordinal)
            && !childUrl.EndsWith(".png", StringComparison.Ordinal)
            && !childUrl.EndsWith(".zip", StringComparison.Ordinal)
            && !childUrl.EndsWith("/feed/", StringComparison.Ordinal)
            && !childUrl.Contains("/tag/")
            && !childUrl.Contains("/category/");
    }

    // Removes only a leading base URL; unlike string.Replace it tolerates an empty
    // base and leaves later occurrences (e.g. inside a query string) untouched.
    private static string ToRelativeUrl(string url, string baseUrl)
    {
        if (!string.IsNullOrEmpty(baseUrl) && url.StartsWith(baseUrl, StringComparison.Ordinal))
        {
            return url.Substring(baseUrl.Length);
        }

        return url;
    }

    // Drops a trailing "#fragment". Fragments are not sent to the server, so links
    // that differ only by fragment would otherwise fetch the same page repeatedly.
    private static string StripFragment(string url)
    {
        var hashIndex = url.IndexOf('#');
        return hashIndex < 0 ? url : url.Substring(0, hashIndex);
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment