Created
October 13, 2012 05:11
-
-
Save JuanKRuiz/3883319 to your computer and use it in GitHub Desktop.
Extracts the first image URL from an HTML string
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Regular expression that finds image tags in an HTML string and captures the value of
/// their <c>src</c> attribute in the named group <c>Url</c>.
/// </summary>
/// <remarks>
/// The attribute scan is lazy and rejects names that merely end in "src" (such as
/// <c>data-src</c>), so the first real <c>src</c> attribute of a tag is captured rather
/// than the last look-alike. The value must be closed by the same quote character that
/// opened it, which lets a double-quoted URL contain an apostrophe and vice versa.
/// Both alternatives reuse the group name <c>Url</c>; only one of them captures per match.
/// </remarks>
private const string STR_IMGTAG_SRC_EXP = @"<img\s+[^>]*?(?<![\w\-])src\s*=\s*(?:\x22(?<Url>[^\x22]*)\x22|\x27(?<Url>[^\x27]*)\x27)";
/// <summary>
/// Extracts every non-blank image URL from an HTML string.
/// </summary>
/// <param name="htmlString">A string containing HTML code; may be null or empty.</param>
/// <returns>
/// The <c>Url</c> group of each match of <see cref="STR_IMGTAG_SRC_EXP"/>, in order of
/// occurrence, skipping values that are empty or whitespace. The list is empty (never
/// null) when <paramref name="htmlString"/> is null, empty, or contains no match.
/// </returns>
/// <remarks>This method uses regular expressions, so
/// <c>using System.Text.RegularExpressions;</c> must be added.</remarks>
public static List<string> ExtractImageUrisFromHtml(string htmlString)
{
    var urls = new List<string>();

    // Regex.Matches throws ArgumentNullException on null input; "no HTML" simply means "no images".
    if (string.IsNullOrEmpty(htmlString))
    {
        return urls;
    }

    // The static Regex API caches the parsed pattern, so it is not rebuilt on every call.
    var matches = Regex.Matches(
        htmlString,
        STR_IMGTAG_SRC_EXP,
        RegexOptions.IgnoreCase | RegexOptions.Multiline);

    foreach (Match match in matches)
    {
        var url = match.Groups["Url"].Value;
        if (!string.IsNullOrWhiteSpace(url))
        {
            urls.Add(url);
        }
    }

    return urls;
}
/// <summary>Placeholder URI returned when no image URL can be extracted.</summary>
private const string TEMPURI = "http://tempuri.org";

/// <summary>
/// Extracts the first non-blank image URL from an HTML string.
/// </summary>
/// <param name="htmlString">A string containing HTML code; may be null or empty.</param>
/// <returns>
/// The <c>Url</c> group of the first match of <see cref="STR_IMGTAG_SRC_EXP"/> whose value
/// is not empty or whitespace, or <see cref="TEMPURI"/> when
/// <paramref name="htmlString"/> is null, empty, or contains no such match.
/// </returns>
/// <remarks>This method uses regular expressions, so
/// <c>using System.Text.RegularExpressions;</c> must be added.</remarks>
public static string ExtractFirstHtmlImage(string htmlString)
{
    // Explicit guard instead of relying on a catch-all around Regex.Match(null).
    if (string.IsNullOrEmpty(htmlString))
    {
        return TEMPURI;
    }

    try
    {
        // The static Regex API caches the parsed pattern, so it is not rebuilt on every call.
        var match = Regex.Match(
            htmlString,
            STR_IMGTAG_SRC_EXP,
            RegexOptions.IgnoreCase | RegexOptions.Multiline);

        // Skip image tags with a blank src so a later, usable image is still found.
        while (match.Success)
        {
            var url = match.Groups["Url"].Value;
            if (!string.IsNullOrWhiteSpace(url))
            {
                return url;
            }

            match = match.NextMatch();
        }
    }
    catch (RegexMatchTimeoutException)
    {
        // Best effort: if a process-wide regex timeout is configured and expires,
        // fall back to the placeholder instead of propagating the failure.
    }

    return TEMPURI;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment