Skip to content

Instantly share code, notes, and snippets.

@gidili
Created April 15, 2010 02:00
Show Gist options
  • Save gidili/366599 to your computer and use it in GitHub Desktop.
Save gidili/366599 to your computer and use it in GitHub Desktop.
/// <summary>
/// The kinds of content a single section of a tweet can be tagged as.
/// </summary>
public enum TweetSectionType
{
    /// <summary>Plain text; the first member and therefore the default value.</summary>
    NormalText,

    /// <summary>A web address.</summary>
    Url,

    /// <summary>An "@name" style mention.</summary>
    AtName,

    /// <summary>A "#tag" style hashtag.</summary>
    HashCode
}
/// <summary>
/// One section of a tweet: a piece of text together with the kind of
/// content that text was classified as.
/// </summary>
public class TweetSection
{
    /// <summary>What kind of content <see cref="Text"/> holds.</summary>
    public TweetSectionType SectionType;

    /// <summary>The text of this section.</summary>
    public string Text;
}
/// <summary>
/// Breaks a tweet into its space-separated words and tags each word as a URL,
/// an @-mention, a #-hashtag or plain text.
/// </summary>
public class TweetDecoder
{
    #region regex patterns

    /// <summary>Matches "@" followed by letters, digits or underscores, anywhere in the input.</summary>
    public const string atRegexPattern = @"@([A-Za-z0-9_]+)";

    /// <summary>Matches "#" followed by letters, digits or underscores, anywhere in the input.</summary>
    public const string hashRegexPattern = @"#([A-Za-z0-9_]+)";

    // here be dragons
    /// <summary>
    /// URL pattern. It is anchored with ^ and $, so the whole input must be a URL.
    /// Per its inline (?#...) comments it covers an optional protocol and
    /// user:password, a host name or IP address, and an optional port, path,
    /// query string and anchor.
    /// </summary>
    public const string urlRegexDragonPattern = @"^(?#Protocol)(?:(?:ht|f)tp(?:s?)\:\/\/|~\/|\/)?(?#Username:Password)(?:\w+:\w+@)?((?#Subdomains)(?:(?:[-\w\d{1-3}]+\.)+(?#TopLevel Domains)(?:com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|edu|co\.uk|ac\.uk|it|fr|tv|museum|asia|local|travel|[a-z]{2})?)|(?#IP)((\b25[0-5]\b|\b[2][0-4][0-9]\b|\b[0-1]?[0-9]?[0-9]\b)(\.(\b25[0-5]\b|\b[2][0-4][0-9]\b|\b[0-1]?[0-9]?[0-9]\b)){3}))(?#Port)(?::[\d]{1,5})?(?#Directories)(?:(?:(?:\/(?:[-\w~!$+|.,=]|%[a-f\d]{2})+)+|\/)+|\?|#)?(?#Query)(?:(?:\?(?:[-\w~!$+|.,*:]|%[a-f\d{2}])+=?(?:[-\w~!$+|.,*:=]|%[a-f\d]{2})*)(?:&(?:[-\w~!$+|.,*:]|%[a-f\d{2}])+=?(?:[-\w~!$+|.,*:=]|%[a-f\d]{2})*)*)*(?#Anchor)(?:#(?:[-\w~!$ |/.,*:;=]|%[a-f\d]{2})*)?$";

    #endregion

    #region regexes

    // The patterns never change, so each regex is built once, compiled, and
    // shared; readonly keeps the shared instances from being reassigned.
    private static readonly Regex atRegex = new Regex(atRegexPattern, RegexOptions.Compiled);
    private static readonly Regex hashRegex = new Regex(hashRegexPattern, RegexOptions.Compiled);
    private static readonly Regex urlRegex = new Regex(urlRegexDragonPattern, RegexOptions.Compiled);

    #endregion

    /// <summary>
    /// Splits <paramref name="originalTweet"/> on single spaces and returns one
    /// <see cref="TweetSection"/> per resulting word, in order.
    /// </summary>
    /// <param name="originalTweet">The tweet text to decode. Must not be null.</param>
    /// <returns>
    /// One section per word. Each section's Text is the word exactly as it
    /// appeared between spaces, so consecutive spaces (or an empty tweet)
    /// produce sections with empty text tagged as NormalText.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="originalTweet"/> is null.
    /// </exception>
    public static List<TweetSection> DecodeSections(string originalTweet)
    {
        if (originalTweet == null)
        {
            throw new System.ArgumentNullException("originalTweet");
        }

        string[] words = originalTweet.Split(' ');

        // Exactly one section is produced per word, so the final size is known.
        var sections = new List<TweetSection>(words.Length);

        foreach (var word in words)
        {
            sections.Add(new TweetSection { SectionType = Classify(word), Text = word });
        }

        return sections;
    }

    /// <summary>
    /// Decides which kind of section a single word is.
    /// </summary>
    private static TweetSectionType Classify(string word)
    {
        // The URL pattern must match the whole word, while the mention and
        // hashtag patterns match anywhere inside it. Testing the URL first
        // keeps a link with a "#fragment" or a "user:password@" part from
        // being tagged as a hashtag or a mention.
        if (urlRegex.IsMatch(word))
        {
            return TweetSectionType.Url;
        }

        if (atRegex.IsMatch(word))
        {
            return TweetSectionType.AtName;
        }

        if (hashRegex.IsMatch(word))
        {
            return TweetSectionType.HashCode;
        }

        return TweetSectionType.NormalText;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment