Skip to content

Instantly share code, notes, and snippets.

@gidili
Created April 17, 2010 00:39
Show Gist options
  • Save gidili/369128 to your computer and use it in GitHub Desktop.
Save gidili/369128 to your computer and use it in GitHub Desktop.
/// <summary>
/// The kinds of section a tweet is broken into by <c>TweetDecoder.DecodeSections</c>.
/// </summary>
public enum TweetSectionType
{
    /// <summary>Plain text; the default for any word that matches none of the patterns.</summary>
    NormalText,

    /// <summary>A word that matched the decoder's url pattern.</summary>
    Url,

    /// <summary>A word that matched the decoder's "@name" pattern.</summary>
    AtName,

    /// <summary>A word that matched the decoder's "#tag" pattern.</summary>
    HashCode
}
/// <summary>
/// One piece of a decoded tweet: a chunk of text plus the kind of content it holds.
/// </summary>
public class TweetSection
{
    /// <summary>What kind of content <see cref="Text"/> holds.</summary>
    public TweetSectionType SectionType;

    /// <summary>
    /// The raw text of the section. As filled in by <c>TweetDecoder.DecodeSections</c>
    /// this is either a single word or a run of normal-text words joined by spaces.
    /// </summary>
    public string Text;
}
/// <summary>
/// Breaks a tweet into typed sections (normal text, urls, @names and #tags) by
/// classifying each space-separated word with a regular expression.
/// </summary>
public class TweetDecoder
{
    #region regex patterns
    // strict match - won't allow for stuff before the @ (the whole word must be
    // "@" followed by letters, digits or underscores)
    public const string atRegexPattern = @"^@([A-Za-z0-9_]+)$";
    // strict match - won't allow for stuff before the # (the whole word must be
    // "#" followed by letters, digits or underscores)
    public const string hashRegexPattern = @"^#([A-Za-z0-9_]+)$";
    // here be dragons - this is a loose url match
    // NOTE(review): the '|' between the IP-address part and the host-name part sits
    // at the top level of the pattern, so the leading '^' (and the optional scheme)
    // only applies to the IP-address alternative and the trailing '$' only applies
    // to the host-name alternative. In practice that means "1.2.3.4whatever" and
    // "junk:example.com" both count as urls. Left as-is on purpose: callers may rely
    // on this looseness (e.g. it is what lets "http://my-site.com" match at all).
    public const string urlRegexDragonPattern = @"^((https?|ftp)\:(\/\/)|(file\:\/{2,3}))?(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(((([a-zA-Z0-9]+)(\.)?)+?)(\.)([a-z]{2}|com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum))([\/][\/a-zA-Z0-9\.]*)*?([\/]?(([\?][a-zA-Z0-9]+[\=][a-zA-Z0-9\%\(\)]*)([\&][a-zA-Z0-9]+[\=][a-zA-Z0-9\%\(\)]*)*?))?$";
    #endregion

    #region regexes
    // always the same so declare once as compiled for faster execution
    private static readonly Regex atRegex = new Regex(atRegexPattern, RegexOptions.Compiled);
    private static readonly Regex hashRegex = new Regex(hashRegexPattern, RegexOptions.Compiled);
    // The url pattern contains nested quantifiers, so a long word that does NOT match
    // can make the backtracking engine try an exponential number of splits. The match
    // timeout (Regex overload available from .NET 4.5) bounds that worst case.
    private static readonly Regex urlRegex = new Regex(
        urlRegexDragonPattern,
        RegexOptions.Compiled,
        System.TimeSpan.FromMilliseconds(100));
    #endregion

    /// <summary>
    /// Splits <paramref name="originalTweet"/> on single spaces and groups the words
    /// into sections. Every @name, #tag and url word becomes its own section;
    /// consecutive normal-text words are merged into one section, joined by a space.
    /// </summary>
    /// <param name="originalTweet">The tweet text to decode. Must not be null.</param>
    /// <returns>
    /// The sections in their original order. Joining the section texts with a single
    /// space reproduces the input.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="originalTweet"/> is null.
    /// </exception>
    public static List<TweetSection> DecodeSections(string originalTweet)
    {
        if (originalTweet == null)
        {
            throw new System.ArgumentNullException("originalTweet");
        }

        var sectionz = new List<TweetSection>();

        foreach (var word in originalTweet.Split(' '))
        {
            var sectionType = ClassifyWord(word);
            var previous = sectionz.Count > 0 ? sectionz[sectionz.Count - 1] : null;

            if (previous != null
                && sectionType == TweetSectionType.NormalText
                && previous.SectionType == TweetSectionType.NormalText)
            {
                // two normal-text words in a row: extend the previous section
                // instead of starting a new one
                previous.Text += ' ' + word;
            }
            else
            {
                sectionz.Add(new TweetSection { SectionType = sectionType, Text = word });
            }
        }

        return sectionz;
    }

    // Decides which kind of section a single word is; @name wins over #tag wins over url.
    private static TweetSectionType ClassifyWord(string word)
    {
        if (atRegex.IsMatch(word))
        {
            return TweetSectionType.AtName;
        }

        if (hashRegex.IsMatch(word))
        {
            return TweetSectionType.HashCode;
        }

        if (IsUrl(word))
        {
            return TweetSectionType.Url;
        }

        return TweetSectionType.NormalText;
    }

    // Runs the url pattern against a word without letting a pathological word stall the caller.
    private static bool IsUrl(string word)
    {
        // Both alternatives of the url pattern require a literal '.', so a word
        // without one can never match. Skipping the regex here keeps the common
        // "looooooong word" case away from the exponential backtracking path.
        if (word.IndexOf('.') < 0)
        {
            return false;
        }

        try
        {
            return urlRegex.IsMatch(word);
        }
        catch (RegexMatchTimeoutException)
        {
            // The engine gave up on a word that was backtracking excessively;
            // treat it as plain text rather than failing the whole decode.
            return false;
        }
    }
}
@IanQuigley
Copy link

Thanks for your input. I'm totally rocking the HttpModules at the mo'. The main handler at the mo' is the HTML one; twipler.com/timeline.html is a dynamic response.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment