Created
April 17, 2010 00:39
-
-
Save gidili/369128 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Kinds of section a tweet is broken into by <c>TweetDecoder.DecodeSections</c>.
/// </summary>
public enum TweetSectionType
{
    /// <summary>Plain text; the default when no other kind applies.</summary>
    NormalText,

    /// <summary>A word recognised as a URL.</summary>
    Url,

    /// <summary>A word recognised as an "@name" mention.</summary>
    AtName,

    /// <summary>A word recognised as a "#tag" hash tag.</summary>
    HashCode
}
/// <summary>
/// One piece of a decoded tweet: a classification plus the text it covers.
/// </summary>
public class TweetSection
{
    /// <summary>What kind of content <see cref="Text"/> holds.</summary>
    public TweetSectionType SectionType;

    /// <summary>The raw text of this section.</summary>
    public string Text;
}
/// <summary>
/// Splits a tweet into typed sections (plain text, URLs, @names, #tags) so
/// each section can be handled differently by the caller.
/// </summary>
public class TweetDecoder
{
    #region regex patterns

    // strict match - the whole word must be '@' followed by letters, digits or '_'
    public const string atRegexPattern = @"^@([A-Za-z0-9_]+)$";

    // strict match - the whole word must be '#' followed by letters, digits or '_'
    public const string hashRegexPattern = @"^#([A-Za-z0-9_]+)$";

    // here be dragons - this is a loose url match
    // NOTE(review): the top-level '|' splits this pattern in two, so the
    // scheme/IP branch is anchored only at the start ('^') and the domain
    // branch only at the end ('$'). As a result the domain branch can match a
    // suffix of the word (e.g. the "foo.com" in "user@foo.com"). Left
    // byte-identical because the constant is public; confirm intent before
    // tightening.
    // NOTE(review): the nested quantifiers in the domain branch may backtrack
    // heavily on adversarial input - consider a match timeout if tweets are
    // untrusted.
    public const string urlRegexDragonPattern = @"^((https?|ftp)\:(\/\/)|(file\:\/{2,3}))?(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(((([a-zA-Z0-9]+)(\.)?)+?)(\.)([a-z]{2}|com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum))([\/][\/a-zA-Z0-9\.]*)*?([\/]?(([\?][a-zA-Z0-9]+[\=][a-zA-Z0-9\%\(\)]*)([\&][a-zA-Z0-9]+[\=][a-zA-Z0-9\%\(\)]*)*?))?$";

    #endregion

    #region regexes

    // always the same, so build once as compiled regexes; readonly so the
    // shared instances cannot be swapped out after type initialisation
    private static readonly Regex atRegex = new Regex(atRegexPattern, RegexOptions.Compiled);
    private static readonly Regex hashRegex = new Regex(hashRegexPattern, RegexOptions.Compiled);
    private static readonly Regex urlRegex = new Regex(urlRegexDragonPattern, RegexOptions.Compiled);

    #endregion

    /// <summary>
    /// Breaks a tweet into an ordered list of sections.
    /// </summary>
    /// <remarks>
    /// The tweet is split on single space characters. Each word is classified
    /// on its own; consecutive normal-text words are merged back into one
    /// section joined by a single space. The space separating a normal-text
    /// section from a neighbouring @name / #tag / URL section is not stored
    /// in either section. An empty string yields a single normal-text section
    /// whose text is empty.
    /// </remarks>
    /// <param name="originalTweet">The tweet text to decode. Must not be null.</param>
    /// <returns>The sections in the order they occur in the tweet.</returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="originalTweet"/> is null.
    /// </exception>
    public static List<TweetSection> DecodeSections(string originalTweet)
    {
        // fail with a descriptive exception instead of a bare NullReferenceException
        if (originalTweet == null)
        {
            throw new System.ArgumentNullException("originalTweet");
        }

        var sections = new List<TweetSection>();

        // iterate the split result directly; no need to copy it into a list
        foreach (var word in originalTweet.Split(' '))
        {
            var sectionType = ClassifyWord(word);
            var previous = sections.Count > 0 ? sections[sections.Count - 1] : null;

            // fold runs of normal text into one section, otherwise start a new one
            if (previous != null
                && sectionType == TweetSectionType.NormalText
                && previous.SectionType == TweetSectionType.NormalText)
            {
                previous.Text += " " + word;
            }
            else
            {
                sections.Add(new TweetSection { SectionType = sectionType, Text = word });
            }
        }

        return sections;
    }

    // Decides the section type of a single word; checks run in the order
    // @name, #tag, URL, and anything unmatched is normal text.
    private static TweetSectionType ClassifyWord(string word)
    {
        if (atRegex.IsMatch(word))
        {
            return TweetSectionType.AtName;
        }

        if (hashRegex.IsMatch(word))
        {
            return TweetSectionType.HashCode;
        }

        if (urlRegex.IsMatch(word))
        {
            return TweetSectionType.Url;
        }

        return TweetSectionType.NormalText;
    }
}
Thanks for your input. I'm totally rocking the HttpModules at the mo'. The main handler at the mo' is the HTML one: twipler.com/timeline.html is a dynamic response.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Yep - in the end if your decoder works and you're happy enough I don't see a need to replace it!
Might wanna look into HttpModules as an alternative to global.asax. There's a global error handling example (accessing HttpContext) here: http://bit.ly/dgrt6k - also see this SO question: http://bit.ly/b9akSs