Created
April 17, 2010 00:39
-
-
Save gidili/369128 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// The kinds of section a tweet is broken into by <c>TweetDecoder.DecodeSections</c>.
/// </summary>
public enum TweetSectionType
{
    /// <summary>Plain text; the default for any word not recognised as one of the other kinds.</summary>
    NormalText,

    /// <summary>A word that matched the URL pattern.</summary>
    Url,

    /// <summary>A word that matched the "@name" pattern.</summary>
    AtName,

    /// <summary>A word that matched the "#tag" pattern.</summary>
    HashCode
}
/// <summary>
/// One contiguous piece of a tweet together with the kind of content it holds.
/// </summary>
public class TweetSection
{
    /// <summary>The kind of content stored in <see cref="Text"/>.</summary>
    public TweetSectionType SectionType;

    /// <summary>The text of this section.</summary>
    public string Text;
}
/// <summary>
/// Breaks the text of a tweet into typed sections (normal text, URLs, @names
/// and #tags) so that each kind can be rendered differently.
/// </summary>
public class TweetDecoder
{
    #region regex patterns

    // strict match - the whole word must be '@' followed by one or more
    // letters, digits or underscores; nothing may precede the @ or follow the name
    public const string atRegexPattern = @"^@([A-Za-z0-9_]+)$";

    // strict match - the whole word must be '#' followed by one or more
    // letters, digits or underscores; nothing may precede the # or follow the tag
    public const string hashRegexPattern = @"^#([A-Za-z0-9_]+)$";

    // here be dragons - this is a loose url match
    // NOTE(review): the top-level '|' splits this pattern into two alternatives.
    // The leading '^' and the optional scheme belong only to the dotted-quad IP
    // alternative, and the trailing '$' (with the path and query parts) belongs
    // only to the host-name alternative. Consequently the host-name alternative
    // may start matching anywhere inside a word (a scheme-prefixed host name is
    // recognised only because of that), and the IP alternative accepts trailing
    // characters. The pattern is left untouched here because re-grouping it
    // would change which words are classified as URLs.
    public const string urlRegexDragonPattern = @"^((https?|ftp)\:(\/\/)|(file\:\/{2,3}))?(((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(((([a-zA-Z0-9]+)(\.)?)+?)(\.)([a-z]{2}|com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum))([\/][\/a-zA-Z0-9\.]*)*?([\/]?(([\?][a-zA-Z0-9]+[\=][a-zA-Z0-9\%\(\)]*)([\&][a-zA-Z0-9]+[\=][a-zA-Z0-9\%\(\)]*)*?))?$";

    #endregion

    #region regexes

    // always the same, so build each one once (compiled) and never reassign it
    private static readonly Regex atRegex = new Regex(atRegexPattern, RegexOptions.Compiled);
    private static readonly Regex hashRegex = new Regex(hashRegexPattern, RegexOptions.Compiled);
    private static readonly Regex urlRegex = new Regex(urlRegexDragonPattern, RegexOptions.Compiled);

    #endregion

    /// <summary>
    /// Splits <paramref name="originalTweet"/> on single space characters,
    /// classifies every resulting word and returns the sections in order.
    /// Consecutive normal-text words are merged into one section, joined by a
    /// single space; every @name, #tag and URL word gets a section of its own.
    /// </summary>
    /// <param name="originalTweet">The raw tweet text. Must not be null.</param>
    /// <returns>
    /// The ordered list of sections. The space that separated two sections is
    /// not stored in either of them. An empty input yields a single
    /// normal-text section whose text is the empty string.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="originalTweet"/> is null.
    /// </exception>
    public static List<TweetSection> DecodeSections(string originalTweet)
    {
        // fail with a descriptive exception instead of a NullReferenceException from Split
        if (originalTweet == null)
        {
            throw new System.ArgumentNullException("originalTweet");
        }

        var sections = new List<TweetSection>();

        // only ' ' separates words; tabs and line breaks stay inside a word
        foreach (var word in originalTweet.Split(' '))
        {
            var sectionType = ClassifyWord(word);
            var previous = sections.Count > 0 ? sections[sections.Count - 1] : null;

            // append to the previous section when both it and this word are normal text
            if (previous != null
                && sectionType == TweetSectionType.NormalText
                && previous.SectionType == TweetSectionType.NormalText)
            {
                previous.Text += " " + word;
            }
            else
            {
                sections.Add(new TweetSection { SectionType = sectionType, Text = word });
            }
        }

        return sections;
    }

    // Decides which kind of section a single word is; @name is tried first,
    // then #tag, then URL, and anything else is normal text.
    private static TweetSectionType ClassifyWord(string word)
    {
        if (atRegex.IsMatch(word))
        {
            return TweetSectionType.AtName;
        }

        if (hashRegex.IsMatch(word))
        {
            return TweetSectionType.HashCode;
        }

        if (urlRegex.IsMatch(word))
        {
            return TweetSectionType.Url;
        }

        return TweetSectionType.NormalText;
    }
}
Yep - in the end if your decoder works and you're happy enough I don't see a need to replace it!
Might wanna look into HttpModules as an alternative to global.asax, global error handling example here --> http://bit.ly/dgrt6k (accessing HttpContext in the example) - also see here (SO question) --> http://bit.ly/b9akSs
Thanks for your input. I'm totally rocking the HttpModules at the mo'. The main handler at the mo' is the HTML one; twipler.com/timeline.html is a dynamic response.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The problem is where you draw the line on acceptability versus what most people would recognise as a URL.
So I'm assuming http: followed by a-zA-Z0-9 :/@&?_-+% etc. is OK, and www.blah is the same.
My next big issue is managing concurrent sessions, providing a "service down" option and managing session resources. It seems a bit weird that in global.asax Session_End, HttpContext.Current.Session is null. I'm creating a stylesheet on the fly and storing it on disk, but I want to delete it when the session ends. I also want to track and manage how many people are online at the same time. I almost need to write my own session manager, which seems wrong?
And I've sort of committed to writing the documentation for the dotless CSS project :)
And I want to add identity switching.
Oh, and now that I can have;
<span class="tweet">Blah<a class="atname">@ian</a> blah <a class="atname">@john</a></span>
I should be able to write some jQuery which allows me to do "reply to all" which would be mega awesome!