-
-
Save gidili/369128 to your computer and use it in GitHub Desktop.
/// <summary>
/// The kinds of content a piece of a tweet can be classified as.
/// </summary>
public enum TweetSectionType
{
    /// <summary>Plain text; the classification used when no other kind applies.</summary>
    NormalText = 0,

    /// <summary>A word recognised as a URL.</summary>
    Url = 1,

    /// <summary>A mention: a word made of '@' followed by a name.</summary>
    AtName = 2,

    /// <summary>A hashtag: a word made of '#' followed by a tag.</summary>
    HashCode = 3
}
/// <summary>
/// One piece of a tweet together with its classification.
/// </summary>
public class TweetSection
{
    /// <summary>
    /// What kind of content <see cref="Text"/> holds.
    /// </summary>
    public TweetSectionType SectionType;

    /// <summary>
    /// The text of this piece of the tweet.
    /// </summary>
    public string Text;
}
/// <summary>
/// Splits a tweet into typed sections: plain text, URLs, @mentions and #hashtags.
/// </summary>
public class TweetDecoder
{
    #region regex patterns

    // strict match - the whole word must be '@' followed by name characters,
    // so nothing is allowed before the @ or after the name
    public const string atRegexPattern = @"^@([A-Za-z0-9_]+)$";

    // strict match - the whole word must be '#' followed by tag characters,
    // so nothing is allowed before the # or after the tag
    public const string hashRegexPattern = @"^#([A-Za-z0-9_]+)$";

    // Building blocks of the loose url match. Each piece is written so that it
    // can only match a given input in one way, which keeps the regex engine
    // from backtracking exponentially on long words that are not urls.

    // optional scheme: http://, https://, ftp:// or file: followed by 2-3 slashes
    private const string urlSchemePart = @"(?:(?:https?|ftp)://|file:/{2,3})?";

    // one dotted-quad octet
    private const string urlOctetPart = @"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)";

    // dotted-quad IPv4 host
    private const string urlIpPart = "(?:" + urlOctetPart + @"\.){3}" + urlOctetPart;

    // one or more dot-terminated labels followed by a two-letter or well-known TLD
    private const string urlDomainPart =
        @"(?:[a-zA-Z0-9]+\.)+(?:[a-z]{2}|com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum)";

    // optional path: a slash followed by any mix of slashes, letters, digits and dots
    private const string urlPathPart = @"(?:/[/a-zA-Z0-9.]*)?";

    // optional query string: ?key=value(&key=value)*, optionally preceded by a slash
    private const string urlQueryPart =
        @"(?:/?\?[a-zA-Z0-9]+=[a-zA-Z0-9%()]*(?:&[a-zA-Z0-9]+=[a-zA-Z0-9%()]*)*)?";

    // here be dragons - this is a loose url match.
    // The host alternatives are grouped so that '^' and '$' apply to both of
    // them; without the group '^' only binds to the IP branch and '$' only to
    // the domain branch, which lets words such as "foo@example.com" or
    // "1.2.3.4garbage" pass as urls. The pattern is only used for IsMatch, so
    // it uses non-capturing groups throughout.
    public const string urlRegexDragonPattern =
        "^" + urlSchemePart
        + "(?:" + urlIpPart + "|" + urlDomainPart + ")"
        + urlPathPart
        + urlQueryPart
        + "$";

    #endregion

    #region regexes

    // always the same so declare once as compiled for faster execution
    private static readonly Regex atRegex = new Regex(atRegexPattern, RegexOptions.Compiled);
    private static readonly Regex hashRegex = new Regex(hashRegexPattern, RegexOptions.Compiled);
    private static readonly Regex urlRegex = new Regex(urlRegexDragonPattern, RegexOptions.Compiled);

    #endregion

    /// <summary>
    /// Breaks a tweet into an ordered list of sections.
    /// </summary>
    /// <remarks>
    /// The tweet is split on single spaces and every word is classified on its
    /// own. Consecutive plain-text words are merged into one section. The spaces
    /// that separate words are kept as plain text, so concatenating the
    /// <c>Text</c> of all returned sections reproduces
    /// <paramref name="originalTweet"/> exactly.
    /// </remarks>
    /// <param name="originalTweet">The raw tweet text. Must not be null.</param>
    /// <returns>
    /// The sections in the order they appear in the tweet. An empty tweet yields
    /// a single plain-text section whose text is empty.
    /// </returns>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="originalTweet"/> is null.
    /// </exception>
    public static List<TweetSection> DecodeSections(string originalTweet)
    {
        if (originalTweet == null)
        {
            throw new System.ArgumentNullException("originalTweet");
        }

        var sections = new List<TweetSection>();
        var words = originalTweet.Split(' ');

        for (var i = 0; i < words.Length; i++)
        {
            // Split(' ') removed exactly one space in front of every word but
            // the first; put it back as plain text so no character is lost
            // next to a url, mention or hashtag.
            if (i > 0)
            {
                AppendNormalText(sections, " ");
            }

            var word = words[i];
            var sectionType = ClassifyWord(word);

            if (sectionType == TweetSectionType.NormalText)
            {
                AppendNormalText(sections, word);
            }
            else
            {
                sections.Add(new TweetSection { SectionType = sectionType, Text = word });
            }
        }

        return sections;
    }

    // Decides what kind of section a single word is; plain text when no pattern matches.
    private static TweetSectionType ClassifyWord(string word)
    {
        if (atRegex.IsMatch(word))
        {
            return TweetSectionType.AtName;
        }

        if (hashRegex.IsMatch(word))
        {
            return TweetSectionType.HashCode;
        }

        if (urlRegex.IsMatch(word))
        {
            return TweetSectionType.Url;
        }

        return TweetSectionType.NormalText;
    }

    // Appends text to the trailing plain-text section, or starts a new one when
    // the list is empty or ends with a url, mention or hashtag.
    private static void AppendNormalText(List<TweetSection> sections, string text)
    {
        var last = sections.Count > 0 ? sections[sections.Count - 1] : null;

        if (last != null && last.SectionType == TweetSectionType.NormalText)
        {
            last.Text += text;
        }
        else
        {
            sections.Add(new TweetSection { SectionType = TweetSectionType.NormalText, Text = text });
        }
    }
}
For the sum of section.text it's probably a subtle bug (some space or the likes - I'll have a look).
About urls - if it doesn't pass 'this.is' how is it gonna pass 'google.com' or the likes for example - or is it not meant to?
The problem is where you draw the line on acceptability versus what most people would recognise as a URL.
So I'm assuming http: followed by a-zA-Z0-9 :/@&?_-+% etc is ok, and www.blah is the same.
My next big issue is managing concurrent sessions, providing a "service down" option and managing session resources. It seems a bit weird that in global.asax session_end, HttpContext.Current.Session is null. I'm creating a stylesheet on the fly and storing it on disk, but I want to delete it when the session ends. I also want to track and manage how many people are online at the same time. I almost need to write my own session manager, which seems wrong?
And I've sort of committed to writing the documentation for the dotless css project :)
And I want to add identity switching.
Oh, and now that I can have; <span class="tweet">Blah<a class="atname">@ian</a> blah <a class="atname">@john</a></span>
I should be able to write some jQuery which allows me to do "reply to all" which would be mega awesome!
Yep - in the end if your decoder works and you're happy enough I don't see a need to replace it!
Might wanna look into HttpModules as an alternative to global.asax, global error handling example here --> http://bit.ly/dgrt6k (accessing HttpContext in the example) - also see here (SO question) --> http://bit.ly/b9akSs
Thanks for your input. I'm totally rocking the HttpModules at the mo'. The main handler at the mo' is the HTML one twipler.com/timeline.html is a dynamic response.
I have a unit test which assumes that the sum of sections.text = original text.
Also "this.is" passes a URL test which I would expect it not to.