-
-
Save LenaicTerrier/112880ee39723d182f71 to your computer and use it in GitHub Desktop.
Added support for emojis and HTML special characters.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * twitter-entities.js
 * This function converts a tweet with "entity" metadata
 * from plain text to linkified HTML.
 *
 * See the documentation here: http://dev.twitter.com/pages/tweet_entities
 * Basically, add ?include_entities=true to your timeline call
 *
 * Based on existing code from Wade Simmons
 * Licensed under the MIT license
 * http://wades.im/mons
 *
 * Modified by Lénaïc Terrier
 * Licensed under the MIT license
 *
 * Requires jQuery
 */
/**
 * Convert a tweet carrying "entities" metadata from plain text to linkified HTML.
 *
 * URLs, hashtags, user mentions and media entities are replaced by anchors,
 * the remaining text is HTML-escaped, and emojis from the supported ranges are
 * wrapped in <span class="emoji">. The tweet object is not modified.
 *
 * This implementation is self-contained: it does not call jQuery.
 *
 * @param {Object} tweet - Tweet object with a `text` string and an optional
 *     `entities` object whose `urls`, `hashtags`, `user_mentions` and `media`
 *     arrays hold entries with an `indices` pair [start, end).
 * @returns {string} HTML markup for the tweet.
 */
function linkifyEntities(tweet)
{
    // Emoji ranges that get wrapped in a span: U+1F300-1F3FF, U+1F400-1F64F, U+1F680-1F6FF.
    var EMOJI_PATTERN = /\ud83c[\udf00-\udfff]|\ud83d[\udc00-\ude4f]|\ud83d[\ude80-\udeff]/g;
    // Splits a string into code points: a surrogate pair counts as one element.
    var CODE_POINT_PATTERN = /[\ud800-\udbff][\udc00-\udfff]|[\s\S]/g;
    var ENTITY_PATTERN = /&(amp|lt|gt|quot|apos|#0*39);/g;
    var NAMED_ENTITIES = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };

    // Decode the handful of HTML entities that may already be present in the
    // input. A single pass is used so "&amp;lt;" decodes to "&lt;", not "<".
    function htmlCharsCorrect(text)
    {
        var source = text == null ? '' : String(text);
        return source.replace(ENTITY_PATTERN, function (match, name) {
            // Anything not in the table is one of the numeric apostrophe forms.
            return Object.prototype.hasOwnProperty.call(NAMED_ENTITIES, name) ? NAMED_ENTITIES[name] : "'";
        });
    }

    // Escape text for use as HTML element content. Entities are decoded first,
    // so input that is already escaped is not escaped a second time.
    function escapeHTML(text)
    {
        return htmlCharsCorrect(text)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/\u00a0/g, '&nbsp;');
    }

    // Escape text for use inside a quoted attribute value. Quotes must be
    // escaped too, otherwise a value such as "O'Neil" closes the attribute.
    function escapeAttribute(text)
    {
        return escapeHTML(text).replace(/"/g, '&quot;').replace(/'/g, '&#39;');
    }

    // Wrap every supported emoji in a span.
    // NOTE(review): data-emoji holds the decimal value of the first UTF-16
    // unit (the high surrogate), as before; kept unchanged because existing
    // stylesheets may select on it.
    function wrapEmojis(html)
    {
        return html.replace(EMOJI_PATTERN, function (emoji) {
            return '<span class="emoji" data-emoji="u' + emoji.charCodeAt(0) + '">' + emoji + '</span>';
        });
    }

    // Render a piece of the tweet's own text: escape it, then mark up emojis.
    function renderText(text)
    {
        return wrapEmojis(escapeHTML(text));
    }

    var index_map = {};

    // Record a renderer for every entry of one entity list, keyed by start index.
    function register(list, render)
    {
        if (!list || typeof list.length !== 'number') {
            return;
        }
        Array.prototype.forEach.call(list, function (entry) {
            if (!entry || !entry.indices) {
                return;
            }
            index_map[entry.indices[0]] = [entry.indices[1], function (text) {
                return render(entry, text);
            }];
        });
    }

    // A tweet without entities is still escaped and gets its emojis wrapped.
    var entities = tweet.entities || {};

    register(entities.urls, function (entry) {
        return "<a href='" + escapeAttribute(entry.url) + "'>" + escapeHTML(entry.display_url) + "</a>";
    });
    register(entities.hashtags, function (entry, text) {
        // encodeURIComponent yields valid percent-encoding for non-ASCII tags.
        var query = encodeURIComponent("#" + entry.text);
        return "<a href='http://twitter.com/search?q=" + escapeAttribute(query) + "'>" + renderText(text) + "</a>";
    });
    register(entities.user_mentions, function (entry, text) {
        return "<a title='" + escapeAttribute(entry.name) + "' href='http://twitter.com/" + escapeAttribute(entry.screen_name) + "'>" + renderText(text) + "</a>";
    });
    register(entities.media, function (entry) {
        return "<a class='mediahref' data-lightbox='" + escapeAttribute(entry.id) + "' href='" + escapeAttribute(entry.media_url) + "'></a>";
    });

    // Entity indices are treated as code-point offsets, so the text is walked
    // one code point at a time; any character outside the BMP counts as one.
    var source = tweet.text == null ? '' : String(tweet.text);
    var chars = source.match(CODE_POINT_PATTERN) || [];
    var result = "";
    var last_i = 0;
    var i, ind, end;

    for (i = 0; i < chars.length; ++i) {
        ind = index_map[i];
        if (!ind) {
            continue;
        }
        end = Math.min(ind[0], chars.length);
        if (!(end > i)) {
            // Empty or inverted range: skip it, otherwise the scan would
            // step backwards and never terminate.
            continue;
        }
        if (i > last_i) {
            result += renderText(chars.slice(last_i, i).join(''));
        }
        result += ind[1](chars.slice(i, end).join(''));
        i = end - 1;
        last_i = end;
    }
    if (chars.length > last_i) {
        result += renderText(chars.slice(last_i).join(''));
    }
    return result;
}
I know I'm a bit late to the party, but do you know why the unicode emojis are messing up with the text?
PS. I've never heard about PRIVATE USE ONE Unicode character before, the solution you've come up with is quite elegant 👍
I don't recall anything about that piece of code; it was so long ago, lol. Even the link to the Twitter documentation is dead now... I guess emojis were messing with the length of the text, because an emoji takes up two UTF-16 code units in a JavaScript string while the entity indices count it as one character. Sorry I couldn't be of more help.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Gonna try this out....