Skip to content

Instantly share code, notes, and snippets.

@LenaicTerrier
Forked from wadey/twitter-entities.js
Last active September 26, 2021 01:53
Show Gist options
  • Save LenaicTerrier/112880ee39723d182f71 to your computer and use it in GitHub Desktop.
Save LenaicTerrier/112880ee39723d182f71 to your computer and use it in GitHub Desktop.
Added support for emojis and HTML special characters.
/*
* twitter-entities.js
* This function converts a tweet with "entity" metadata
* from plain text to linkified HTML.
*
* See the documentation here: http://dev.twitter.com/pages/tweet_entities
* Basically, add ?include_entities=true to your timeline call
*
* Based off existing code from Wade Simmons
* Licensed under the MIT license
* http://wades.im/mons
*
* Modified by lénaïc Terrier
* Licensed under the MIT license
*
* No external dependencies (earlier revisions required jQuery).
*/
/**
 * Render a tweet as HTML, turning its url / hashtag / user_mention / media
 * entities into anchors and wrapping known emojis in
 * <span class="emoji" data-emoji="u{first UTF-16 code unit, decimal}">.
 *
 * Entity indices are applied per Unicode code point, so a surrogate pair
 * counts as one position. The tweet object is never modified, so the
 * function can safely be called more than once on the same tweet.
 *
 * @param {Object} tweet - Object with a `text` string and an optional
 *     `entities` object holding `urls`, `hashtags`, `user_mentions` and
 *     `media` arrays; every entry carries `indices: [start, end]`.
 * @returns {string} HTML markup for the tweet.
 */
function linkifyEntities(tweet)
{
    var ENCODE = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'};

    // Decode the entities the text may already carry. A single pass is used
    // so that "&amp;lt;" becomes "&lt;" rather than being decoded twice.
    function htmlCharsCorrect(text)
    {
        var source = text == null ? '' : String(text);
        return source.replace(/&(amp|lt|gt|quot|apos|#0*39);/g, function (match, name) {
            switch (name) {
                case 'amp': return '&';
                case 'lt': return '<';
                case 'gt': return '>';
                case 'quot': return '"';
                default: return "'";
            }
        });
    }

    // Normalise, then encode everything that is unsafe in HTML text AND in
    // quoted attribute values (the anchors below use single-quoted attributes).
    function escapeHTML(text)
    {
        return htmlCharsCorrect(text).replace(/[&<>"']/g, function (ch) {
            return ENCODE[ch];
        });
    }

    var ranges = [
        '\ud83c[\udf00-\udfff]', // U+1F300 to U+1F3FF
        '\ud83d[\udc00-\ude4f]', // U+1F400 to U+1F64F
        '\ud83d[\ude80-\udeff]' // U+1F680 to U+1F6FF
    ];
    var emojiPattern = new RegExp(ranges.join('|'), 'g');

    // Escape a piece of tweet text and wrap the emojis it contains.
    function renderText(text)
    {
        return escapeHTML(text).replace(emojiPattern, function (ch) {
            return '<span class="emoji" data-emoji="u' + ch.charCodeAt(0) + '">' + ch + '</span>';
        });
    }

    var text = tweet.text == null ? '' : String(tweet.text);

    if (!tweet.entities) {
        return renderText(text);
    }

    // One array element per code point (a surrogate pair stays together), so
    // entity indices line up even when the text contains astral characters.
    var units = text.match(/[\ud800-\udbff][\udc00-\udfff]|[\s\S]/g) || [];
    var index_map = {};
    var result = '';
    var last_i = 0;
    var i;
    var ind;

    function sliceText(from, to)
    {
        return units.slice(from, to).join('');
    }

    // Record a renderer for every entry, keyed by its start index. Entity
    // kinds registered later win when two entities start at the same index.
    function register(entries, render)
    {
        if (!Array.isArray(entries)) {
            return;
        }
        entries.forEach(function (entry) {
            if (!entry || !entry.indices) {
                return;
            }
            index_map[entry.indices[0]] = [entry.indices[1], function (covered) {
                return render(entry, covered);
            }];
        });
    }

    register(tweet.entities.urls, function (entry) {
        return "<a href='" + escapeHTML(entry.url) + "'>" + escapeHTML(entry.display_url) + "</a>";
    });
    register(tweet.entities.hashtags, function (entry, covered) {
        // encodeURIComponent replaces the deprecated escape(), which emits
        // non-standard %uXXXX sequences for non-ASCII hashtags.
        return "<a href='http://twitter.com/search?q=" + escapeHTML(encodeURIComponent('#' + entry.text)) + "'>" + renderText(covered) + "</a>";
    });
    register(tweet.entities.user_mentions, function (entry, covered) {
        return "<a title='" + escapeHTML(entry.name) + "' href='http://twitter.com/" + escapeHTML(entry.screen_name) + "'>" + renderText(covered) + "</a>";
    });
    register(tweet.entities.media, function (entry) {
        return "<a class='mediahref' data-lightbox='" + escapeHTML(entry.id) + "' href='" + escapeHTML(entry.media_url) + "'></a>";
    });

    for (i = 0; i < units.length; ++i) {
        ind = index_map[i];
        // Ignore entities that do not move forward; following them would
        // send the loop backwards and never terminate.
        if (ind && ind[0] > i) {
            if (i > last_i) {
                result += renderText(sliceText(last_i, i));
            }
            result += ind[1](sliceText(i, ind[0]));
            last_i = Math.min(ind[0], units.length);
            i = last_i - 1;
        }
    }
    if (units.length > last_i) {
        result += renderText(sliceText(last_i, units.length));
    }
    return result;
}
@yaroslav-ilin
Copy link

I know I'm a bit late to the party, but do you know why the Unicode emojis were messing with the text?

PS. I've never heard about PRIVATE USE ONE Unicode character before, the solution you've come up with is quite elegant 👍

@LenaicTerrier
Copy link
Author

I don't recall anything about that piece of program. It was so long ago lol. Even the link to the twitter documentation is dead now... I guess emojis may have been messing with the length of the text because emojis are two characters when the text is utf-8 encoded. Sorry I couldn't be of more help.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment