Created
April 16, 2018 14:39
-
-
Save danielepolencic/3e5acd8e8c7409301ad9c5015e69dd07 to your computer and use it in GitHub Desktop.
Crunch tweets on Huginn
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Object that the `check` and `receive` handlers below are attached to.
// Typed `any` because the members those handlers call on `this` (memory, createEvent,
// incomingEvents) are not declared in this file — presumably the host runtime
// supplies them; confirm before relying on this stand-in.
var Agent: any = {};
/** One tweet, flattened to the fields this script works with (built by `liftTweet`). */
interface ITweet {
  /** Tweet identifier; `Agent.receive` passes the payload's `id_str`. */
  id: string;
  /** Tweet text; `Agent.receive` passes the payload's `text`. */
  text: string;
  /** Author; `Agent.receive` passes the payload's `user.screen_name`. */
  user: string;
  /** Linked URLs, or `[Const.NO_URL]` when the tweet has none. */
  urls: string[];
  /** Media URLs, or `[Const.NO_MEDIA]` when the tweet has none. */
  media: string[];
  /** Numeric timestamp; `Agent.check` compares it against `Date#getTime()` milliseconds. */
  timestamp: number;
}
/** A bundle of tweets that have been grouped together, plus the values they are grouped by. */
interface IContainer {
  /** The tweets held by this container. */
  tweets: ITweet[];
  /** Tweet texts collected from the grouped tweets (grouping key once URLs are stripped). */
  texts: string[];
  /** URLs collected from the grouped tweets (second grouping key). */
  urls: string[];
  /** Smallest `timestamp` among `tweets`; `Agent.check` uses it to decide when to flush. */
  oldestTweet: number;
}
/** All containers that share one grouping key, as produced by `groupByKey`. */
interface IContainerGroup {
  /** The shared key (a text or a URL, depending on the key function used). */
  value: string;
  /** Containers for which the key function returned `value`. */
  containers: IContainer[];
}
/** Placeholder values `liftTweet` stores when a tweet carries no URLs or no media. */
enum Const {
  /** Stands in for the URL list of a tweet without links. */
  NO_URL = 'NO_URL',
  /** Stands in for the media list of a tweet without media. */
  NO_MEDIA = 'NO_MEDIA',
}
/**
 * `Array.prototype.filter` predicate that keeps only the first occurrence of each value.
 *
 * Values are compared the way `indexOf` compares them (strict equality).
 *
 * @param value Element currently being tested.
 * @param index Position of `value` in `self`.
 * @param self The array being filtered.
 * @returns `true` when `index` is the first position at which `value` occurs.
 */
function onlyUnique<T>(value: T, index: number, self: readonly T[]): boolean {
  return self.indexOf(value) === index;
}
/**
 * Groups containers by the keys a key function extracts from each of them.
 *
 * A container is placed in one group per distinct key it yields, so it can appear in
 * several groups, but never more than once in the same group.
 *
 * @param tweets Containers to group.
 * @param fn Returns the grouping keys (e.g. texts or URLs) of a container.
 * @returns One group per distinct key, in `Object.keys` order of the keys.
 */
function groupByKey(tweets: IContainer[], fn: (t: IContainer) => string[]): IContainerGroup[] {
  // Null-prototype dictionary: keys come from tweet text / URLs, so a key such as
  // "constructor" or "__proto__" must not resolve to something inherited from Object.prototype.
  var groupedTweets: {[key: string]: IContainerGroup} = Object.create(null);

  tweets.forEach(function(container) {
    fn(container)
      // A container may yield the same key twice (e.g. two texts that are equal once
      // their URLs are stripped); count it once per group.
      .filter(function(value, index, keys) {
        return keys.indexOf(value) === index;
      })
      .forEach(function(value) {
        var group = groupedTweets[value];
        if (group) {
          group.containers.push(container);
        } else {
          groupedTweets[value] = {
            containers: [container],
            value: value
          };
        }
      });
  });

  return Object.keys(groupedTweets).map(function(key) {
    return groupedTweets[key];
  });
}
/**
 * Collapses every non-empty group into a single merged container.
 *
 * The containers of a group may overlap (the same tweet can be held by several of
 * them), so tweets are merged by `id` and each tweet is kept once, first occurrence wins.
 *
 * @param containerGroups Groups produced by `groupByKey`.
 * @returns One container per group that has at least one container. `texts` and `urls`
 *   are the de-duplicated union of the group's values; `oldestTweet` is the smallest
 *   timestamp among the merged tweets (`Infinity` when there are no tweets).
 */
function reduceGroup(containerGroups: IContainerGroup[]): IContainer[] {
  return containerGroups
    .filter(function(containerGroup) {
      return containerGroup.containers.length > 0;
    })
    .map(function(containerGroup) {
      // Null-prototype object used as a set of tweet ids already merged.
      var seenIds: {[id: string]: boolean} = Object.create(null);
      var tweets: ITweet[] = [];
      var texts: string[] = [];
      var urls: string[] = [];

      containerGroup.containers.forEach(function(container) {
        container.tweets.forEach(function(tweet) {
          // Tweets without an id cannot be compared, so they are always kept.
          if (tweet.id != null) {
            if (seenIds[tweet.id]) {
              return;
            }
            seenIds[tweet.id] = true;
          }
          tweets.push(tweet);
        });
        texts = texts.concat(container.texts);
        urls = urls.concat(container.urls);
      });

      return {
        // Folding with Math.min matches Math.min(...timestamps) without passing
        // every timestamp as a separate call argument.
        oldestTweet: tweets.reduce(function(oldest, tweet) {
          return Math.min(oldest, tweet.timestamp);
        }, Infinity),
        texts: texts.filter(onlyUnique),
        urls: urls.filter(onlyUnique),
        tweets: tweets,
      };
    });
}
/**
 * Drops containers whose hash has already been seen, keeping the first of each.
 *
 * @param containers Containers to de-duplicate, in priority order.
 * @param predicate Computes the identity hash of a container; called once per container.
 * @returns The containers with a not-yet-seen hash, in their original order.
 */
function uniquify(containers: IContainer[], predicate: (c: IContainer) => string): IContainer[] {
  // Null-prototype object used as a set: a hash such as "constructor" or "toString"
  // must not look "already seen" because Object.prototype happens to define it.
  var seenHashes: {[hash: string]: boolean} = Object.create(null);

  return containers.filter(function(container) {
    var hash = predicate(container);
    if (seenHashes[hash]) {
      return false;
    }
    seenHashes[hash] = true;
    return true;
  });
}
function groupAll(containers: IContainer[]): IContainer[] { | |
const groupByText = groupByKey(containers, function(container) { | |
return container.texts.map(removeUrls); | |
}); | |
const uniqueTweetsByText = reduceGroup(groupByText); | |
const groupByUrl = groupByKey(uniqueTweetsByText, function(container) { | |
return container.urls; | |
}); | |
const uniqueTweetsByUrl = reduceGroup(groupByUrl); | |
return uniquify(uniqueTweetsByUrl, function(container) { | |
return container.tweets.map(function(tweet) { | |
return tweet.id; | |
}).join(''); | |
}); | |
} | |
/**
 * Removes every URL from a text so that texts differing only in their links compare equal.
 *
 * A URL is `http://`, `https://` (optionally followed by `www.`) or a bare `www.`,
 * then a host label of one or more characters, a dot, and at least two further
 * non-whitespace characters; the match runs up to the next whitespace. Surrounding
 * whitespace is left untouched.
 *
 * @param text Text that may contain URLs.
 * @returns `text` with all URLs removed.
 */
function removeUrls(text: string): string {
  // The host label is "alnum, optionally more alnum/hyphen ending in alnum", which
  // covers one-, two- and many-character labels (t.co, fb.me, example.com) alike.
  return text.replace(/(?:https?:\/\/(?:www\.)?|www\.)[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.[^\s]{2,}/gi, '');
}
/**
 * Wraps the fields of one tweet into a single-tweet container.
 *
 * @param id Tweet identifier.
 * @param text Tweet text.
 * @param user Author of the tweet.
 * @param urls URLs linked by the tweet; an empty list is stored as `[Const.NO_URL]`.
 * @param media Media URLs of the tweet; an empty list is stored as `[Const.NO_MEDIA]`.
 * @param timestamp Timestamp as a base-10 integer string.
 * @returns A container holding just this tweet, with `oldestTweet` set to its timestamp.
 */
function liftTweet(id: string, text: string, user: string, urls: string[], media: string[], timestamp: string): IContainer {
  var parsedTimestamp = parseInt(timestamp, 10);
  var tweet: ITweet = {
    id: id,
    text: text,
    user: user,
    urls: urls.length > 0 ? urls : [Const.NO_URL],
    media: media.length > 0 ? media : [Const.NO_MEDIA],
    // A missing or unparsable timestamp would become NaN, and NaN never satisfies the
    // "older than" comparison used to flush containers, so such a tweet would be held
    // forever. Fall back to the current time so it ages out like any other tweet.
    timestamp: isNaN(parsedTimestamp) ? new Date().getTime() : parsedTimestamp,
  };
  return {
    oldestTweet: tweet.timestamp,
    urls: tweet.urls,
    texts: [tweet.text],
    tweets: [tweet]
  };
}
// Durations expressed in milliseconds, the unit of Date#getTime().
var ONE_HOUR_IN_MILLISECONDS = 60 * 60 * 1000;
var ONE_DAY_IN_MILLISECONDS = 24 * 60 * 60 * 1000;
/**
 * Flushes grouped tweets that are old enough.
 *
 * Reads the containers stored under the `items` memory key, regroups them with
 * `groupAll`, and splits the result by age: containers whose oldest tweet is more than
 * one hour old are sent out together in one event (`{tweets: [...]}`), all others are
 * written back to `items`. Does nothing when no items are stored.
 *
 * `this.memory` and `this.createEvent` are not defined in this file — presumably the
 * host runtime provides them; confirm against its documentation.
 */
Agent.check = function() {
  var items: IContainer[] = [];
  if (Array.isArray(this.memory('items'))) {
    items = this.memory('items');
  }
  if (items.length === 0) {
    return;
  }

  var now = new Date().getTime();
  var keep: IContainer[] = [];
  var discard: IContainer[] = [];
  groupAll(items).forEach(function(container) {
    var isOlderThanOneHour = container.oldestTweet < (now - ONE_HOUR_IN_MILLISECONDS);
    if (isOlderThanOneHour) {
      discard.push(container);
    } else {
      keep.push(container);
    }
  });

  if (discard.length > 0) {
    this.createEvent({tweets: discard});
  }
  this.memory('items', keep);
};
/**
 * Converts every incoming event into a single-tweet container and appends it to the
 * containers stored under the `items` memory key.
 *
 * URLs and media are read from `extended_tweet.entities` when the payload has an
 * `extended_tweet`, otherwise from `entities`; a missing or non-array `urls` / `media`
 * is treated as empty.
 *
 * `this.memory` and `this.incomingEvents` are not defined in this file — presumably
 * the host runtime provides them; confirm against its documentation.
 */
Agent.receive = function() {
  var items = [];
  if (Array.isArray(this.memory('items'))) {
    items = this.memory('items');
  }

  var events = this.incomingEvents().map(function(incoming) {
    var payload = incoming.payload;

    // Prefer the entities of the extended tweet when the payload carries one.
    var urlEntities, mediaEntities;
    if (payload.extended_tweet) {
      urlEntities = payload.extended_tweet.entities.urls;
      mediaEntities = payload.extended_tweet.entities.media;
    } else {
      urlEntities = payload.entities.urls;
      mediaEntities = payload.entities.media;
    }
    if (!Array.isArray(urlEntities)) {
      urlEntities = [];
    }
    if (!Array.isArray(mediaEntities)) {
      mediaEntities = [];
    }

    return liftTweet(
      payload.id_str,
      payload.text,
      payload.user.screen_name,
      urlEntities.map(function(entity) {
        return entity.expanded_url;
      }),
      mediaEntities.map(function(entity) {
        return entity.media_url_https;
      }),
      payload.timestamp_ms
    );
  });

  this.memory('items', items.concat(events));
};
// ---- groupByKey self-checks (run when the script is loaded) ----

// A single container yields a single group.
console.assert(
  groupByKey(
    [liftTweet('1', 'text1', 'user1', ['url1'], [], '0')],
    (container) => container.texts
  ).length === 1
);

// Two containers with the same text end up in the same group.
console.assert(
  groupByKey(
    [
      liftTweet('1', 'text1', 'user1', ['url1', 'url2'], [], '0'),
      liftTweet('1', 'text1', 'user1', ['url1'], [], '0'),
    ],
    (container) => container.texts
  ).length === 1
);

// The first group collects all three containers that carry 'text1'.
console.assert(
  groupByKey(
    [
      liftTweet('1', 'text1', 'user1', ['url1', 'url2'], [], '0'),
      liftTweet('1', 'text1', 'user1', ['url1'], [], '0'),
      liftTweet('1', 'text1', 'user1', ['url2'], [], '0'),
      liftTweet('1', 'text2', 'user1', ['url2', 'url3'], [], '0'),
    ],
    (tweet) => tweet.texts
  )[0].containers.length === 3
);
// ---- reduceGroup self-checks (run when the script is loaded) ----

// A group without containers produces nothing.
console.assert(reduceGroup([{containers: [], value: '1'}]).length === 0);

// A group with two containers collapses into one container.
console.assert(
  reduceGroup([
    {
      value: '1',
      containers: [
        liftTweet('1', 'user', 'me', ['url1', 'url2'], [], '0'),
        liftTweet('2', 'user', 'me', ['url1'], [], '0'),
      ],
    },
  ]).length === 1
);

// Two groups: texts and URLs are merged without duplicates, and oldestTweet is the
// smallest timestamp of each group.
var containersTest1 = reduceGroup([
  {
    value: '1',
    containers: [
      liftTweet('1', 'text1', 'user1', ['url1', 'url2'], [], '1'),
      liftTweet('2', 'text2', 'user2', ['url1'], [], '2'),
    ],
  },
  {
    value: '2',
    containers: [
      liftTweet('3', 'text3', 'user3', ['url3'], [], '0'),
    ],
  },
]);
console.assert(containersTest1.length === 2);
console.assert(containersTest1[0].texts.join('') === 'text1text2');
console.assert(containersTest1[1].texts.join('') === 'text3');
console.assert(containersTest1[0].urls.join('') === 'url1url2');
console.assert(containersTest1[1].urls.join('') === 'url3');
console.assert(containersTest1[0].oldestTweet === 1);
console.assert(containersTest1[1].oldestTweet === 0);
// ---- groupAll self-checks (run when the script is loaded) ----

// Same text apart from the trailing link, different URLs: merged into one container.
console.assert(
  groupAll([
    liftTweet(
      '1',
      'Just in! Introduction to Kubernetes and Minikube. Learn how to use Kubernetes to deploy a small microservices-based… https://t.co/iRqhIRfu0k',
      'mimacom',
      ['http://bit.ly/2HmgZdA'],
      [],
      '1523866451489'
    ),
    liftTweet(
      '2',
      'Just in! Introduction to Kubernetes and Minikube. Learn how to use Kubernetes to deploy a small microservices-based… https://t.co/RkwPllbdxC',
      'mimacom',
      ['http://bit.ly/2HAQhLS'],
      [],
      '1523866451489'
    ),
  ]).length === 1
);

// Same id, same URL, text differing only in the trailing link: merged into one container.
console.assert(
  groupAll([
    liftTweet(
      '985841185938919425',
      'TRAINING: 2 days #Kubernetes Dojo Training! Plus here is a discount code for you: K8SLAB20 For more information a… https://t.co/z2zodbQGa6',
      'kloia_com',
      ['https://kloia.com/training/kubernetes/'],
      [],
      '1523877829428'
    ),
    liftTweet(
      '985841185938919425',
      'TRAINING: 2 days #Kubernetes Dojo Training! Plus here is a discount code for you: K8SLAB20 For more information a… https://t.co/9jCsyU8YM0',
      'kloia_com',
      ['https://kloia.com/training/kubernetes/'],
      [],
      '1523878186274'
    ),
  ]).length === 1
);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment