-
-
Save absyah/4769c8d75a9c6f1040f7ec6b7315790e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Concept stolen from:
# https://stackoverflow.com/questions/787496/what-is-the-best-way-to-compute-trending-topics-or-tags
# Ranks the terms seen in the past hour by how far their frequency deviates
# from the same term's frequencies over the past 24 hours (via zscore).
#
# Only the candidates kept by data_freq_above_median are scored. A term with
# no matching history entries is scored against an empty sample list.
#
# @param count [Integer] maximum number of results to return
# @param past_hour_data [Array<Array>] [term, frequency] pairs for the last hour
# @param past_24hours_data [Array<Array>] [term, frequency] samples; a term
#   may appear any number of times
# @return [Array<Array>] up to count [term, z-score] pairs, highest score first
def trending(count, past_hour_data, past_24hours_data)
  # Index the history once instead of rescanning it for every candidate term.
  # Hash keys are matched with eql?, the same comparison the scan used.
  history = past_24hours_data.group_by { |elt| elt[0] }

  scored = data_freq_above_median(past_hour_data).map do |term, freq|
    samples = history.fetch(term, []).map { |elt| elt[1] }
    [term, zscore(freq, samples)]
  end

  # Negate the score so sort_by yields a descending order.
  scored.sort_by { |_term, z| -z }.take(count)
end
# Keeps only the entries whose frequency is strictly greater than the median
# frequency of the whole list. Lists with fewer than 20 entries are returned
# unchanged (the very same object), without computing a median.
#
# @param data [Array<Array>] [term, frequency] pairs
# @return [Array<Array>] the input itself, or the subset above the median
def data_freq_above_median(data)
  if data.length >= 20
    cutoff = medianscore(data.map { |pair| pair[1] })
    data.select { |pair| pair[1] > cutoff }
  else
    data
  end
end
# Returns the median of a list of numbers. The input is left unmodified; a
# sorted copy is used.
#
# @param data [Array<Numeric>] values in any order
# @return [Numeric, nil] the middle value for an odd-sized list, the Float
#   mean of the two middle values for an even-sized list, or nil when the
#   list is empty
def medianscore(data)
  return if data.empty?

  sorted = data.sort
  mid = sorted.length / 2
  return sorted[mid] if sorted.length.odd?

  # Divide by 2.0 so two Integer neighbours are not truncated by Integer
  # division (e.g. 2 and 3 must give 2.5, not 2).
  (sorted[mid - 1] + sorted[mid]) / 2.0
end
# Computes how many population standard deviations freq lies from the mean
# of population.
#
# @param freq [Numeric] the observation to score
# @param population [Array<Numeric>] reference samples
# @return [Numeric] the z-score, or 0 when population is empty or has zero
#   standard deviation
def zscore(freq, population)
  size = population.length
  return 0 if size == 0

  mean = population.sum / Float(size)
  squared_deviations = population.map { |sample| (sample - mean) ** 2 }
  std_dev = Math.sqrt(squared_deviations.sum / size)

  # A z-score is undefined when every sample is identical; report 0 instead.
  # https://www.quora.com/While-calculating-a-z-score-what-do-you-do-when-standard-deviation-is-zero
  return 0 if std_dev == 0

  (freq - mean) / std_dev
end
# Demo input: [term, frequency] pairs observed during the most recent hour.
past_hour = [["matematika", 200], ["biologi", 39], ["hoge", 100], ["jos", 9999]]

# Demo history: [term, frequency] samples for the past 24 hours. The
# "biologi"/"matematika" samples alternate, so they are laid out two per row.
past_24_hours = [
  ["biologi", 1], ["matematika", 90],
  ["biologi", 10], ["matematika", 102],
  ["biologi", 11], ["matematika", 110],
  ["biologi", 25], ["matematika", 50],
  ["biologi", 18], ["matematika", 205],
  ["biologi", 10], ["matematika", 200],
  ["biologi", 18], ["matematika", 10],
  ["biologi", 3], ["matematika", 90],
  ["biologi", 14], ["matematika", 100],
  ["biologi", 18], ["matematika", 99],
  ["biologi", 21], ["matematika", 97],
  ["biologi", 15], ["matematika", 100],
  ["biologi", 22], ["matematika", 89],
  ["biologi", 15], ["matematika", 96],
  ["biologi", 20], ["matematika", 175],
  ["biologi", 23], ["matematika", 199],
  ["biologi", 18], ["matematika", 96],
  ["biologi", 19], ["matematika", 121],
  ["biologi", 14], ["matematika", 170],
  ["biologi", 24], ["matematika", 230],
  ["biologi", 16], ["matematika", 142],
  ["biologi", 12], ["matematika", 103],
  ["biologi", 14], ["matematika", 95],
  ["biologi", 39], ["matematika", 200],
  ["hoge", 10],
  ["asik", 9999]
]

# Print the two top-ranked terms. puts flattens the nested result, so each
# term and each score is written on its own line.
puts trending(2, past_hour, past_24_hours)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment