-
-
Save fudanchii/9203f29be4b3ca108275bcd03ea0c488 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# concept stolen from:
# https://stackoverflow.com/questions/787496/what-is-the-best-way-to-compute-trending-topics-or-tags
# Ranks the past hour's terms by the z-score of their frequency against the
# same term's samples from the past 24 hours.
#
# @param count [Integer] maximum number of terms to return
# @param past_hour_data [Array<Array>] `[term, frequency]` pairs for the past hour
# @param past_24hours_data [Array<Array>] `[term, frequency]` samples; a term
#   may appear any number of times
# @return [Array<Array>] up to `count` `[term, z_score]` pairs, highest score first
# @raise [RuntimeError] propagated from `zscore`, e.g. when a candidate term
#   has no sample at all in `past_24hours_data`
def trending(count, past_hour_data, past_24hours_data)
  # Group the history by term once, instead of rescanning the whole 24h
  # array for every candidate.
  history = past_24hours_data.each_with_object({}) do |elt, by_term|
    (by_term[elt[0]] ||= []) << elt[1]
  end

  data_freq_above_median(past_hour_data)
    .map { |term, freq| [term, zscore(freq, history.fetch(term, []))] }
    .sort_by { |_term, z| -z }
    .take(count)
end
# Keeps only the rows whose frequency is strictly above the median frequency.
#
# Inputs with fewer than `min_size` rows are returned as-is, without filtering.
#
# @param data [Array<Array>] `[term, frequency]` pairs
# @param min_size [Integer] smallest input size at which the median filter is
#   applied (defaults to 20, the previously hard-coded threshold)
# @return [Array<Array>] `data` itself when it is shorter than `min_size`,
#   otherwise a new array with the rows above the median
def data_freq_above_median(data, min_size: 20)
  return data if data.length < min_size

  median = medianscore(data.map { |elt| elt[1] })
  data.select { |elt| elt[1] > median }
end
# Computes the median of a collection of numbers.
#
# @param data [Array<Numeric>] values in any order; the array is not modified
# @return [Numeric] the middle element for an odd-sized input, or the Float
#   mean of the two middle elements for an even-sized input
# @raise [ArgumentError] if `data` is empty
def medianscore(data)
  raise ArgumentError, "can't compute the median of an empty collection" if data.empty?

  sorted = data.sort
  mid = sorted.length / 2
  return sorted[mid] if sorted.length.odd?

  # Divide by a Float so two Integer neighbours are not truncated
  # (e.g. the median of [1, 2] is 1.5, not 1).
  (sorted[mid - 1] + sorted[mid]) / 2.0
end
# Computes how many population standard deviations `freq` lies from the mean
# of `population`.
#
# @param freq [Numeric] the observed value
# @param population [Array<Numeric>] historical samples to compare against
# @return [Float] `(freq - mean) / std`; 0.0 when `freq` equals the mean of a
#   varying population, and +/-Infinity when `freq` differs from a population
#   whose samples are all identical
# @raise [RuntimeError] if `population` is empty, or if the population has no
#   variance and `freq` equals its mean (the score would be 0/0)
def zscore(freq, population)
  raise "population can't be 0" if population.empty?

  size = population.length
  avg = population.sum / Float(size)
  variance = population.sum { |sample| (sample - avg)**2 } / size
  std = Math.sqrt(variance)

  # A value sitting exactly on the mean is a legitimate z-score of 0.0; only
  # the undefined 0/0 case is an error.
  raise "there's no variance in the data" if std.zero? && freq == avg

  (freq - avg) / std
end
# Demo: frequency of each term during the past hour.
past_hour = [
  ["matematika", 200],
  ["biologi", 39]
]

# Demo: [term, frequency] samples collected over the past 24 hours; each term
# appears many times.
past_24_hours = [
  ["biologi", 1],
  ["matematika", 90],
  ["biologi", 10],
  ["matematika", 102],
  ["biologi", 11],
  ["matematika", 110],
  ["biologi", 25],
  ["matematika", 50],
  ["biologi", 18],
  ["matematika", 205],
  ["biologi", 10],
  ["matematika", 200],
  ["biologi", 18],
  ["matematika", 10],
  ["biologi", 3],
  ["matematika", 90],
  ["biologi", 14],
  ["matematika", 100],
  ["biologi", 18],
  ["matematika", 99],
  ["biologi", 21],
  ["matematika", 97],
  ["biologi", 15],
  ["matematika", 100],
  ["biologi", 22],
  ["matematika", 89],
  ["biologi", 15],
  ["matematika", 96],
  ["biologi", 20],
  ["matematika", 175],
  ["biologi", 23],
  ["matematika", 199],
  ["biologi", 18],
  ["matematika", 96],
  ["biologi", 19],
  ["matematika", 121],
  ["biologi", 14],
  ["matematika", 170],
  ["biologi", 24],
  ["matematika", 230],
  ["biologi", 16],
  ["matematika", 142],
  ["biologi", 12],
  ["matematika", 103],
  ["biologi", 14],
  ["matematika", 95],
  ["biologi", 39],
  ["matematika", 200]
]

# Print the two top-ranked terms together with their z-scores.
puts trending(2, past_hour, past_24_hours)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
result:
biologi is more trending than matematika