Created
July 12, 2011 20:51
-
-
Save otobrglez/1078953 to your computer and use it in GitHub Desktop.
Finding related articles using Jaccard index and tags
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This method finds related articles using Jaccard index (optimized for PostgreSQL). | |
# More info: http://en.wikipedia.org/wiki/Jaccard_index | |
class Article < ActiveRecord::Base | |
def related(limit=10) | |
Article.find_by_sql(%Q{ | |
SELECT | |
a.*, | |
( SELECT array_agg(t.name) FROM taggings tg, tags t | |
WHERE tg.taggable_id = a.id AND tg.tag_id = t.id | |
) as tags, | |
((SELECT COUNT(*) FROM | |
((SELECT t.name FROM taggings tg, tags t | |
WHERE tg.taggable_id = a.id AND tg.tag_id = t.id | |
) INTERSECT | |
(SELECT t_2.name FROM taggings tg_2, tags t_2 | |
WHERE tg_2.taggable_id = #{self.id} AND tg_2.tag_id = t_2.id | |
)) as intersection | |
)::float / | |
(SELECT COUNT(*) FROM | |
((SELECT t.name FROM taggings tg, tags t | |
WHERE tg.taggable_id = a.id AND tg.tag_id = t.id | |
) UNION | |
(SELECT t_2.name FROM taggings tg_2, tags t_2 | |
WHERE tg_2.taggable_id = #{self.id} AND tg_2.tag_id = t_2.id | |
)) as union_total | |
)::float) as score | |
FROM | |
articles a | |
WHERE | |
a.published = 1 AND | |
a.id != #{self.id} | |
ORDER BY score DESC, a.created_at DESC | |
LIMIT #{limit} | |
}) | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
"array_agg part" can be skipped...