Created
August 19, 2011 22:51
-
-
Save rubish/1158234 to your computer and use it in GitHub Desktop.
In response to http://stackoverflow.com/questions/7125980/rails-3-user-matching-system-efficient-search-approach
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
drop table answers; | |
drop table user_answers; | |
drop table accepted_answers; | |
drop table sum_importance; | |
create table answers(id INT primary key not null auto_increment, question_id int); | |
create table user_answers(id INT primary key not null auto_increment, user_id int, question_id int, answer_id int, importance int); | |
create table accepted_answers(id INT primary key not null auto_increment, user_answer_id int, answer_id int); | |
create table sum_importance(id INT primary key not null auto_increment, current_user_id int, other_user_id int, total_importance int, importance int); | |
INSERT INTO answers(question_id) values(1); | |
INSERT INTO answers(question_id) values(2); | |
INSERT INTO answers(question_id) values(3); | |
INSERT INTO answers(question_id) values(4); | |
INSERT INTO answers(question_id) values(5); | |
INSERT INTO answers(question_id) values(2); | |
INSERT INTO answers(question_id) values(7); | |
INSERT INTO answers(question_id) values(5); | |
INSERT INTO answers(question_id) values(4); | |
INSERT INTO answers(question_id) values(6); | |
INSERT INTO user_answers(user_id, question_id, answer_id, importance) values(1, 1, 1, 10); | |
INSERT INTO user_answers(user_id, question_id, answer_id, importance) values(1, 2, 2, 20); | |
INSERT INTO user_answers(user_id, question_id, answer_id, importance) values(1, 3, 3, 10); | |
INSERT INTO user_answers(user_id, question_id, answer_id, importance) values(1, 4, 4, 10); | |
INSERT INTO user_answers(user_id, question_id, answer_id, importance) values(1, 5, 5, 30); | |
INSERT INTO user_answers(user_id, question_id, answer_id, importance) values(2, 2, 6, 30); | |
INSERT INTO user_answers(user_id, question_id, answer_id, importance) values(2, 7, 7, 20); | |
INSERT INTO user_answers(user_id, question_id, answer_id, importance) values(2, 5, 8, 20); | |
INSERT INTO user_answers(user_id, question_id, answer_id, importance) values(3, 4, 9, 30); | |
INSERT INTO user_answers(user_id, question_id, answer_id, importance) values(3, 6, 10, 10); | |
INSERT INTO accepted_answers(user_answer_id, answer_id) values(4, 9); | |
INSERT INTO accepted_answers(user_answer_id, answer_id) values(9, 4); | |
INSERT INTO accepted_answers(user_answer_id, answer_id) values(5, 8); | |
INSERT INTO accepted_answers(user_answer_id, answer_id) values(8, 5); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- use this query to populate importance table for first time | |
-- can update the table on further questions and answers directly | |
-- run the sql once a day to get data in sync, just in case | |
INSERT INTO sum_importance(current_user_id, other_user_id, total_importance, importance) | |
SELECT uit.user_id, ae.user_id as other_user_id, uit.total_importance, SUM(ua.importance) | |
FROM (SELECT user_id, SUM(importance) as total_importance FROM user_answers GROUP BY user_id) uit | |
JOIN user_answers ua | |
ON ua.user_id = uit.user_id | |
JOIN (SELECT | |
-- a.id as answer_id, | |
-- ua.id user_answer_id, | |
ua.user_id as user_id, | |
-- ua.importance as importance, | |
ua.question_id as question_id | |
FROM answers a | |
JOIN user_answers ua | |
ON ua.answer_id = a.id | |
JOIN accepted_answers aa | |
ON aa.answer_id = a.id) ae -- ae == anwers_expanded | |
ON ua.question_id = ae.question_id | |
WHERE uit.user_id != ae.user_id | |
GROUP BY uit.user_id, ae.user_id, uit.total_importance; | |
-- percentage matches for users | |
-- For each current_user_id, other_user_id is only listed if current user has accepted an answer from them | |
-- otherwise percentage match is obviously 0 | |
-- add a where clause: "WHERE imp1.current_user_id = user_id" at end to get percentage match for a single user | |
SELECT | |
COALESCE(SQRT( (100.0*imp1.importance/imp1.total_importance) * (100.0*imp2.importance/imp2.total_importance) ), 0) as percentage_match, | |
imp1.current_user_id, | |
imp1.other_user_id | |
FROM sum_importance imp1 | |
LEFT OUTER JOIN sum_importance imp2 | |
ON imp1.other_user_id = imp2.current_user_id AND imp1.current_user_id = imp2.other_user_id; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Tables: | |
Users(:id, :username, etc.) | |
Questions(:id, :text) | |
Answers(:id, :question_id, :text) | |
UserAnswers(:id, :user_id, :question_id, :answer_id, :importance) | |
AcceptedAnswers(:id, :user_answer_id, :answer_id) | |
Relations | |
Questions <-> Answers: one-to-many | |
Questions <-> UserAnswers: one-to-many | |
Users <-> UserAnswers: one-to-many | |
UserAnswers <-> AcceptableAnswers: one-to-many |
For some reason I don't know, the value in the column "importance" somehow always is twice as big as it should be. I used exactly the same sql.
Have you tried it with the sample data I populated, I confirmed it is working fine. If still not working can you provide some sample data to test with?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
updated my stackoverflow question with my database model, hopefully that makes things a little bit clearer. Cool that you took the time though