Forked from thomasnield/reactive_data_analysis.py
Last active
October 25, 2016 20:44
-
-
Save eenblam/7744a691ad0030e594671fb18b96cee1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from rx import Observable, Observer | |
from collections import defaultdict | |
# Sample users; each record's "id" equals its position in this list.
users = [
    {"id": 0, "name": "Hero"},
    {"id": 1, "name": "Dunn"},
    {"id": 2, "name": "Sue"},
    {"id": 3, "name": "Chi"},
    {"id": 4, "name": "Thor"},
    {"id": 5, "name": "Clive"},
    {"id": 6, "name": "Hicks"},
    {"id": 7, "name": "Devin"},
    {"id": 8, "name": "Kate"},
    {"id": 9, "name": "Klein"},
]
# Friendship pairs of user ids. The code below treats a pair as undirected:
# it matches a user on either position of the tuple.
friendships = [
    (0, 1),
    (0, 2),
    (1, 2),
    (1, 3),
    (2, 3),
    (3, 4),
    (4, 5),
    (5, 6),
    (5, 7),
    (6, 8),
    (7, 8),
    (8, 9),
]
# (user id, interest) pairs, grouped here one user per line.
interests = [
    (0, "Hadoop"), (0, "Big Data"), (0, "HBase"), (0, "Java"),
    (0, "Spark"), (0, "Storm"), (0, "Cassandra"),
    (1, "NoSQL"), (1, "MongoDB"), (1, "Cassandra"), (1, "HBase"),
    (1, "Postgres"),
    (2, "Python"), (2, "scikit-learn"), (2, "scipy"),
    (2, "numpy"), (2, "statsmodels"), (2, "pandas"),
    (3, "R"), (3, "Python"),
    (3, "statistics"), (3, "regression"), (3, "probability"),
    (4, "machine learning"), (4, "regression"), (4, "decision trees"),
    (4, "libsvm"),
    (5, "Python"), (5, "R"), (5, "Java"), (5, "C++"),
    (5, "Haskell"), (5, "programming languages"),
    (6, "statistics"), (6, "probability"), (6, "mathematics"), (6, "theory"),
    (7, "machine learning"), (7, "scikit-learn"), (7, "Mahout"),
    (7, "neural networks"),
    (8, "neural networks"), (8, "deep learning"),
    (8, "Big Data"), (8, "artificial intelligence"),
    (9, "Hadoop"), (9, "Java"), (9, "MapReduce"), (9, "Big Data"),
]
def get_friends(user):
    """Return an Observable emitting the user records that are friends of `user`.

    A friendship counts for `user` when their id appears in either position
    of a pair in the module-level `friendships` list.

    Args:
        user: a user dict with an "id" key (an entry of `users`).

    Returns:
        An Observable of user dicts taken from the module-level `users` list.
    """
    return (Observable.from_(friendships)
            # keep only the pairs that involve this user
            .filter(lambda friendship: friendship[0] == user["id"]
                    or friendship[1] == user["id"])
            # flatten each pair into its two ids, then drop the user's own id
            .flat_map(lambda friendship: Observable.from_(friendship))
            .filter(lambda member_id: member_id != user["id"])
            # resolve each remaining id back to its full user record
            .flat_map(lambda friend_id: Observable.from_(users)
                      .filter(lambda candidate: candidate["id"] == friend_id)))
# Emit the friends of "Chi" (users[3]), printing each user record.
print("Friends of \"Chi\"")
get_friends(users[3]).subscribe(print)
# Count each user's friends and print (name, count) pairs, highest count first.
print("\r\nUsers and friend counts, sorted descending")
(Observable.from_(users)
    .flat_map(lambda user: get_friends(user)
              .count()
              .map(lambda friend_count: (user["name"], friend_count)))
    # collect every pair into one list, because sorting needs the full set
    .to_list()
    .map(lambda pairs: sorted(pairs, key=lambda pair: pair[1], reverse=True))
    # re-emit the sorted pairs one at a time
    .flat_map(lambda pairs: Observable.from_(pairs))
    .subscribe(print))
# Get the mutual friends of Hero and Chi (the pair queried further down).
print("\r\nMutual friends of Hero and Chi")
def get_mutual_friends(user, other_user):
    """Return an Observable emitting the friends that `user` and `other_user` share.

    Each friend of `other_user` (other than `user`) is emitted once if it
    also appears among the friends of `user`.

    Args:
        user: a user dict with an "id" key.
        other_user: a user dict with an "id" key.

    Returns:
        An Observable of user dicts.
    """
    return (get_friends(other_user)
            # `user` cannot be a mutual friend of themselves
            .filter(lambda candidate: candidate["id"] != user["id"])
            .flat_map(lambda candidate: get_friends(user)
                      .filter(lambda user_friend: user_friend["id"] == candidate["id"])
                      # count() always emits one value, so the candidate is
                      # emitted at most once, and only when a match was found
                      .count()
                      .filter(lambda match_count: match_count > 0)
                      .map(lambda _: candidate)))
hero = users[0]
chi = users[3]
# Print each friend that Hero and Chi have in common.
get_mutual_friends(hero, chi).subscribe(print)
# Rank Chi's friends by how many mutual friends each one shares with Chi.
print("\r\nRanked friends of Chi by mutual friend count")
(get_friends(chi)
    .flat_map(lambda friend: get_mutual_friends(chi, friend)
              .count()
              .map(lambda mutual_count: (friend["name"], mutual_count)))
    # collect every pair into one list, because sorting needs the full set
    .to_list()
    .map(lambda pairs: sorted(pairs, key=lambda pair: pair[1], reverse=True))
    # re-emit the sorted pairs one at a time
    .flat_map(lambda pairs: Observable.from_(pairs))
    .subscribe(print))
# finding common interests
def data_scientists_who_like(target_interest):
    """Return an Observable emitting the user records that list `target_interest`.

    Args:
        target_interest: interest name, compared for exact equality against
            the second element of each entry in `interests`.

    Returns:
        An Observable of user dicts taken from the module-level `users` list.
    """
    return (Observable.from_(interests)
            .filter(lambda applied_interest: applied_interest[1] == target_interest)
            .map(lambda applied_interest: applied_interest[0])
            # resolve each matching user id back to its full user record
            .flat_map(lambda user_id: Observable.from_(users)
                      .filter(lambda candidate: candidate["id"] == user_id)))
def interests_for_data_scientist(user):
    """Return an Observable emitting the interest names recorded for `user`.

    Args:
        user: a user dict with an "id" key.

    Returns:
        An Observable of interest strings, in the order they appear in the
        module-level `interests` list.
    """
    return (Observable.from_(interests)
            .filter(lambda applied_interest: applied_interest[0] == user["id"])
            .map(lambda applied_interest: applied_interest[1]))
def common_interests_between(user, other_user):
    """Return an Observable emitting the interests shared by two users.

    For every interest of `user`, the interests of `other_user` are scanned
    and each equal entry is emitted.

    Args:
        user: a user dict with an "id" key.
        other_user: a user dict with an "id" key.

    Returns:
        An Observable of interest strings.
    """
    return (interests_for_data_scientist(user)
            .flat_map(lambda interest: interests_for_data_scientist(other_user)
                      .filter(lambda other_interest: interest == other_interest)))
def common_interest_count(user):
    """Return an Observable ranking every other user by interests shared with `user`.

    Args:
        user: a user dict with an "id" key.

    Returns:
        An Observable of (name, count) tuples, one per user other than
        `user`, ordered by count descending.
    """
    return (Observable.from_(users)
            .filter(lambda other_user: other_user["id"] != user["id"])
            .flat_map(lambda other_user: common_interests_between(user, other_user)
                      .count()
                      .map(lambda shared_count: (other_user["name"], shared_count)))
            # collect every pair into one list, because sorting needs the full set
            .to_list()
            .map(lambda pairs: sorted(pairs, key=lambda pair: pair[1], reverse=True))
            # re-emit the sorted pairs one at a time
            .flat_map(lambda pairs: Observable.from_(pairs)))
# Print (name, count) pairs of interests each other user shares with Chi.
print("\r\nCommon interest counts for Chi")
common_interest_count(users[3]).subscribe(print)
# OUTPUT: | |
# | |
# Friends of "Chi" | |
# {'id': 1, 'name': 'Dunn'} | |
# {'id': 2, 'name': 'Sue'} | |
# {'id': 4, 'name': 'Thor'} | |
# | |
# Users and friend counts, sorted descending | |
# ('Dunn', 3) | |
# ('Sue', 3) | |
# ('Chi', 3) | |
# ('Clive', 3) | |
# ('Kate', 3) | |
# ('Hero', 2) | |
# ('Thor', 2) | |
# ('Hicks', 2) | |
# ('Devin', 2) | |
# ('Klein', 1) | |
# | |
# Mutual friends of Hero and Chi | |
# {'id': 1, 'name': 'Dunn'} | |
# {'id': 2, 'name': 'Sue'} | |
# | |
# Ranked friends of Chi by mutual friend count | |
# ('Dunn', 1) | |
# ('Sue', 1) | |
# ('Thor', 0) | |
# | |
# Common interest counts for Chi
# ('Clive', 2) | |
# ('Hicks', 2) | |
# ('Sue', 1) | |
# ('Thor', 1) | |
# ('Hero', 0) | |
# ('Dunn', 0) | |
# ('Devin', 0) | |
# ('Kate', 0) | |
# ('Klein', 0) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment