Created
December 20, 2011 17:01
-
-
Save ptn/1502310 to your computer and use it in GitHub Desktop.
Six Degrees of Separation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'set' | |
# | |
# Analyzes Twitter conversations. | |
# | |
# Usage: | |
# | |
# Instantiate a TwitterConversation::Graph object and feed it some | |
# sample tweets: | |
# | |
# > graph = TwitterConversation::Graph.new | |
# > graph.feed_tweet "username: hi this is a sample tweet /cc @my_friend" | |
# > graph.feed_tweet "husband: @wife omg I forgot the milk" | |
# > graph.feed_tweet "my_friend: @username where's my money!?" | |
# > graph.feed_tweet "username: @wife hello there ;)" | |
# > graph.feed_tweet "wife: @username not now" | |
# > graph.feed_tweet "wife: @husband It's ok I'll be home late" | |
# > graph.feed_tweet "irrelevant: hi @username" | |
# | |
# Then, you can query it about the connections of a user and receive a | |
# nested array of TwitterConversation::User objects: | |
# | |
# > graph.connections username_obj # => [["my_friend", "wife"], ["husband"]] | |
# > graph.connections husband_obj # => [["wife"], ["username"], ["my_friend"]] | |
# > graph.connections wife_obj # => [["username", "husband"], ["my_friend"]] | |
# > graph.connections irrelevant_obj # => [] | |
# | |
# Internally, this works by building a graph and counting how many | |
# links it takes to get to another user to determine the order of the | |
# connection. For the sample tweets, the graph would look like this: | |
# | |
# W -- H | |
# | I | |
# U -- M | |
# | |
module TwitterConversation | |
# | |
# A Twitter user. | |
# | |
# Keeps track of all other users he/she has mentioned. If a member | |
# of the @mentions set is found to also have mentioned this user, it | |
# is also added to the @mutual_mentions set. | |
# | |
# Essentially, a User object constitutes a node in the graph and the | |
# elements of @mutual_mentions, it's direct links. | |
# | |
class User | |
include Comparable | |
attr_accessor :username | |
attr_reader :mentions | |
def initialize(username) | |
@username = username | |
@mentions = Set.new | |
@mutual_mentions = Set.new | |
end | |
def mutual_mentions | |
@mutual_mentions.sort | |
end | |
def mentioned(user) | |
@mentions << user | |
if user.mentions.include? self | |
@mutual_mentions << user | |
user.add_mutual_mention self | |
end | |
end | |
def <=>(other) | |
@username <=> other.username | |
end | |
def to_s | |
@username | |
end | |
protected | |
# Invoked only by other users if they find a mutual mention | |
# between self and them. | |
def add_mutual_mention(user) | |
@mutual_mentions << user | |
end | |
end | |
# | |
# Builds a graph of Twitter users based on mutual mentions. | |
# | |
# After being fed all the sample tweets, this class can be queried | |
# to extract all the connections for any user. | |
# | |
# Instance variables: | |
# | |
# * all_users : array containing every user whether he/she has tweeted or not. | |
# * active_users : set containing users that have tweeted at least once. | |
# | |
class Graph | |
def initialize | |
@all_users = [] | |
@active_users = Set.new | |
end | |
# Client code only ever cares about users that have tweeted. | |
def users | |
@active_users.sort | |
end | |
def feed_tweet(tweet) | |
username, content = tweet.split(":", 2) | |
user = get_or_create_user(username) | |
@active_users << user | |
mentions = content.scan(/(?<=@)\w+/) | |
mentions.each do |username| | |
mentioned_user = get_or_create_user(username) | |
user.mentioned mentioned_user | |
end | |
end | |
# | |
# Returns the connections for the given user. | |
# | |
# Traverses the graph breadth-first in alphabetical order. | |
# | |
# Data structures: | |
# | |
# * graph: Connections for given user up to some order. | |
# * degree: Users that are at the same distance from user and not | |
# yet in graph. | |
# * next_degree: Users one degree more away. | |
# * connected: Users already inserted in the graph at some degree. | |
# | |
# Note that the combination of degree and next_degree represents | |
# those users that still have to be added to graph. | |
# | |
def connections(user) | |
graph = [] | |
degree = user.mutual_mentions.dup | |
next_degree = [] | |
connected = [user] | |
while !(degree + next_degree).empty? | |
degree.each do |connection| | |
connected << connection | |
connection.mutual_mentions.each do |candidate| | |
unless (connected.include? candidate) || | |
(degree.include? candidate) || | |
(next_degree.include? candidate) || | |
next_degree << candidate | |
end | |
end | |
end | |
graph << degree.sort | |
degree = next_degree | |
next_degree = [] | |
end | |
graph | |
end | |
private | |
def get_or_create_user(username) | |
query = @all_users.select { |u| u.username == username } | |
user = query.first | |
unless user | |
user = User.new(username) | |
@all_users << user | |
end | |
user | |
end | |
end | |
end | |
graph = TwitterConversation::Graph.new | |
File.open(ARGV[0]) do |f| | |
f.read.each_line do |line| | |
graph.feed_tweet(line) | |
end | |
end | |
graph.users.each do |user| | |
puts user.username | |
graph.connections(user).each do |degree| | |
puts degree.join(", ") | |
end | |
puts | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment