Skip to content

Instantly share code, notes, and snippets.

@sarogers
Last active August 29, 2015 14:11
Show Gist options
  • Save sarogers/8073dee01d448ef04a4a to your computer and use it in GitHub Desktop.
Save sarogers/8073dee01d448ef04a4a to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'set'
class Node
attr_accessor :email, :neighbors
def initialize(email)
@email = email
@neighbors = Set.new
end
def bidirectional_neighbors
@bidirectional_neighbors ||= Set.new(self.neighbors.select { |n| n.has_neighbor?(self) })
end
def has_neighbor?(node)
self.neighbors.include?(node)
end
def bidirectional_neighbor?(node)
self.bidirectional_neighbors.include?(node)
end
def inspect
"#<Node:#{self.email} ...>"
end
def <=>(other_email)
other_email <=> email
end
end
def run
input_path = ARGV.first
raw_edges = File.read(input_path).each_line.map do |line|
line.strip.split(/\t/)
end
graph = assemble_graph(raw_edges)
clusters = find_clusters(graph.dup)
print_clusters(clusters)
end
def assemble_graph(raw_edges)
raw_edges.each_with_object({}) do |raw_edge, nodes|
_, sender_email, receiver_email = raw_edge
node = (nodes[sender_email] ||= Node.new(sender_email))
node.neighbors.add (nodes[receiver_email] || (nodes[receiver_email] = Node.new(receiver_email)))
end.values
end
def find_clusters(graph)
used_pairs = Set.new
graph.each_with_object(Set.new) do |node, clusters|
bineighbors = node.bidirectional_neighbors.to_a.reject { |n| used_pairs.include?(Set.new([node, n])) }
while bineighbors.any?
cluster_node = bineighbors.shift
cluster = [node, cluster_node]
used_pairs.add(Set.new([node, cluster_node]))
bineighbors.each do |bineighbor|
if cluster_node.bidirectional_neighbor?(bineighbor)
used_pairs.add(Set.new([node, bineighbor]))
cluster << bineighbor
end
end
clusters.add(cluster.sort!) if cluster.length >= 3
bineighbors = bineighbors - cluster
end
end
end
def print_clusters(clusters)
clusters.each do |cluster|
puts cluster.map(&:email).join(', ')
end
end
run
Thu Dec 11 17:53:01 PST 2008 [email protected] [email protected]
Thu Dec 11 17:53:02 PST 2008 [email protected] [email protected]
Thu Dec 11 17:53:03 PST 2008 [email protected] [email protected]
Thu Dec 11 17:53:04 PST 2008 [email protected] [email protected]
Thu Dec 11 17:53:05 PST 2008 [email protected] [email protected]
Thu Dec 11 17:53:06 PST 2008 [email protected] [email protected]
Thu Dec 11 17:53:07 PST 2008 [email protected] [email protected]
Thu Dec 11 17:53:08 PST 2008 [email protected] [email protected]
Thu Dec 11 17:53:09 PST 2008 [email protected] [email protected]
Thu Dec 11 17:53:10 PST 2008 [email protected] [email protected]
Thu Dec 11 17:53:11 PST 2008 [email protected] [email protected]
Thu Dec 11 17:53:12 PST 2008 [email protected] [email protected]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment