Skip to content

Instantly share code, notes, and snippets.

@huydx
Created May 10, 2013 14:45
Show Gist options
  • Save huydx/5554849 to your computer and use it in GitHub Desktop.
Save huydx/5554849 to your computer and use it in GitHub Desktop.
recommender
# -*- coding: utf-8 -*-
$:.unshift File.dirname(__FILE__)
require 'yaml'
require 'bundler'
require 'rgl/adjacency'
require 'rgl/bidirectional'
#load all necessary environment related things
def loadenv
config = YAML.load_file('config.yml')
libdir = config['lib']
Bundler.require(config['environment'].to_sym)
Dir[File.expand_path(libdir)].each {|file| require file }
end
loadenv()
#add some ultilities method to
#manipulate directed graph better
class RGL::DirectedAdjacencyGraph
def in_degree(v)
rdg = self.reverse
rdg.adjacent_vertices(v).size
end
def out_degree(v)
self.adjacent_vertices(v).size
end
end
#alias for easy-understand name
DBHelper = Database
UsersHelper = UserFriends
DirectedGraph = RGL::DirectedAdjacencyGraph
#recommender main class
#each recommender instance relate to one *User*
#each recommender instance will recommend (friends, url) to this *User*
class FriendRecommender
attr_accessor :uid, :graph_instance, :depth
def initialize(uid, depth, walk_step=3, max_walk_loop = 1000)
@walk_step = walk_step
@uid = uid
@depth = depth
@max_walk_loop = max_walk_loop
@graph_instance = DirectedGraph.new
@recommend_list = Hash.new
end
def create_graph
bfs_each { |from, to|
#p "connect #{from}==>#{to}"
[from, to].each {|v|
@graph_instance.add_vertex(v) if !@graph_instance.has_vertex?(v)
}
@graph_instance.add_edge(from, to) if !@graph_instance.has_edge?(from, to)
}
end
#bread-first traversing, yeild each pair of node
def bfs_each
traversed_hash = Hash.new #hash to store uniqued id which is already traversed and store depth
traverse_queue = Queue.new
traverse_queue.push(@uid)
traversed_hash[@uid] = 0 #store first node with depth 0
idx = 0
while !traverse_queue.empty? do
row = nil
idx += 1
current_node = traverse_queue.pop #current traversing node
c_dep = traversed_hash[current_node] #depth of current traversing node
#p "current depth: #{c_dep} and idx is #{idx}"
#process for this node
begin
return if c_dep == @depth
rows = UsersHelper.find({'twitter_uid'=>"#{current_node}", 'is_crawled'=>true})
rows.each { |r| row = r }
friends = row["twitter_friend_ids"]
friends.each {|f|
yield current_node, f
if !traversed_hash.has_key?(f)
traverse_queue.push(f)
traversed_hash[f] = c_dep + 1
@recommend_list[f] = 0
end
}
rescue Exception=>e
end
end
end
def random_walk
max_try = 30
loop_t = @max_walk_loop
while loop_t > 0
next_v = @uid
@walk_step.times do
t = max_try
while t >= 0 #loop while has some edges with max_try
edges = @graph_instance.adjacent_vertices(next_v)
if edges.nil?
t -= 1
next
else
next_v = nil
begin
next_v = edges[rand(edges.length)]
end while !next_v
@recommend_list[next_v] += 1 #i'm walking!!
p 'i"m walking!!'
break
end
end
end
loop_t -= 1
end
end
def visualize_to_file
#@graph_instance.write_to_graphic_file
end
end
beginning_time = Time.now
r = FriendRecommender.new('4975841', 4)
r.create_graph
end_time = Time.now
p "create graph width depth 4 takes #{(end_time - beginning_time)*1000/60} minutes"
r.random_walk
r.visualize_to_file
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment