Skip to content

Instantly share code, notes, and snippets.

@morr
Created May 2, 2013 20:10
Show Gist options
  • Save morr/5505016 to your computer and use it in GitHub Desktop.
Save morr/5505016 to your computer and use it in GitHub Desktop.
class Svd < ActiveRecord::Base
Full = 'full'
Partial = 'partial'
serialize :lsa
serialize :entry_ids
serialize :user_ids
attr_accessible :scale, :kind
validates :lsa, presence: true
validates :entry_ids, presence: true
validates :user_ids, presence: true
validates :scale, presence: true, inclusion: { in: [Full, Partial] }
validates :kind, presence: true, inclusion: { in: [Anime.name, Manga.name] }
class << self
def full
where(scale: Full, kind: Anime.name).last
end
def partial
where(scale: Partial, kind: Anime.name).last
end
def calculate!(scale)
new(scale: scale, kind: Anime.name).send :calculate!
end
end
def rank(rates)
scores_vector = Array.new(entry_ids.size, 0)
rates.each do |target_id, score|
entry_index = entry_indexes[target_id]
scores_vector[entry_index] = score if entry_index
end
ranks = lsa.classify_vector scores_vector
kimilar_users = ranks.each_with_object({}) {|(index,similarity),memo| memo[user_ids[index]] = similarity }
end
private
# расчёт SVD
def calculate!
self.user_ids, self.entry_ids = prepare_ids self.scale
rates = prepare_rates user_ids, entry_ids
data_matrix = prepare_matrix rates, user_indexes, entry_indexes
# вычисляем SVD
self.lsa = LSA.new data_matrix
self.save!
end
# основное условие выборки пользовательских оценок
def rate_condition
@rate_condition ||= Entry.squeel { (status.not_eq my{UserRateStatus.get UserRateStatus::Planned}) & (score.not_eq(nil)) & (score > 0) }
end
# подготовка данных для SVD матрицы
def prepare_ids(scale)
if scale == Full
entry_ids = klass.where do
(kind != 'Special')
(kind != 'Music')
end
user_ids = UserRate
.where(rate_condition)
.where(target_type: klass.name, target_id: entry_ids)
.pluck(:user_id)
.uniq
entry_ids = UserRate
.where(rate_condition)
.where(target_type: klass.name, user_id: user_ids, target_id: entry_ids)
.pluck(:target_id)
.uniq
else
entry_ids = klass.where do
(score >= 6) &
(kind != 'Special') &
(kind != 'Music') &
(duration > 5) &
(censored.eq(0)) &
(status != 'Not yet aired') &
(
(aired_at > '1995-01-01') |
((score > 7.5) & (aired_at > '1990-01-01')) |
(score > 8.0) | ((score > 7.7) & (kind.eq('Movie')))
)
end.pluck(:id)
user_ids = UserRate
.where(rate_condition)
.where(target_type: klass.name, target_id: entry_ids)
.group(:user_id)
.having("count(*) > 100 and count(*) < 1000")
.pluck(:user_id)
.uniq
entry_ids = UserRate
.where(rate_condition)
.where(target_type: klass.name, user_id: user_ids, target_id: entry_ids)
.pluck(:target_id)
.uniq
entry_ids = UserRate
.where(rate_condition)
.where(target_type: klass.name, user_id: user_ids, target_id: entry_ids)
.group(:target_id)
.having('count(*) > 4')
.pluck(:target_id)
.uniq
end
[user_ids, entry_ids]
end
# оценки конкретных пользователей по конкретным аниме
def prepare_rates(user_ids, entry_ids)
rates = entry_ids.each_with_object({}) {|v,memo| memo[v] = {} }
UserRate
.where(rate_condition)
.where(target_type: klass.name, user_id: user_ids, target_id: entry_ids)
.select([:id, :target_id, :user_id, :score])
.find_each(batch_size: 10000) do |rate|
rates[rate.target_id][rate.user_id] = rate.score
end
rates
end
# заполнение SVD матрицы
def prepare_matrix(rates, user_indexes, entry_indexes)
data_matrix = SVDMatrix.new user_indexes.size, entry_indexes.size
empty_row = Array.new user_indexes.size, 0
entry_indexes.each do |entry_id,entry_index|
row = empty_row.clone
rates[entry_id].each do |user_id, score|
user_index = user_indexes[user_id]
raise 'nil index' unless user_index # на время отладки
row[user_index] = score
end
raise 'row overflow' if row.size > user_indexes.size # на время отладки
data_matrix.set_row entry_index, row
end
data_matrix
end
def klass
@klass ||= kind.constantize
end
def user_indexes
@user_indexes ||= user_ids.each_with_index.each_with_object({}) {|(id,index),memo| memo[id] = index }
end
def entry_indexes
@entry_indexes ||= entry_ids.each_with_index.each_with_object({}) {|(id,index),memo| memo[id] = index }
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment