Skip to content

Instantly share code, notes, and snippets.

@2no
Last active July 11, 2016 02:45
Show Gist options
  • Save 2no/ac65ae1b805a80ddc19147b4841c1468 to your computer and use it in GitHub Desktop.
Save 2no/ac65ae1b805a80ddc19147b4841c1468 to your computer and use it in GitHub Desktop.
" Sample rating data used by the functions in this script.
" Outer keys are person names; each value maps an item name to that
" person's rating (a Float). Not every person has rated every item.
let s:dataset = {
      \ '山田': {
      \   'カレー': 2.5,
      \   'ラーメン': 3.5,
      \   'チャーハン': 3.0,
      \   '寿司': 3.5,
      \   '牛丼': 2.5,
      \   'うどん': 3.0,
      \ },
      \ '田中': {
      \   'カレー': 3.0,
      \   'ラーメン': 3.5,
      \   'チャーハン': 1.5,
      \   '寿司': 5.0,
      \   'うどん': 3.0,
      \   '牛丼': 3.5,
      \ },
      \ '佐藤': {
      \   'カレー': 2.5,
      \   'ラーメン': 3.0,
      \   '寿司': 3.5,
      \   'うどん': 4.0,
      \ },
      \ '中村': {
      \   'ラーメン': 3.5,
      \   'チャーハン': 3.0,
      \   'うどん': 4.5,
      \   '寿司': 4.0,
      \   '牛丼': 2.5,
      \ },
      \ '川村': {
      \   'カレー': 3.0,
      \   'ラーメン': 4.0,
      \   'チャーハン': 2.0,
      \   '寿司': 3.0,
      \   'うどん': 3.0,
      \   '牛丼': 2.0,
      \ },
      \ '鈴木': {
      \   'カレー': 3.0,
      \   'ラーメン': 4.0,
      \   'うどん': 3.0,
      \   '寿司': 5.0,
      \   '牛丼': 3.5,
      \ },
      \ '下林': {
      \   'ラーメン': 4.5,
      \   '牛丼': 1.0,
      \   '寿司': 4.0,
      \ },
      \ }
" Euclidean-distance based similarity between two people.
"
" Only items rated by both people contribute to the distance.
"
" Arguments:
"   person1, person2 - keys of s:dataset.
" Returns:
"   1 / (1 + distance) as a Float, so identical ratings on the shared items
"   give 1.0 and the score shrinks towards 0 as the ratings diverge.
"   Returns 0.0 when the two people have no rated item in common.
function! s:calcSimilarityScore(person1, person2)
  let l:ratings1 = s:dataset[a:person1]
  let l:ratings2 = s:dataset[a:person2]
  let l:sharedCount = 0
  let l:squaredDistance = 0.0
  for l:name in keys(l:ratings1)
    if has_key(l:ratings2, l:name)
      let l:sharedCount += 1
      " Accumulate numerically: building a string with join() and eval()-ing
      " it would round every term to the six significant digits of string().
      let l:squaredDistance += pow(l:ratings1[l:name] - l:ratings2[l:name], 2)
    endif
  endfor
  if l:sharedCount == 0
    " Float, like every other return path, so printf('%f', ...) always works.
    return 0.0
  endif
  return 1 / (1 + sqrt(l:squaredDistance))
endfunction
" Pearson correlation coefficient between two people's ratings.
"
" Only items rated by both people are taken into account.
"
" Arguments:
"   person1, person2 - keys of s:dataset.
" Returns:
"   The correlation as a Float. Returns 0.0 when there is no shared item or
"   when either person's shared ratings have no spread (the coefficient is
"   undefined in that case).
function! s:calcPearsonCorrelation(person1, person2)
  let l:ratings1 = s:dataset[a:person1]
  let l:ratings2 = s:dataset[a:person2]
  let l:sharedCount = 0
  let l:sum1 = 0.0
  let l:sum2 = 0.0
  let l:squareSum1 = 0.0
  let l:squareSum2 = 0.0
  let l:productSum = 0.0
  for l:name in keys(l:ratings1)
    if !has_key(l:ratings2, l:name)
      continue
    endif
    let l:sharedCount += 1
    let l:sum1 += l:ratings1[l:name]
    let l:sum2 += l:ratings2[l:name]
    let l:squareSum1 += pow(l:ratings1[l:name], 2)
    let l:squareSum2 += pow(l:ratings2[l:name], 2)
    let l:productSum += l:ratings1[l:name] * l:ratings2[l:name]
  endfor
  if l:sharedCount == 0
    return 0.0
  endif
  " Sums of squared deviations from the mean, one per person.
  let l:deviation1 = l:squareSum1 - pow(l:sum1, 2) / l:sharedCount
  let l:deviation2 = l:squareSum2 - pow(l:sum2, 2) / l:sharedCount
  " Mathematically these are >= 0, but rounding can push a zero spread
  " slightly negative. Check each term before sqrt(): a negative product
  " would yield NaN, and two negative terms would yield a bogus ratio.
  if l:deviation1 <= 0 || l:deviation2 <= 0
    return 0.0
  endif
  let l:numeratorValue = l:productSum - (l:sum1 * l:sum2 / l:sharedCount)
  return l:numeratorValue / sqrt(l:deviation1 * l:deviation2)
endfunction
" Returns the people most similar to a:person, best match first.
"
" Arguments:
"   person        - key of s:dataset to compare everyone else against.
"   numberOfUsers - maximum number of entries to return.
" Returns:
"   A List of [name, score] pairs ordered by s:compareScore (highest score
"   first), where score is s:calcPearsonCorrelation(person, name). The list
"   is empty when numberOfUsers is zero or negative.
function! s:getMostSimilarUsers(person, numberOfUsers)
  " Without this guard the slice below becomes list[: -1] for a count of 0,
  " which selects the whole list instead of nothing.
  if a:numberOfUsers <= 0
    return []
  endif
  let l:ranked = []
  for l:name in keys(s:dataset)
    if l:name !=# a:person
      call add(l:ranked, [l:name, s:calcPearsonCorrelation(a:person, l:name)])
    endif
  endfor
  return sort(l:ranked, 's:compareScore')[: a:numberOfUsers - 1]
endfunction
" Recommends items to a:person based on what similar people rated.
"
" Every other person whose Pearson correlation with a:person is not <= 0
" contributes their ratings, weighted by that correlation, for each item
" a:person has not rated (or has rated 0). The predicted score of an item is
" the weighted rating total divided by the total of the weights.
"
" Arguments:
"   person - key of s:dataset.
" Returns:
"   A List of item names ordered by s:compareScore on the predicted score
"   (highest first).
function! s:getUserRecommendations(person)
  let l:weightedTotals = {}
  let l:weightTotals = {}
  for l:name in keys(s:dataset)
    if l:name ==# a:person
      continue
    endif
    let l:weight = s:calcPearsonCorrelation(a:person, l:name)
    if l:weight <= 0
      continue
    endif
    let l:ratings = s:dataset[l:name]
    for l:food in keys(l:ratings)
      if get(s:dataset[a:person], l:food) == 0
        let l:weightedTotals[l:food] = get(l:weightedTotals, l:food, 0) + l:ratings[l:food] * l:weight
        let l:weightTotals[l:food] = get(l:weightTotals, l:food, 0) + l:weight
      endif
    endfor
  endfor
  let l:predicted = {}
  for l:food in keys(l:weightedTotals)
    let l:predicted[l:food] = l:weightedTotals[l:food] / l:weightTotals[l:food]
  endfor
  let l:ordered = sort(items(l:predicted), 's:compareScore')
  return map(l:ordered, 'v:val[0]')
endfunction
" Comparator for sort() that orders [name, score] pairs by descending score.
"
" Arguments:
"   lhs, rhs - Lists whose second element (index 1) is the score.
" Returns:
"   1 when lhs has the lower score, -1 when it has the higher score and 0
"   when both scores are equal, so that ties are reported as ties to sort().
function! s:compareScore(lhs, rhs)
  if a:lhs[1] == a:rhs[1]
    return 0
  endif
  return a:lhs[1] < a:rhs[1] ? 1 : -1
endfunction
" Demo: print sample results computed from s:dataset.
" Euclidean-distance similarity between two people.
echo printf('山田さんと鈴木さんの類似度 (ユークリッド距離) %f', s:calcSimilarityScore('山田', '鈴木'))
" Pearson correlation between two people.
echo printf('山田さんと田中さんの類似度 (ピアソン相関係数) %f', s:calcPearsonCorrelation('山田', '田中'))
" The three people whose ratings correlate best with the given person.
echo s:getMostSimilarUsers('山田', 3)
" Item names recommended to the given person, best first.
echo s:getUserRecommendations('下林')
山田さんと鈴木さんの類似度 (ユークリッド距離) 0.340542
山田さんと田中さんの類似度 (ピアソン相関係数) 0.396059
[['下林', 0.991241], ['鈴木', 0.747018], ['川村', 0.594089]]
['うどん', 'カレー', 'チャーハン']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment