Last active
July 11, 2016 02:45
-
-
Save 2no/ac65ae1b805a80ddc19147b4841c1468 to your computer and use it in GitHub Desktop.
Vim Script へ雑移植 http://qiita.com/ynakayama/items/59beb40b7c3829cc0bf2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
" Sample preference data used by every function in this script:
"   reviewer name -> { dish name -> rating (Float) }
" Reviewers do not all rate the same dishes; a missing key means "not rated".
let s:dataset = {}
let s:dataset['山田'] = {
      \ 'カレー': 2.5, 'ラーメン': 3.5, 'チャーハン': 3.0,
      \ '寿司': 3.5, '牛丼': 2.5, 'うどん': 3.0,
      \ }
let s:dataset['田中'] = {
      \ 'カレー': 3.0, 'ラーメン': 3.5, 'チャーハン': 1.5,
      \ '寿司': 5.0, 'うどん': 3.0, '牛丼': 3.5,
      \ }
let s:dataset['佐藤'] = {
      \ 'カレー': 2.5, 'ラーメン': 3.0,
      \ '寿司': 3.5, 'うどん': 4.0,
      \ }
let s:dataset['中村'] = {
      \ 'ラーメン': 3.5, 'チャーハン': 3.0, 'うどん': 4.5,
      \ '寿司': 4.0, '牛丼': 2.5,
      \ }
let s:dataset['川村'] = {
      \ 'カレー': 3.0, 'ラーメン': 4.0, 'チャーハン': 2.0,
      \ '寿司': 3.0, 'うどん': 3.0, '牛丼': 2.0,
      \ }
let s:dataset['鈴木'] = {
      \ 'カレー': 3.0, 'ラーメン': 4.0, 'うどん': 3.0,
      \ '寿司': 5.0, '牛丼': 3.5,
      \ }
let s:dataset['下林'] = {
      \ 'ラーメン': 4.5, '牛丼': 1.0, '寿司': 4.0,
      \ }
" Similarity of two reviewers based on the Euclidean distance between their
" ratings, taken over the dishes that both of them have rated.
"
" a:person1, a:person2  keys of s:dataset.
" Returns the Number 0 when the two share no rated dish; otherwise the Float
" 1 / (1 + sqrt(sum of squared rating differences)), which is 1.0 for
" identical ratings and shrinks toward 0 as the ratings diverge.
function! s:calcSimilarityScore(person1, person2)
  let l:ratings1 = s:dataset[a:person1]
  let l:ratings2 = s:dataset[a:person2]
  let l:sharedCount = 0
  let l:sumOfSquares = 0.0
  for l:dish in keys(l:ratings1)
    if has_key(l:ratings2, l:dish)
      let l:sharedCount += 1
      " Add the terms numerically. Joining them into a string and eval()-ing
      " it would format every Float through string() first and drop digits.
      let l:sumOfSquares += pow(l:ratings1[l:dish] - l:ratings2[l:dish], 2)
    endif
  endfor
  if l:sharedCount == 0
    return 0
  endif
  return 1 / (1 + sqrt(l:sumOfSquares))
endfunction
" Pearson correlation coefficient between two reviewers' ratings, computed
" over the dishes that both of them have rated.
"
" a:person1, a:person2  keys of s:dataset.
" Returns the Number 0 when they share no rated dish or when either
" reviewer's shared ratings have no spread (the coefficient is undefined
" there); otherwise a Float, nominally in [-1, 1].
function! s:calcPearsonCorrelation(person1, person2)
  let l:ratings1 = s:dataset[a:person1]
  let l:ratings2 = s:dataset[a:person2]

  let l:count = 0
  let l:sum1 = 0.0
  let l:sum2 = 0.0
  let l:squareSum1 = 0.0
  let l:squareSum2 = 0.0
  let l:productSum = 0.0
  for l:dish in keys(l:ratings1)
    if !has_key(l:ratings2, l:dish)
      continue
    endif
    let l:count += 1
    let l:sum1 += l:ratings1[l:dish]
    let l:sum2 += l:ratings2[l:dish]
    let l:squareSum1 += pow(l:ratings1[l:dish], 2)
    let l:squareSum2 += pow(l:ratings2[l:dish], 2)
    let l:productSum += l:ratings1[l:dish] * l:ratings2[l:dish]
  endfor
  if l:count == 0
    return 0
  endif

  " Each spread term is count * variance and is never negative in exact
  " arithmetic, but rounding can push it slightly below zero. sqrt() of a
  " negative Float is NaN, and NaN would not match an `== 0` check, so test
  " the terms themselves before taking the root.
  let l:spread1 = l:squareSum1 - pow(l:sum1, 2) / l:count
  let l:spread2 = l:squareSum2 - pow(l:sum2, 2) / l:count
  if l:spread1 <= 0 || l:spread2 <= 0
    return 0
  endif

  let l:numerator = l:productSum - (l:sum1 * l:sum2 / l:count)
  return l:numerator / sqrt(l:spread1 * l:spread2)
endfunction
" Rank every other reviewer by Pearson correlation with a:person.
"
" a:person         key of s:dataset.
" a:numberOfUsers  maximum number of entries to return.
" Returns a List of [name, score] pairs ordered by s:compareScore (highest
" score first), truncated to a:numberOfUsers entries. A count of zero or
" less yields an empty List.
function! s:getMostSimilarUsers(person, numberOfUsers)
  let l:scores = {}
  for l:name in keys(s:dataset)
    if l:name !=# a:person
      let l:scores[l:name] = s:calcPearsonCorrelation(a:person, l:name)
    endif
  endfor
  let l:ranked = sort(items(l:scores), 's:compareScore')
  " A sublist end index of -1 means "through the last item", so slicing with
  " `count - 1` would return everything when the count is 0. Handle
  " non-positive counts explicitly.
  if a:numberOfUsers <= 0
    return []
  endif
  return l:ranked[: a:numberOfUsers - 1]
endfunction
" Recommend dishes that a:person has not rated yet.
"
" Every other reviewer whose Pearson correlation with a:person is positive
" contributes their ratings, weighted by that correlation. A dish's score is
" the weighted rating total divided by the total weight of the reviewers who
" rated it.
"
" a:person  key of s:dataset.
" Returns a List of dish names ordered by s:compareScore on that score.
function! s:getUserRecommendations(person)
  let l:weightedTotals = {}
  let l:weightSums = {}
  for l:name in keys(s:dataset)
    if l:name ==# a:person
      continue
    endif
    let l:weight = s:calcPearsonCorrelation(a:person, l:name)
    if l:weight <= 0
      continue
    endif
    for l:dish in keys(s:dataset[l:name])
      " get() yields 0 for a missing key, so this skips only dishes that
      " a:person already has a non-zero rating for.
      if get(s:dataset[a:person], l:dish) != 0
        continue
      endif
      let l:weightedTotals[l:dish] = get(l:weightedTotals, l:dish, 0)
            \ + s:dataset[l:name][l:dish] * l:weight
      let l:weightSums[l:dish] = get(l:weightSums, l:dish, 0) + l:weight
    endfor
  endfor

  let l:rankings = {}
  for l:dish in keys(l:weightedTotals)
    let l:rankings[l:dish] = l:weightedTotals[l:dish] / l:weightSums[l:dish]
  endfor
  let l:ordered = sort(items(l:rankings), 's:compareScore')
  return map(l:ordered, 'v:val[0]')
endfunction
" Comparator for sort(): orders [name, score] pairs by score, highest first.
"
" a:lhs, a:rhs  two-item Lists whose second item is the score.
" Returns 1 when a:lhs has the lower score (sorts later), -1 when it has the
" higher score, and 0 when the scores are equal. sort() expects 0 for equal
" items; without it a tie would compare as "before" in both directions.
function! s:compareScore(lhs, rhs)
  if a:lhs[1] == a:rhs[1]
    return 0
  endif
  return a:lhs[1] < a:rhs[1] ? 1 : -1
endfunction
" Demo: print the results for the sample data when this script is sourced.

" Euclidean-distance similarity between two reviewers.
echo printf(
      \ '山田さんと鈴木さんの類似度 (ユークリッド距離) %f',
      \ s:calcSimilarityScore('山田', '鈴木'))

" Pearson correlation between two reviewers.
echo printf(
      \ '山田さんと田中さんの類似度 (ピアソン相関係数) %f',
      \ s:calcPearsonCorrelation('山田', '田中'))

" The three reviewers most similar to one reviewer, as [name, score] pairs.
echo s:getMostSimilarUsers('山田', 3)

" Dishes recommended to a reviewer who has rated only a few.
echo s:getUserRecommendations('下林')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
山田さんと鈴木さんの類似度 (ユークリッド距離) 0.340542 | |
山田さんと田中さんの類似度 (ピアソン相関係数) 0.396059 | |
[['下林', 0.991241], ['鈴木', 0.747018], ['川村', 0.594089]] | |
['うどん', 'カレー', 'チャーハン'] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment