haokaibo · May 9, 2017 08:37
diff --git a/CollectiveIntelligence_.idea_CollectiveIntelligence.iml b/CollectiveIntelligence_.idea_CollectiveIntelligence.iml
 <?xml version="1.0" encoding="UTF-8"?>
 <module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="PackageRequirementsSettings">
    <option name="requirementsPath" value="" />
  </component>
  <component name="TestRunnerService">
    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
  </component>
 </module>
diff --git a/CollectiveIntelligence_.idea_misc.xml b/CollectiveIntelligence_.idea_misc.xml
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.2 (~/anaconda/bin/python)" project-jdk-type="Python SDK" />
 </project>
diff --git a/CollectiveIntelligence_.idea_modules.xml b/CollectiveIntelligence_.idea_modules.xml
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/CollectiveIntelligence.iml" filepath="$PROJECT_DIR$/.idea/CollectiveIntelligence.iml" />
    </modules>
  </component>
 </project>
diff --git a/CollectiveIntelligence_recommend_recommendations.py b/CollectiveIntelligence_recommend_recommendations.py
 # A dictionary of movie critics and their ratings of a small
 # set of movies
 from math import sqrt

 # Sample data set: maps each critic's name to a {movie title: rating} dict.
 # Not every critic has rated every movie.
 critics = {
     'Lisa Rose': {
         'Lady in the Water': 2.5,
         'Snakes on a Plane': 3.5,
         'Just My Luck': 3.0,
         'Superman Returns': 3.5,
         'You, Me and Dupree': 2.5,
         'The Night Listener': 3.0,
     },
     'Gene Seymour': {
         'Lady in the Water': 3.0,
         'Snakes on a Plane': 3.5,
         'Just My Luck': 1.5,
         'Superman Returns': 5.0,
         'The Night Listener': 3.0,
         'You, Me and Dupree': 3.5,
     },
     'Michael Phillips': {
         'Lady in the Water': 2.5,
         'Snakes on a Plane': 3.0,
         'Superman Returns': 3.5,
         'The Night Listener': 4.0,
     },
     'Claudia Puig': {
         'Snakes on a Plane': 3.5,
         'Just My Luck': 3.0,
         'The Night Listener': 4.5,
         'Superman Returns': 4.0,
         'You, Me and Dupree': 2.5,
     },
     'Mick LaSalle': {
         'Lady in the Water': 3.0,
         'Snakes on a Plane': 4.0,
         'Just My Luck': 2.0,
         'Superman Returns': 3.0,
         'The Night Listener': 3.0,
         'You, Me and Dupree': 2.0,
     },
     'Jack Matthews': {
         'Lady in the Water': 3.0,
         'Snakes on a Plane': 4.0,
         'The Night Listener': 3.0,
         'Superman Returns': 5.0,
         'You, Me and Dupree': 3.5,
     },
     'Toby': {
         'Snakes on a Plane': 4.5,
         'You, Me and Dupree': 1.0,
         'Superman Returns': 4.0,
     },
 }


 # Returns a distance-based similarity score for person1 and person2
 def sim_distance(prefs, person1, person2):
     """Return a distance-based similarity score for two people.

     The score is ``1 / (1 + d)``, where ``d`` is the sum of the squared
     differences between the two people's ratings over the items both of
     them have rated. Identical ratings give 1.0; the score shrinks toward
     0 as the ratings diverge.

     Args:
         prefs: Mapping of person -> {item: rating}.
         person1: Key of the first person in ``prefs``.
         person2: Key of the second person in ``prefs``.

     Returns:
         0 if the two people have no rated item in common, otherwise the
         score described above.
     """
     ratings1 = prefs[person1]
     # One pass over the shared items: the list doubles as the
     # "anything in common?" check and as the terms of the sum.
     squared_diffs = [
         pow(rating - prefs[person2][item], 2)
         for item, rating in ratings1.items()
         if item in prefs[person2]
     ]
     if not squared_diffs:
         return 0
     return 1 / (1 + sum(squared_diffs))


 # Returns the Pearson correlation coefficient for p1 and p2
 def sim_pearson(prefs, p1, p2):
     """Return the Pearson correlation coefficient between p1 and p2.

     Only items rated by both people take part in the computation.

     Args:
         prefs: Mapping of person -> {item: rating}.
         p1: Key of the first person in ``prefs``.
         p2: Key of the second person in ``prefs``.

     Returns:
         0 if the two people share no rated items or if the denominator
         of the correlation is zero (e.g. one person's shared ratings are
         all equal); otherwise the correlation coefficient.
     """
     # Items rated by both people, in p1's iteration order.
     shared = [item for item in prefs[p1] if item in prefs[p2]]
     n = len(shared)
     if n == 0:
         return 0

     xs = [prefs[p1][item] for item in shared]
     ys = [prefs[p2][item] for item in shared]

     # Plain sums, sums of squares and sum of products.
     sum1 = sum(xs)
     sum2 = sum(ys)
     sum1_sq = sum([pow(x, 2) for x in xs])
     sum2_sq = sum([pow(y, 2) for y in ys])
     p_sum = sum([x * y for x, y in zip(xs, ys)])

     num = p_sum - (sum1 * sum2 / n)
     den_sq = (sum1_sq - pow(sum1, 2) / n) * (sum2_sq - pow(sum2, 2) / n)
     # den_sq is non-negative in exact arithmetic, but floating-point
     # rounding can leave it marginally below zero, which would make
     # sqrt() raise ValueError. Treat that like the zero-denominator case.
     if den_sq <= 0:
         return 0
     return num / sqrt(den_sq)

 # Returns the best matches for person from the prefs dictionary.
 # Number of results and similarity function are optional params.
 def topMatches(prefs, person, n=5, similarity=sim_pearson):
     """Return the ``n`` people in ``prefs`` most similar to ``person``.

     Args:
         prefs: Mapping of person -> {item: rating}.
         person: Key of the person to compare everyone else against.
         n: Maximum number of matches to return.
         similarity: Callable ``(prefs, a, b) -> score`` used to compare
             two people.

     Returns:
         A list of ``(score, other_person)`` tuples, highest score first.
     """
     ranked = []
     for candidate in prefs:
         if candidate != person:
             ranked.append((similarity(prefs, person, candidate), candidate))

     # Descending tuple order: by score, ties broken by the person key.
     return sorted(ranked, reverse=True)[:n]
diff --git a/CollectiveIntelligence_recommend_tests.py b/CollectiveIntelligence_recommend_tests.py
 import os
 import unittest

 import logging
 from recommend.recommendations import critics, sim_distance, sim_pearson, topMatches


 class RecommendationsTest(unittest.TestCase):
     """Tests for sim_distance, sim_pearson and topMatches on the critics data."""

     def setUp(self):
         logging.basicConfig(level=logging.INFO)
         # Directory containing this test module. It necessarily exists
         # because the module was loaded from it, so nothing is created.
         self.base_dir = os.path.dirname(os.path.realpath(__file__))

     def test_sim_distance(self):
         r = sim_distance(critics, 'Lisa Rose', 'Gene Seymour')
         # Tolerant comparison: the score is a float.
         self.assertAlmostEqual(0.14814814814814814, r)

     def test_sim_pearson(self):
         r = sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')
         self.assertAlmostEqual(0.39605901719066977, r)

     def test_topMatches_with_sim_pearson_method(self):
         expected = [
             (0.9912407071619299, 'Lisa Rose'),
             (0.9244734516419049, 'Mick LaSalle'),
             (0.8934051474415647, 'Claudia Puig'),
         ]
         r = topMatches(critics, 'Toby', n=3)
         # Compare the ranking exactly and the scores with float tolerance,
         # instead of comparing the repr of the whole list.
         self.assertEqual([name for _, name in expected],
                          [name for _, name in r])
         for (want, _), (got, _) in zip(expected, r):
             self.assertAlmostEqual(want, got)

 # Run the tests when this file is executed directly (not when imported);
 # warnings='ignore' is passed through to unittest.main.
 if __name__ == '__main__':
    unittest.main(warnings='ignore')
	<?xml version="1.0" encoding="UTF-8"?>
	<module type="PYTHON_MODULE" version="4">
	<component name="NewModuleRootManager">
	<content url="file://$MODULE_DIR$" />
	<orderEntry type="inheritedJdk" />
	<orderEntry type="sourceFolder" forTests="false" />
	</component>
	<component name="PackageRequirementsSettings">
	<option name="requirementsPath" value="" />
	</component>
	<component name="TestRunnerService">
	<option name="PROJECT_TEST_RUNNER" value="Unittests" />
	</component>
	</module>
	<?xml version="1.0" encoding="UTF-8"?>
	<project version="4">
	<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5.2 (~/anaconda/bin/python)" project-jdk-type="Python SDK" />
	</project>
	<?xml version="1.0" encoding="UTF-8"?>
	<project version="4">
	<component name="ProjectModuleManager">
	<modules>
	<module fileurl="file://$PROJECT_DIR$/.idea/CollectiveIntelligence.iml" filepath="$PROJECT_DIR$/.idea/CollectiveIntelligence.iml" />
	</modules>
	</component>
	</project>
	# A dictionary of movie critics and their ratings of a small
	# set of movies
	from math import sqrt

	# Sample data set: maps each critic's name to a {movie title: rating} dict.
	# Not every critic has rated every movie.
	critics = {
	    'Lisa Rose': {
	        'Lady in the Water': 2.5,
	        'Snakes on a Plane': 3.5,
	        'Just My Luck': 3.0,
	        'Superman Returns': 3.5,
	        'You, Me and Dupree': 2.5,
	        'The Night Listener': 3.0,
	    },
	    'Gene Seymour': {
	        'Lady in the Water': 3.0,
	        'Snakes on a Plane': 3.5,
	        'Just My Luck': 1.5,
	        'Superman Returns': 5.0,
	        'The Night Listener': 3.0,
	        'You, Me and Dupree': 3.5,
	    },
	    'Michael Phillips': {
	        'Lady in the Water': 2.5,
	        'Snakes on a Plane': 3.0,
	        'Superman Returns': 3.5,
	        'The Night Listener': 4.0,
	    },
	    'Claudia Puig': {
	        'Snakes on a Plane': 3.5,
	        'Just My Luck': 3.0,
	        'The Night Listener': 4.5,
	        'Superman Returns': 4.0,
	        'You, Me and Dupree': 2.5,
	    },
	    'Mick LaSalle': {
	        'Lady in the Water': 3.0,
	        'Snakes on a Plane': 4.0,
	        'Just My Luck': 2.0,
	        'Superman Returns': 3.0,
	        'The Night Listener': 3.0,
	        'You, Me and Dupree': 2.0,
	    },
	    'Jack Matthews': {
	        'Lady in the Water': 3.0,
	        'Snakes on a Plane': 4.0,
	        'The Night Listener': 3.0,
	        'Superman Returns': 5.0,
	        'You, Me and Dupree': 3.5,
	    },
	    'Toby': {
	        'Snakes on a Plane': 4.5,
	        'You, Me and Dupree': 1.0,
	        'Superman Returns': 4.0,
	    },
	}


	# Returns a distance-based similarity score for person1 and person2
	def sim_distance(prefs, person1, person2):
	    """Return a distance-based similarity score for two people.

	    The score is ``1 / (1 + d)``, where ``d`` is the sum of the squared
	    differences between the two people's ratings over the items both of
	    them have rated. Identical ratings give 1.0; the score shrinks
	    toward 0 as the ratings diverge.

	    Args:
	        prefs: Mapping of person -> {item: rating}.
	        person1: Key of the first person in ``prefs``.
	        person2: Key of the second person in ``prefs``.

	    Returns:
	        0 if the two people have no rated item in common, otherwise the
	        score described above.
	    """
	    ratings1 = prefs[person1]
	    # One pass over the shared items: the list doubles as the
	    # "anything in common?" check and as the terms of the sum.
	    squared_diffs = [
	        pow(rating - prefs[person2][item], 2)
	        for item, rating in ratings1.items()
	        if item in prefs[person2]
	    ]
	    if not squared_diffs:
	        return 0
	    return 1 / (1 + sum(squared_diffs))


	# Returns the Pearson correlation coefficient for p1 and p2
	def sim_pearson(prefs, p1, p2):
	    """Return the Pearson correlation coefficient between p1 and p2.

	    Only items rated by both people take part in the computation.

	    Args:
	        prefs: Mapping of person -> {item: rating}.
	        p1: Key of the first person in ``prefs``.
	        p2: Key of the second person in ``prefs``.

	    Returns:
	        0 if the two people share no rated items or if the denominator
	        of the correlation is zero (e.g. one person's shared ratings
	        are all equal); otherwise the correlation coefficient.
	    """
	    # Items rated by both people, in p1's iteration order.
	    shared = [item for item in prefs[p1] if item in prefs[p2]]
	    n = len(shared)
	    if n == 0:
	        return 0

	    xs = [prefs[p1][item] for item in shared]
	    ys = [prefs[p2][item] for item in shared]

	    # Plain sums, sums of squares and sum of products.
	    sum1 = sum(xs)
	    sum2 = sum(ys)
	    sum1_sq = sum([pow(x, 2) for x in xs])
	    sum2_sq = sum([pow(y, 2) for y in ys])
	    p_sum = sum([x * y for x, y in zip(xs, ys)])

	    num = p_sum - (sum1 * sum2 / n)
	    den_sq = (sum1_sq - pow(sum1, 2) / n) * (sum2_sq - pow(sum2, 2) / n)
	    # den_sq is non-negative in exact arithmetic, but floating-point
	    # rounding can leave it marginally below zero, which would make
	    # sqrt() raise ValueError. Treat that like the zero-denominator case.
	    if den_sq <= 0:
	        return 0
	    return num / sqrt(den_sq)

	# Returns the best matches for person from the prefs dictionary.
	# Number of results and similarity function are optional params.
	def topMatches(prefs, person, n=5, similarity=sim_pearson):
	    """Return the ``n`` people in ``prefs`` most similar to ``person``.

	    Args:
	        prefs: Mapping of person -> {item: rating}.
	        person: Key of the person to compare everyone else against.
	        n: Maximum number of matches to return.
	        similarity: Callable ``(prefs, a, b) -> score`` used to compare
	            two people.

	    Returns:
	        A list of ``(score, other_person)`` tuples, highest score first.
	    """
	    scores = [(similarity(prefs, person, other), other)
	              for other in prefs if other != person]

	    # Sort so the highest scores come first; ties fall back to the
	    # person key because whole tuples are compared.
	    scores.sort(reverse=True)
	    return scores[0:n]
	import os
	import unittest

	import logging
	from recommend.recommendations import critics, sim_distance, sim_pearson, topMatches


	class RecommendationsTest(unittest.TestCase):
	    """Tests for sim_distance, sim_pearson and topMatches on the critics data."""

	    def setUp(self):
	        logging.basicConfig(level=logging.INFO)
	        # Directory containing this test module. It necessarily exists
	        # because the module was loaded from it, so nothing is created.
	        self.base_dir = os.path.dirname(os.path.realpath(__file__))

	    def test_sim_distance(self):
	        r = sim_distance(critics, 'Lisa Rose', 'Gene Seymour')
	        # Tolerant comparison: the score is a float.
	        self.assertAlmostEqual(0.14814814814814814, r)

	    def test_sim_pearson(self):
	        r = sim_pearson(critics, 'Lisa Rose', 'Gene Seymour')
	        self.assertAlmostEqual(0.39605901719066977, r)

	    def test_topMatches_with_sim_pearson_method(self):
	        expected = [
	            (0.9912407071619299, 'Lisa Rose'),
	            (0.9244734516419049, 'Mick LaSalle'),
	            (0.8934051474415647, 'Claudia Puig'),
	        ]
	        r = topMatches(critics, 'Toby', n=3)
	        # Compare the ranking exactly and the scores with float
	        # tolerance, instead of comparing the repr of the whole list.
	        self.assertEqual([name for _, name in expected],
	                         [name for _, name in r])
	        for (want, _), (got, _) in zip(expected, r):
	            self.assertAlmostEqual(want, got)


	# Run the tests when this file is executed directly (not when imported);
	# warnings='ignore' is passed through to unittest.main.
	if __name__ == '__main__':
	    unittest.main(warnings='ignore')