Last active
December 21, 2015 06:29
-
-
Save bbengfort/6264904 to your computer and use it in GitHub Desktop.
A data structure for five star ranking preference expressions wrapped in a reader for TSV data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import csv | |
import json | |
class PreferenceReader(object):
    """
    A reader that can read the recommended format for storing preferences
    of books in a tab-separated file. The recommended format is:

        critic<TAB>title<TAB>rating

    The file is read lazily on first access to `data`, and the result is
    cached as a nested dictionary: {critic: {title: rating}}. The data is
    primarily read only; the class only provides public access to the
    data and offers no API for modifying it.
    """

    def __init__(self, path):
        """
        Supply the path to the TSV file. Nothing is read until the data
        is first accessed.
        """
        self._path = path
        self._data = {}

    @property
    def data(self):
        """
        On access, loads the internal data structure from the file (if it
        has not been loaded yet) and returns it as {critic: {title: rating}}.

        Any exception raised by `readlines` propagates to the caller. A
        file that yields no rows leaves the cache empty, so it is re-read
        on the next access.
        """
        if not self._data:
            # Build into a local dict first so that a failure part-way
            # through the file never leaves a partially loaded cache
            # behind for later accesses to return silently.
            loaded = {}
            for critic, title, rating in self.readlines():
                loaded.setdefault(critic, {})[title] = rating
            self._data = loaded
        return self._data

    @property
    def critics(self):
        """
        Returns the critics in the data (the keys of the data mapping).
        """
        return self.data.keys()

    @property
    def titles(self):
        """
        Returns the set of titles rated by at least one critic.
        """
        return {
            title
            for ratings in self.data.values()
            for title in ratings
        }

    def readlines(self):
        """
        Reads the TSV file and ensures that the rating is the correct
        `float` type. Completely blank lines are skipped.

        Yields a tuple- (critic, title, rating)

        Raises IndexError if a non-blank row has fewer than three columns
        and ValueError if the rating cannot be converted to a float.
        """
        if str is bytes:
            # Python 2: the csv module expects a file opened in binary mode.
            handle = open(self._path, 'rb')
        else:
            # Python 3: the csv module expects a text-mode file opened
            # with newline='' so that it can do its own newline handling.
            handle = open(self._path, 'r', newline='')

        with handle as data:
            for row in csv.reader(data, delimiter='\t'):
                if not row:
                    # csv.reader yields [] for blank lines (e.g. a
                    # trailing newline at the end of the file).
                    continue
                yield row[0], row[1], float(row[2])

    def __str__(self):
        """
        Returns a JSON representation of the data structure.
        """
        return json.dumps(self.data, indent=2)

    def __getitem__(self, critic):
        """
        Quickly get access to the {title: rating} mapping of a particular
        critic. Raises KeyError if the critic is not in the data.
        """
        return self.data[critic]

    def __contains__(self, critic):
        """
        Determine if the critic is in the data set.
        """
        return critic in self.data

    def __iter__(self):
        """
        Loop through all critics (not their ratings); this behaves like
        the iteration context of a Python dictionary.
        """
        return iter(self.data)

    def __len__(self):
        """
        Returns the number of critics in the data.
        """
        return len(self.data)
if __name__ == "__main__":
    # Demo entry point: load the sample ratings file, dump everything as
    # JSON, then show the ratings of a single critic.
    reader = PreferenceReader('../data/book_ratings.tsv')
    # A single-argument, parenthesised print produces the same output on
    # Python 2 and Python 3, whereas the bare print statement is a syntax
    # error on Python 3.
    print(reader)
    print(reader['Jenny Kim'])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# tests.reader_tests | |
# Tests the reader module in collabtut | |
# | |
# Author: Benjamin Bengfort <[email protected]> | |
# Created: Fri Aug 23 09:16:09 2013 -0400 | |
# | |
# Copyright (C) 2013 Cobrain Company | |
# For license information, see LICENSE.txt | |
# | |
# ID: reader_tests.py.py [] [email protected] $ | |
""" | |
Tests the preference reader module in collabtut. | |
""" | |
########################################################################## | |
## Imports | |
########################################################################## | |
import os | |
import unittest | |
from collabtut.reader import PreferenceReader | |
########################################################################## | |
## Test Cases | |
########################################################################## | |
class ReaderTest(unittest.TestCase):
    """
    Exercises PreferenceReader against a small TSV fixture that is written
    to disk before every test and removed again afterwards.
    """

    FIXTURE_PATH = 'test_ratings_fixture.tsv'

    def setUp(self):
        """
        Write the ratings fixture to a file and point a fresh reader at it.
        """
        ratings = (
            ("Lisa", "Lady in the Water", 2.5),
            ("Lisa", "Snakes on a Plane", 3.5),
            ("Lisa", "Just My Luck", 3.0),
            ("Lisa", "Superman Returns", 3.5),
            ("Lisa", "You, Me, and Dupree", 2.5),
            ("Lisa", "The Night Listener", 3.0),
            ("Gene", "Lady in the Water", 3.0),
            ("Gene", "Snakes on a Plane", 3.5),
            ("Gene", "Just My Luck", 1.5),
            ("Gene", "Superman Returns", 5.0),
            ("Gene", "The Night Listener", 3.0),
            ("Gene", "You, Me, and Dupree", 3.5),
        )

        # One tab-separated line per (critic, title, rating) record.
        lines = [
            "\t".join(str(field) for field in record) + "\n"
            for record in ratings
        ]

        with open(self.FIXTURE_PATH, 'w') as fixture:
            fixture.writelines(lines)

        self.reader = PreferenceReader(self.FIXTURE_PATH)

    def tearDown(self):
        """
        Delete the fixture file and drop the reader.
        """
        os.remove(self.FIXTURE_PATH)
        self.reader = None

    def test_lazy_load(self):
        """
        The reader must not touch the file until `data` is accessed.
        """
        reader = self.reader
        self.assertEqual(reader._data, {}, "Data loaded before access?")
        self.assertTrue(reader.data, "No data loaded on access?")
        self.assertNotEqual(reader._data, {}, "Previously accessed data empty")

    def test_critic_inclusion(self):
        """
        Critics from the fixture are found; unknown critics are not.
        """
        for critic in ('Gene', 'Lisa'):
            self.assertIn(critic, self.reader)
        self.assertNotIn('Ben', self.reader)

    def test_critic_set(self):
        """
        Each critic is counted exactly once.
        """
        critic_count = len(self.reader)
        self.assertEqual(critic_count, 2, "Critic length mismatch")
        self.assertEqual(len(set(self.reader)), critic_count,
                         "Duplicate critics exist!")

    def test_title_inclusion(self):
        """
        Every title in the fixture is reported; an unknown title is not.
        """
        expected_titles = (
            'Superman Returns',
            'Just My Luck',
            'Lady in the Water',
            'Snakes on a Plane',
            'You, Me, and Dupree',
            'The Night Listener',
        )
        for title in expected_titles:
            self.assertIn(title, self.reader.titles)
        self.assertNotIn('Transformers: Shadow of the Moon', self.reader.titles)

    def test_title_set(self):
        """
        Each title is counted exactly once.
        """
        titles = self.reader.titles
        self.assertEqual(len(titles), 6, "Title length mismatch")
        self.assertEqual(len(set(titles)), len(titles),
                         "Duplicate titles exist!")

    def test_getitem(self):
        """
        Subscripting returns a critic's ratings and raises on unknown critics.
        """
        self.assertIn('Gene', self.reader, "Contains not correct or bad fixture.")
        self.assertTrue(bool(self.reader['Gene']),
                        "getitem returned empty or False item.")
        with self.assertRaises(KeyError):
            self.reader['Ben']
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment