Skip to content

Instantly share code, notes, and snippets.

@lightstrike
Created May 26, 2015 03:21
Show Gist options
  • Select an option

  • Save lightstrike/316a3554d286bf614e04 to your computer and use it in GitHub Desktop.

Select an option

Save lightstrike/316a3554d286bf614e04 to your computer and use it in GitHub Desktop.
Python Rolodex Formatter

Rolodex Formatter

Run the program with python rolodex.py [file_name] - for example, python rolodex.py test_input.txt will show a working example

Run tests with python -m unittest discover -v

from sys import argv
import json
class Rolodex(object):
    '''
    Read comma-delimited contact lines, validate and normalize them,
    and render the result as a JSON string.

    Instance attributes:
    file        -- path of the input file (may be None)
    valid_data  -- dicts of raw values for lines that passed validation
    clean_data  -- dicts of normalized values, filled by normalize()
    errors      -- indexes of lines that failed validation
    file_lines  -- raw lines, set by open() (or assigned by the caller)
    out         -- JSON string, set by transform()
    '''

    def __init__(self, input_file=None, initial_data=None):
        '''
        Store the input path and start with empty result containers;
        initial_data, when given and non-empty, seeds valid_data
        '''
        self.file = input_file
        self.valid_data = initial_data or []
        self.clean_data = []
        self.errors = []

    def open(self):
        '''
        Read every line of self.file into self.file_lines and return it
        '''
        # The context manager closes the handle even if reading fails.
        with open(self.file, 'r') as input_file:
            self.file_lines = input_file.readlines()
        return self.file_lines

    def _index_of_digit_count(self, line, digit_count):
        '''
        Return the index of the first comma-delimited value in line that
        contains exactly digit_count digits, or None if no value does
        '''
        value_digits = [sum(c.isdigit() for c in v) for v in line.split(',')]
        if digit_count in value_digits:
            return value_digits.index(digit_count)
        return None

    def validate_length(self, line):
        '''
        If the number of values in a comma-delimited line
        is five, the line is of valid length
        '''
        # Compare by value; identity (`is`) on ints is not reliable.
        return len(line.split(',')) == 5

    def validate_phone_number(self, line):
        '''
        Find number of digits in each value,
        if one value has 10 digits the line contains
        a valid phone number. Returns that value's index, else None
        '''
        return self._index_of_digit_count(line, 10)

    def validate_zipcode(self, line):
        '''
        Find number of digits in each value,
        if one value has 5 digits the line contains
        a valid zipcode. Returns that value's index, else None
        '''
        return self._index_of_digit_count(line, 5)

    def get_line_format(self, phone_index):
        '''
        Return the ordered list of field names implied by the position
        of the phone number, or None when that position matches no
        supported layout
        '''
        formats = {
            2: ['lastname', 'firstname', 'phonenumber', 'color', 'zipcode'],
            3: ['firstname', 'lastname', 'zipcode', 'phonenumber', 'color'],
            4: ['firstname', 'lastname', 'color', 'zipcode', 'phonenumber'],
        }
        return formats.get(phone_index)

    def validate_line(self, line, index):
        '''
        Validate a single line.

        On success a dict mapping field name to raw value is appended to
        valid_data and valid_data is returned. Otherwise index is
        appended to errors and errors is returned.
        '''
        valid_length = self.validate_length(line)
        phone_index = self.validate_phone_number(line)
        zipcode_index = self.validate_zipcode(line)

        line_format = None
        # Index 0 is falsy, so a phone number or zipcode found in the
        # first column is rejected here too; no supported layout puts
        # either of them there.
        if all([valid_length, phone_index, zipcode_index]):
            line_format = self.get_line_format(phone_index)

        if not line_format:
            # Covers lines that pass the individual checks but match no
            # known layout, so they are reported instead of dropped.
            self.errors.append(index)
            return self.errors

        line_values = line.split(',')
        self.valid_data.append(dict(zip(line_format, line_values)))
        return self.valid_data

    def validate(self):
        '''
        Run validate_line over every line in self.file_lines.

        Results are appended to the existing valid_data and errors
        lists (nothing is reset). Returns (valid_data, errors).
        '''
        for index, line in enumerate(self.file_lines):
            self.validate_line(line, index)
        return self.valid_data, self.errors

    def normalize_firstname(self, value):
        '''Title-case the first name'''
        return value.title()

    def normalize_lastname(self, value):
        '''Title-case the last name'''
        return value.title()

    def normalize_phonenumber(self, value):
        '''
        Keep only the digits of value and format the first ten
        as NNN-NNN-NNNN
        '''
        number = ''.join(c for c in value if c.isdigit())
        return '{area}-{first}-{second}'.format(
            area=number[0:3],
            first=number[3:6],
            second=number[6:10]
        )

    def normalize_color(self, value):
        '''Lower-case the color'''
        return value.lower()

    def normalize_zipcode(self, value):
        '''Keep the first five characters of the zipcode'''
        return value[0:5]

    def normalize(self):
        '''
        Strip each value in valid_data and pass it through the matching
        normalize_<key> method, appending one cleaned dict per record
        to clean_data. Returns clean_data.
        '''
        for line in self.valid_data:
            line_data = {}
            for key, value in line.items():
                # Dispatch by field name, e.g. 'color' -> normalize_color
                normalize_method = 'normalize_{key}'.format(key=key)
                line_data[key] = getattr(self, normalize_method)(value.strip())
            self.clean_data.append(line_data)
        return self.clean_data

    def transform(self):
        '''
        sort clean data by last name, first name,
        then return formatted JSON object (also stored on self.out)
        '''
        self.clean_data.sort(key=lambda k: (k['lastname'], k['firstname']))
        payload = {
            'entries': self.clean_data,
            'errors': self.errors,
        }
        self.out = json.dumps(
            payload,
            sort_keys=True,
            indent=2,
            separators=(',', ': ')
        )
        return self.out

    def process(self):
        '''
        Run the whole pipeline on self.file: open, validate, normalize,
        transform. Returns the JSON string produced by transform().
        '''
        self.open()
        self.validate()
        self.normalize()
        return self.transform()
if __name__ == '__main__':
    # Command-line entry point: the first argument is the input file path.
    if len(argv) <= 1:
        raise Exception("You must pass a file path as an argument")
    input_path = argv[1]
    rolodex = Rolodex(input_path)
    # process() stores the JSON string on rolodex.out as well as returning it.
    rolodex.process()
    print(rolodex.out)
Booker T., Washington, 87360, 373 781 7380, yellow
Chandler, Kerri, (623)-668-9293, pink, 123123121
James, Murphy, yellow, 83880, 018 154 6474
asdfawefawea
import unittest
import json
from rolodex import Rolodex
# Raw input lines used by the validation tests. They appear to mirror the
# four lines of test_input.txt read by the file-based tests - confirm if
# that file changes. Lines 0 and 2 are expected to validate; line 1 ends in
# a nine-digit value and line 3 contains no commas.
TEST_DATA = [
'Booker T., Washington, 87360, 373 781 7380, yellow\n',
'Chandler, Kerri, (623)-668-9293, pink, 123123121\n',
'James, Murphy, yellow, 83880, 018 154 6474\n',
'asdfawefawea\n'
]
# Records in their pre-normalization form: the values still carry the
# leading space and trailing newline left over from splitting on ','.
# Assigned to valid_data in test_full_normalize.
TEST_VALID_DATA = [
{
'color': ' yellow\n',
'firstname': 'Booker T.',
'lastname': ' Washington',
'phonenumber': ' 373 781 7380',
'zipcode': ' 87360'
},
{
'color': ' yellow',
'firstname': 'James',
'lastname': ' Murphy',
'phonenumber': ' 018 154 6474\n',
'zipcode': ' 83880'
}
]
# The same two records in normalized form (whitespace stripped, phone
# number hyphenated). Assigned to clean_data in test_transform.
TEST_CLEAN_DATA = [
{
'color': 'yellow',
'firstname': 'Booker T.',
'lastname': 'Washington',
'phonenumber': '373-781-7380',
'zipcode': '87360'
},
{
'color': 'yellow',
'firstname': 'James',
'lastname': 'Murphy',
'phonenumber': '018-154-6474',
'zipcode': '83880'
}
]
class RolodexTest(unittest.TestCase):
    '''
    Unit tests for Rolodex: file reading, per-line validation,
    value normalization and JSON output.

    The file-based tests read 'test_input.txt' relative to the
    current working directory.
    '''

    def setUp(self):
        # Fresh instance per test so valid_data / errors never leak over.
        self.rolodex = Rolodex()

    def test_open_method_assigns_file_lines_attribute(self):
        '''
        Tests to ensure the .open() method assigns and returns
        the file_lines attribute with correct length
        '''
        rolodex = Rolodex('test_input.txt')
        file_lines = rolodex.open()
        self.assertEqual(len(file_lines), 4)
        self.assertEqual(rolodex.file_lines, file_lines)

    def test_input_file_line_length_validation_passing(self):
        '''
        Tests to ensure an input line with five comma-delimited
        values is considered valid
        '''
        actual = self.rolodex.validate_length(TEST_DATA[0])
        expected = True
        self.assertEqual(actual, expected)

    def test_input_file_line_length_validation_failing(self):
        '''
        Tests to ensure an input line not with five
        comma-delimited values is considered invalid
        '''
        actual = self.rolodex.validate_length(TEST_DATA[3])
        expected = False
        self.assertEqual(actual, expected)

    def test_phone_number_length_validation_passing(self):
        '''
        Tests that a phone number with 10 digits is considered valid
        '''
        actual = self.rolodex.validate_phone_number(TEST_DATA[2])
        expected = 4  # index of phone number
        self.assertEqual(actual, expected)

    def test_phone_number_length_validation_failing(self):
        '''
        Tests that a phone number not with 10 digits is considered invalid
        '''
        actual = self.rolodex.validate_phone_number(TEST_DATA[3])
        self.assertIsNone(actual)

    def test_zipcode_length_validation_passing(self):
        '''
        Tests that a zipcode with 5 digits is considered valid
        '''
        actual = self.rolodex.validate_zipcode(TEST_DATA[2])
        expected = 3  # index of zipcode
        self.assertEqual(actual, expected)

    def test_zipcode_length_validation_failing(self):
        '''
        Tests that a zipcode not with 5 digits is considered invalid
        '''
        actual = self.rolodex.validate_zipcode(TEST_DATA[1])
        self.assertIsNone(actual)

    def test_get_line_format_format_one(self):
        '''
        Tests for correct format when phone index == 2 (third position)
        '''
        actual = self.rolodex.get_line_format(2)
        expected = ['lastname', 'firstname', 'phonenumber', 'color', 'zipcode']
        self.assertEqual(actual, expected)

    def test_get_line_format_format_two(self):
        '''
        Tests for correct format when phone index == 3 (fourth position)
        '''
        actual = self.rolodex.get_line_format(3)
        expected = ['firstname', 'lastname', 'zipcode', 'phonenumber', 'color']
        self.assertEqual(actual, expected)

    def test_get_line_format_format_three(self):
        '''
        Tests for correct format when phone index == 4 (fifth position)
        '''
        actual = self.rolodex.get_line_format(4)
        expected = ['firstname', 'lastname', 'color', 'zipcode', 'phonenumber']
        self.assertEqual(actual, expected)

    def test_get_line_format_format_none(self):
        '''
        Tests for correct format when phone index not 2, 3 or 4
        '''
        actual = self.rolodex.get_line_format(0)
        self.assertIsNone(actual)

    def test_line_validation_passing(self):
        '''
        Tests that a full line is considered valid with correct data
        '''
        actual = len(self.rolodex.validate_line(TEST_DATA[0], 0))
        expected = 1  # length of valid_data
        self.assertEqual(actual, expected)

    def test_line_validation_failing(self):
        '''
        Tests that a malformed line is rejected and its index
        is recorded in errors
        '''
        actual = self.rolodex.validate_line(TEST_DATA[3], 3)
        expected = [3]  # value of errors
        self.assertEqual(actual, expected)

    def test_full_validation(self):
        '''
        Tests that valid_data and errors set correctly on set of lines
        '''
        self.rolodex.file_lines = list(TEST_DATA)
        valid_data, errors = self.rolodex.validate()
        self.assertEqual(len(valid_data), 2)
        self.assertEqual(len(errors), 2)

    def test_normalize_firstname(self):
        '''
        Tests first name normalize logic
        '''
        actual = self.rolodex.normalize_firstname('booKer T.')
        expected = 'Booker T.'
        self.assertEqual(actual, expected)

    def test_normalize_lastname(self):
        '''
        Tests last name normalize logic
        '''
        actual = self.rolodex.normalize_lastname('wAshington')
        expected = 'Washington'
        self.assertEqual(actual, expected)

    def test_normalize_phonenumber(self):
        '''
        Tests phone number normalize logic
        '''
        actual = self.rolodex.normalize_phonenumber('3737817380')
        expected = '373-781-7380'
        self.assertEqual(actual, expected)

    def test_normalize_color(self):
        '''
        Tests color normalize logic
        '''
        actual = self.rolodex.normalize_color('BLUE')
        expected = 'blue'
        self.assertEqual(actual, expected)

    def test_normalize_zipcode(self):
        '''
        Tests zip code normalize logic
        '''
        actual = self.rolodex.normalize_zipcode('123456677788')
        expected = '12345'
        self.assertEqual(actual, expected)

    def test_full_normalize(self):
        '''
        Tests that applying all normalize methods work in sequence
        '''
        # Copy so the module-level fixture list is never shared with
        # the instance under test.
        self.rolodex.valid_data = list(TEST_VALID_DATA)
        actual = len(self.rolodex.normalize())
        expected = 2
        self.assertEqual(actual, expected)

    def test_transform(self):
        '''
        Tests that transform returns a JSON-formatted object with
        entries sorted by last name, first name
        '''
        # transform() sorts clean_data in place; hand it a copy so the
        # shared fixture keeps its original order for other tests.
        self.rolodex.clean_data = list(TEST_CLEAN_DATA)
        self.rolodex.errors = [1, 3]
        raw_json = self.rolodex.transform()
        json_dict = json.loads(raw_json)
        self.assertEqual(json_dict['entries'][0]['lastname'], 'Murphy')
        self.assertEqual(json_dict['errors'][0], 1)

    def test_full_process(self):
        '''
        Tests that entire process for validating, normalizing and
        transforming an input file returns expected data
        '''
        rolodex = Rolodex('test_input.txt')
        raw_json = rolodex.process()
        json_dict = json.loads(raw_json)
        self.assertEqual(json_dict['entries'][1]['lastname'], 'Washington')
        self.assertEqual(json_dict['errors'][1], 3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment