Skip to content

Instantly share code, notes, and snippets.

@AppliedDataS
Last active July 13, 2018 20:28
Show Gist options
  • Save AppliedDataS/2eaea2f684bf1c18093402a6125e2c2e to your computer and use it in GitHub Desktop.
Save AppliedDataS/2eaea2f684bf1c18093402a6125e2c2e to your computer and use it in GitHub Desktop.
Assignment 4 Q1 test code
import re
import pandas as pd
import numpy as np
# Reference list of accepted values for the "State" column, stored as one
# comma-separated string (57 names; besides states it includes entries such
# as "National", "Guam" and "Puerto Rico").
# The State test later in this script splits this text on ',' and strips
# surrounding whitespace/newlines from each piece, so the line breaks and the
# leading commas below are only layout and do not affect the parsed names.
stateStr = """
Ohio, Kentucky, American Samoa, Nevada, Wyoming
,National, Alabama, Maryland, Alaska, Utah
,Oregon, Montana, Illinois, Tennessee, District of Columbia
,Vermont, Idaho, Arkansas, Maine, Washington
,Hawaii, Wisconsin, Michigan, Indiana, New Jersey
,Arizona, Guam, Mississippi, Puerto Rico, North Carolina
,Texas, South Dakota, Northern Mariana Islands, Iowa, Missouri
,Connecticut, West Virginia, South Carolina, Louisiana, Kansas
,New York, Nebraska, Oklahoma, Florida, California
,Colorado, Pennsylvania, Delaware, New Mexico, Rhode Island
,Minnesota, Virgin Islands, New Hampshire, Massachusetts, Georgia
,North Dakota, Virginia
"""
# Expected string length of each "RegionName" entry, as comma-separated
# zero-padded integers: 517 values, one per expected row of the frame, in row
# order. The RegionName test later in this script splits this text on ',',
# strips each piece, keeps the pieces that are all digits and converts them
# with int(), so the line breaks and trailing commas are only layout.
regNmLenStr = """
06,08,12,10,10,04,10,08,09,09,05,06,11,06,12,09,08,10,12,06,
06,06,08,05,09,06,05,06,10,28,06,06,09,06,08,09,10,35,09,15,
13,10,07,21,08,07,07,07,12,06,14,07,08,16,09,10,11,09,10,06,
11,05,06,09,10,12,06,06,11,07,08,13,07,11,05,06,06,07,10,08,
11,08,13,12,06,04,08,10,08,07,12,05,06,09,07,10,16,10,06,12,
08,07,06,06,06,11,14,11,07,06,06,12,08,10,11,06,10,14,04,11,
18,07,07,08,09,06,13,11,12,10,07,12,07,04,08,09,09,13,08,10,
16,09,10,08,06,08,12,07,11,09,07,09,06,12,06,09,07,10,09,10,
09,06,15,05,10,09,11,12,10,10,09,13,06,09,11,06,11,09,13,37,
06,13,06,09,49,07,11,12,09,11,11,07,12,10,06,06,09,04,09,15,
10,12,05,09,08,09,09,07,14,06,07,16,12,09,07,09,06,32,07,08,
08,06,10,36,09,10,09,06,09,11,09,06,10,07,14,08,07,06,10,09,
05,11,07,06,08,07,05,07,07,04,06,05,09,04,25,06,07,08,05,08,
06,05,11,09,07,07,06,13,09,05,16,05,10,09,08,11,06,06,06,10,
09,07,06,07,10,05,08,07,06,08,06,30,09,07,06,11,07,12,08,09,
16,12,11,08,06,04,10,10,15,05,11,11,09,08,06,04,10,10,07,09,
11,08,26,07,13,05,11,03,08,07,06,05,08,13,10,08,08,08,07,07,
09,05,04,11,11,07,06,10,11,03,04,06,06,08,08,06,10,09,05,11,
07,09,06,12,13,09,10,11,08,07,07,08,09,10,08,10,08,56,07,12,
07,16,08,04,10,10,10,10,07,09,08,09,09,10,07,09,09,09,12,14,
10,29,19,07,11,12,13,13,09,10,12,12,12,08,10,07,10,07,07,08,
08,08,09,10,09,11,09,07,09,10,11,11,10,09,09,12,09,06,08,07,
12,09,07,07,06,06,08,06,15,08,06,06,10,10,10,07,05,10,07,11,
09,12,10,12,04,10,05,05,04,14,07,10,09,07,11,10,10,10,11,15,
09,14,12,09,09,07,12,04,10,10,06,10,07,28,06,10,08,09,10,10,
10,13,12,08,10,09,09,07,09,09,07,11,11,13,08,10,07
"""
# Build the frame under test. get_list_of_university_towns() is not defined in
# this script; it has to be in scope already when the script runs.
df = get_list_of_university_towns()
cols = ["State", "RegionName"]

# Structural checks: exactly 517 rows by 2 columns, a default 0..516 index,
# and the two expected column names in order.
print('Shape test: ',
      'Failed' if df.shape != (517, 2) else "Passed")
print('Index test: ',
      'Failed' if df.index.tolist() != list(range(517)) else "Passed")
print('Column test: ',
      'Failed' if df.columns.tolist() != cols else "Passed")
# Content checks: each one fails if ANY entry in either column (cols[0] or
# cols[1]) matches the pattern. The builtin any() is applied to the boolean
# Series on purpose: a non-string entry yields NaN from .str.contains(), NaN
# is truthy, and so such an entry makes the check fail rather than slip by.
# Regex patterns are raw strings so that sequences like \s, \( and \[ reach
# the regex engine unchanged instead of being invalid string escapes.

# No embedded newline characters.
print('\\n test: ',
      "Failed" if any(any(df[col].str.contains('\n')) for col in cols)
      else 'Passed')

# No whitespace left at the end of an entry.
print('Trailing whitespace test:',
      "Failed" if any(any(df[col].str.contains(r'\s+$')) for col in cols)
      else 'Passed')

# No opening parenthesis (parenthesised suffixes must have been removed).
print('"(" test:',
      "Failed" if any(any(df[col].str.contains(r'\(')) for col in cols)
      else 'Passed')

# No square brackets. Both columns are checked for either bracket: the
# previous version looked for '[' only in cols[0] and ']' only in cols[1],
# so a '[' left in cols[1] was never detected.
print('"[" test:',
      "Failed" if any(any(df[col].str.contains(r'[\[\]]')) for col in cols)
      else 'Passed')
# State test: every value in the 'State' column must be one of the names
# listed in stateStr (split on ',' with surrounding whitespace removed).
states_vlist = [st.strip() for st in stateStr.split(',')]
mismatchedStates = df.loc[~df['State'].isin(states_vlist), 'State'].unique()
print('State test: ',
      "Passed" if len(mismatchedStates) == 0 else "Failed")
# len() is used instead of truthiness because unique() returns an array-like,
# whose truth value is not a reliable emptiness check.
if len(mismatchedStates) > 0:
    # The four report lines belong to this branch; they must be indented
    # under the `if`, otherwise the script does not parse.
    print()
    print('The following states failed the equality test:')
    print()
    # str() each entry so a non-string value (e.g. NaN) is listed in the
    # report instead of making str.join raise TypeError.
    print('\n'.join(str(state) for state in mismatchedStates))
# RegionName test: the length of every 'RegionName' entry must equal the
# corresponding value in regNmLenStr (compared row by row, in order).
expected_lengths = [int(s.strip())
                    for s in regNmLenStr.split(',')
                    if s.strip().isdigit()]
if len(expected_lengths) != len(df):
    # Assigning a list whose length differs from the frame's row count raises
    # ValueError; report the mismatch as a test failure instead of crashing.
    print('RegionName test: ',
          'Failed (expected {} rows, found {})'.format(
              len(expected_lengths), len(df)))
else:
    # Kept as a column on df, as before, so it is still available afterwards.
    df['expected_length'] = expected_lengths
    actual_lengths = df['RegionName'].str.len()
    # .copy() gives regDiff its own data, so adding 'actual_length' below
    # does not write into a view of df.
    regDiff = df.loc[actual_lengths != df['expected_length'],
                     ['RegionName', 'expected_length']].copy()
    regDiff['actual_length'] = regDiff['RegionName'].str.len()
    print('RegionName test: ', "Passed" if len(regDiff) == 0
          else ' \nMismatching regionNames\n {}'.format(regDiff))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment