Skip to content

Instantly share code, notes, and snippets.

@lsloan
Forked from AppliedDataS/test.py
Created July 13, 2018 20:28
Show Gist options
  • Save lsloan/edad450d6bc6fcbecda9c9b48c205214 to your computer and use it in GitHub Desktop.
Save lsloan/edad450d6bc6fcbecda9c9b48c205214 to your computer and use it in GitHub Desktop.
Assignment 4 Q1 test code
import re
import pandas as pd
import numpy as np
# Comma-separated names accepted in the "State" column. Further down this
# string is split on ',' and each piece is stripped to build the list that
# the State test compares against, so the line breaks and the leading commas
# inside the literal are harmless.
# NOTE(review): besides the states, the list holds territories, the District
# of Columbia and a "National" entry -- presumably mirroring the source data;
# confirm before pruning anything.
stateStr = """
Ohio, Kentucky, American Samoa, Nevada, Wyoming
,National, Alabama, Maryland, Alaska, Utah
,Oregon, Montana, Illinois, Tennessee, District of Columbia
,Vermont, Idaho, Arkansas, Maine, Washington
,Hawaii, Wisconsin, Michigan, Indiana, New Jersey
,Arizona, Guam, Mississippi, Puerto Rico, North Carolina
,Texas, South Dakota, Northern Mariana Islands, Iowa, Missouri
,Connecticut, West Virginia, South Carolina, Louisiana, Kansas
,New York, Nebraska, Oklahoma, Florida, California
,Colorado, Pennsylvania, Delaware, New Mexico, Rhode Island
,Minnesota, Virgin Islands, New Hampshire, Massachusetts, Georgia
,North Dakota, Virginia
"""
# Expected string length of every "RegionName" entry, as zero-padded,
# comma-separated integers. Further down the values are parsed with int()
# and attached to the DataFrame as a column, so their order must follow the
# DataFrame's row order (one value per row).
regNmLenStr = """
06,08,12,10,10,04,10,08,09,09,05,06,11,06,12,09,08,10,12,06,
06,06,08,05,09,06,05,06,10,28,06,06,09,06,08,09,10,35,09,15,
13,10,07,21,08,07,07,07,12,06,14,07,08,16,09,10,11,09,10,06,
11,05,06,09,10,12,06,06,11,07,08,13,07,11,05,06,06,07,10,08,
11,08,13,12,06,04,08,10,08,07,12,05,06,09,07,10,16,10,06,12,
08,07,06,06,06,11,14,11,07,06,06,12,08,10,11,06,10,14,04,11,
18,07,07,08,09,06,13,11,12,10,07,12,07,04,08,09,09,13,08,10,
16,09,10,08,06,08,12,07,11,09,07,09,06,12,06,09,07,10,09,10,
09,06,15,05,10,09,11,12,10,10,09,13,06,09,11,06,11,09,13,37,
06,13,06,09,49,07,11,12,09,11,11,07,12,10,06,06,09,04,09,15,
10,12,05,09,08,09,09,07,14,06,07,16,12,09,07,09,06,32,07,08,
08,06,10,36,09,10,09,06,09,11,09,06,10,07,14,08,07,06,10,09,
05,11,07,06,08,07,05,07,07,04,06,05,09,04,25,06,07,08,05,08,
06,05,11,09,07,07,06,13,09,05,16,05,10,09,08,11,06,06,06,10,
09,07,06,07,10,05,08,07,06,08,06,30,09,07,06,11,07,12,08,09,
16,12,11,08,06,04,10,10,15,05,11,11,09,08,06,04,10,10,07,09,
11,08,26,07,13,05,11,03,08,07,06,05,08,13,10,08,08,08,07,07,
09,05,04,11,11,07,06,10,11,03,04,06,06,08,08,06,10,09,05,11,
07,09,06,12,13,09,10,11,08,07,07,08,09,10,08,10,08,56,07,12,
07,16,08,04,10,10,10,10,07,09,08,09,09,10,07,09,09,09,12,14,
10,29,19,07,11,12,13,13,09,10,12,12,12,08,10,07,10,07,07,08,
08,08,09,10,09,11,09,07,09,10,11,11,10,09,09,12,09,06,08,07,
12,09,07,07,06,06,08,06,15,08,06,06,10,10,10,07,05,10,07,11,
09,12,10,12,04,10,05,05,04,14,07,10,09,07,11,10,10,10,11,15,
09,14,12,09,09,07,12,04,10,10,06,10,07,28,06,10,08,09,10,10,
10,13,12,08,10,09,09,07,09,09,07,11,11,13,08,10,07
"""
# Frame under test, produced by the function being graded, and the column
# names it is required to expose (in this order).
df = get_list_of_university_towns()
cols = ["State", "RegionName"]

# Shape: exactly 517 rows and 2 columns.
if df.shape == (517, 2):
    shape_verdict = "Passed"
else:
    shape_verdict = 'Failed'
print('Shape test: ', shape_verdict)

# Index: the default integer labels 0..516, in order.
if df.index.tolist() == list(range(517)):
    index_verdict = "Passed"
else:
    index_verdict = 'Failed'
print('Index test: ', index_verdict)

# Columns: exactly the names listed in cols, in the same order.
if df.columns.tolist() == cols:
    column_verdict = "Passed"
else:
    column_verdict = 'Failed'
print('Column test: ', column_verdict)
# Content checks: none of these patterns may occur in either column.
def _any_value_matches(pattern):
    """Return True if regex *pattern* matches a value in either checked column.

    The State column (cols[0]) is scanned first and the RegionName column
    (cols[1]) only if State had no match. The built-in any() is used on the
    result of Series.str.contains, so a missing value (NaN) in the result
    counts as truthy and makes the check report a match.
    """
    return (any(df[cols[0]].str.contains(pattern))
            or any(df[cols[1]].str.contains(pattern)))


# Embedded newline characters left over from reading the raw text file.
print('\\n test: ',
      "Failed" if _any_value_matches('\n') else 'Passed')
# Whitespace at the end of a value. Raw strings keep the backslashes intact
# for the regex engine without invalid-escape warnings.
print('Trailing whitespace test:',
      "Failed" if _any_value_matches(r'\s+$') else 'Passed')
# Opening parenthesis in a value.
print('"(" test:',
      "Failed" if _any_value_matches(r'\(') else 'Passed')
# Square brackets in a value. The character class covers both '[' and ']'
# in both columns; previously State was only checked for '[' and
# RegionName only for ']', so a stray '[' in RegionName went unnoticed.
print('"[" test:',
      "Failed" if _any_value_matches(r'[\[\]]') else 'Passed')
# Names accepted in the State column, parsed from the comma-separated
# stateStr literal (surrounding whitespace and newlines stripped).
states_vlist = [st.strip() for st in stateStr.split(',')]

# Distinct State values that do not appear in the accepted list.
mismatchedStates = df.loc[~df['State'].isin(states_vlist), 'State'].unique()

# len() rather than truthiness: unique() returns an array, whose truth value
# is ambiguous when it holds more than one element.
print('State test: ', "Passed" if len(mismatchedStates) == 0 else "Failed")
if len(mismatchedStates) > 0:
    print()
    print('The following states failed the equality test:')
    print()
    # str() each value so a non-string State (e.g. NaN) is listed in the
    # report instead of making str.join raise TypeError.
    print('\n'.join(map(str, mismatchedStates)))
# Expected RegionName lengths, one per row in row order. Pieces that are not
# purely digits after stripping (e.g. an empty trailing piece) are skipped.
expected_lengths = [int(piece.strip())
                    for piece in regNmLenStr.split(',')
                    if piece.strip().isdigit()]

if len(df) != len(expected_lengths):
    # Assigning a list of the wrong length as a column raises ValueError,
    # which would abort the script; report the mismatch as a failure instead.
    print('RegionName test: ',
          'Failed (expected {} rows, found {})'.format(
              len(expected_lengths), len(df)))
else:
    # Note: this adds an 'expected_length' column to the frame under test.
    df['expected_length'] = expected_lengths

    # Rows whose RegionName length differs from the expected one. The
    # explicit copy lets a column be added below without pandas warning
    # about writing to a slice of df.
    length_differs = df['RegionName'].str.len() != df['expected_length']
    regDiff = df.loc[length_differs,
                     ['RegionName', 'expected_length']].copy()
    regDiff['actual_length'] = regDiff['RegionName'].str.len()

    print('RegionName test: ', "Passed" if len(regDiff) ==
          0 else ' \nMismatching regionNames\n {}'.format(regDiff))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment