AppliedDataS · November 25, 2017 07:22
diff --git a/gdp.py b/gdp.py
 def test_gdp(countries, csv_path='world_bank.csv'):
     """Grade a cleaned ``Country`` column and return a text report.

     The reference names are the first column of the CSV at ``csv_path``,
     from the row labelled 'Aruba' through the row labelled 'Zimbabwe'.
     The string length of every entry in ``countries`` is compared with a
     hard-coded list of expected lengths, one per reference row.

     Args:
         countries: a pandas Series / the Country column in GDP, expected to
             come from a GDP file read with ``encoding='utf-8'``.
         csv_path: path of the reference CSV (default 'world_bank.csv').

     Returns:
         str: a 'Passed'/'Failed' line for the record-count, column-name and
         equality checks, followed by the mismatched names when the equality
         check fails.
     """
     # Expected length of each cleaned name, in the same row order as the
     # reference names read below.
     encodedC = '5,7,11,6,7,10,20,9,7,14,19,9,7,10,7,7,5,12,10,8,7,12,22,7,6,7,7,6,8,17,6,8,24,6,30,11,15,5,5,13,8,11,8,7,10,10,22,4,7,14,6,14,7,8,8,7,18,7,43,26,19,45,21,7,16,9,7,5,7,8,14,40,7,4,6,13,21,5,14,7,5,9,6,11,13,17,6,7,9,9,4,6,11,9,8,38,7,5,7,9,16,9,9,9,8,11,5,14,7,4,4,7,6,5,7,6,5,10,5,15,8,8,19,11,6,6,49,7,7,7,5,9,25,44,10,13,9,19,19,7,25,9,10,6,16,24,7,6,7,10,8,26,6,16,13,14,4,5,7,50,10,8,24,10,10,9,6,8,13,7,13,5,7,9,11,6,5,5,11,12,4,18,8,6,4,11,5,16,6,24,11,25,8,8,27,25,16,5,7,18,6,10,12,5,7,9,15,12,11,10,7,6,42,11,18,12,21,8,15,8,6,9,25,10,20,24,4,42,44,4,8,10,12,52,49,11,5,23,41,19,7,6,6,8,6,7,19,7,13,10,30,13,22,21,7,7,18,5,5,11,12,16,6,8'
     expected_lengths = [int(n) for n in encodedC.split(',')]

     def verdict(ok):
         return 'Passed\n' if ok else 'Failed\n'

     reference = pd.read_csv(csv_path,
                             usecols=[0], encoding='utf-8',
                             index_col=0).loc['Aruba':'Zimbabwe'].index.tolist()

     GDP = pd.DataFrame({'original': reference})
     # Series assignment aligns on the index: a shorter or re-indexed input
     # produces missing values, which are reported as mismatches below.
     GDP['tested'] = countries.str.len()
     GDP['actual'] = expected_lengths
     GDP['Country'] = countries

     # Compare values, not dtypes: Series.equals() returns False whenever the
     # two dtypes differ (e.g. nullable Int64 vs int64) even if every length
     # agrees. Missing values count as mismatches.
     matches = (GDP['tested'] == GDP['actual']).fillna(False).astype(bool)
     all_match = bool(matches.all())

     report = [
         'Test number of records: ', verdict(len(countries) == len(GDP)),
         'Test the column name: ', verdict(countries.name == 'Country'),
         'Equality Test: ', verdict(all_match),
     ]

     if not all_match:
         bad = GDP.loc[~matches, ['original', 'Country']]
         report.append('\nMismatched countries:\n')
         report.append('\n'.join(
             '"{:}" miss-cleaned as  "{:}"'.format(o, c)
             for o, c in zip(bad['original'], bad['Country'])))
     return ''.join(report)


 # Despite the ``test_`` prefix this is a grading helper that needs an
 # argument; opt out of pytest collection so importing it into a test module
 # does not fail with "fixture 'countries' not found".
 test_gdp.__test__ = False
 # Run the grader on the Country column of GDP and show its report.
 print(test_gdp(countries=GDP['Country']))
	def test_gdp(countries, csv_path='world_bank.csv'):
	    """Grade a cleaned ``Country`` column and return a text report.

	    The reference names are the first column of the CSV at ``csv_path``,
	    from the row labelled 'Aruba' through the row labelled 'Zimbabwe'.
	    The string length of every entry in ``countries`` is compared with a
	    hard-coded list of expected lengths, one per reference row.

	    Args:
	        countries: a pandas Series / the Country column in GDP, expected to
	            come from a GDP file read with ``encoding='utf-8'``.
	        csv_path: path of the reference CSV (default 'world_bank.csv').

	    Returns:
	        str: a 'Passed'/'Failed' line for the record-count, column-name and
	        equality checks, followed by the mismatched names when the equality
	        check fails.
	    """
	    # Expected length of each cleaned name, in the same row order as the
	    # reference names read below.
	    encodedC = '5,7,11,6,7,10,20,9,7,14,19,9,7,10,7,7,5,12,10,8,7,12,22,7,6,7,7,6,8,17,6,8,24,6,30,11,15,5,5,13,8,11,8,7,10,10,22,4,7,14,6,14,7,8,8,7,18,7,43,26,19,45,21,7,16,9,7,5,7,8,14,40,7,4,6,13,21,5,14,7,5,9,6,11,13,17,6,7,9,9,4,6,11,9,8,38,7,5,7,9,16,9,9,9,8,11,5,14,7,4,4,7,6,5,7,6,5,10,5,15,8,8,19,11,6,6,49,7,7,7,5,9,25,44,10,13,9,19,19,7,25,9,10,6,16,24,7,6,7,10,8,26,6,16,13,14,4,5,7,50,10,8,24,10,10,9,6,8,13,7,13,5,7,9,11,6,5,5,11,12,4,18,8,6,4,11,5,16,6,24,11,25,8,8,27,25,16,5,7,18,6,10,12,5,7,9,15,12,11,10,7,6,42,11,18,12,21,8,15,8,6,9,25,10,20,24,4,42,44,4,8,10,12,52,49,11,5,23,41,19,7,6,6,8,6,7,19,7,13,10,30,13,22,21,7,7,18,5,5,11,12,16,6,8'
	    expected_lengths = [int(n) for n in encodedC.split(',')]

	    def verdict(ok):
	        return 'Passed\n' if ok else 'Failed\n'

	    reference = pd.read_csv(csv_path,
	                            usecols=[0], encoding='utf-8',
	                            index_col=0).loc['Aruba':'Zimbabwe'].index.tolist()

	    GDP = pd.DataFrame({'original': reference})
	    # Series assignment aligns on the index: a shorter or re-indexed input
	    # produces missing values, which are reported as mismatches below.
	    GDP['tested'] = countries.str.len()
	    GDP['actual'] = expected_lengths
	    GDP['Country'] = countries

	    # Compare values, not dtypes: Series.equals() returns False whenever the
	    # two dtypes differ (e.g. nullable Int64 vs int64) even if every length
	    # agrees. Missing values count as mismatches.
	    matches = (GDP['tested'] == GDP['actual']).fillna(False).astype(bool)
	    all_match = bool(matches.all())

	    report = [
	        'Test number of records: ', verdict(len(countries) == len(GDP)),
	        'Test the column name: ', verdict(countries.name == 'Country'),
	        'Equality Test: ', verdict(all_match),
	    ]

	    if not all_match:
	        bad = GDP.loc[~matches, ['original', 'Country']]
	        report.append('\nMismatched countries:\n')
	        report.append('\n'.join(
	            '"{:}" miss-cleaned as "{:}"'.format(o, c)
	            for o, c in zip(bad['original'], bad['Country'])))
	    return ''.join(report)


	# Despite the ``test_`` prefix this is a grading helper that needs an
	# argument; opt out of pytest collection so importing it into a test module
	# does not fail with "fixture 'countries' not found".
	test_gdp.__test__ = False
	# Run the grader on the Country column of GDP and show its report.
	print(test_gdp(countries=GDP['Country']))
No results found