Skip to content

Instantly share code, notes, and snippets.

@tomatau
Created August 2, 2015 18:21
Show Gist options
  • Save tomatau/e6ebd1a44e5337e5202a to your computer and use it in GitHub Desktop.
Save tomatau/e6ebd1a44e5337e5202a to your computer and use it in GitHub Desktop.
import pandas as pd
import matplotlib.pyplot as plt
# Load the loans dataset from its public CSV location.
loansData = pd.read_csv(
    'https://spark-public.s3.amazonaws.com/dataanalysis/loansData.csv'
)

# Convert the textual columns to floats using vectorised string operations.
# Drop the trailing '%' from the interest rate.
loansData['Interest.Rate'] = (
    loansData['Interest.Rate'].str.rstrip('%').astype(float)
)
# Keep only the leading token of the loan length. The previous
# rstrip(' months') stripped a *set of characters* rather than the suffix and
# only worked by coincidence.
loansData['Loan.Length'] = (
    loansData['Loan.Length'].str.split().str[0].astype(float)
)
# Use the part of 'FICO.Range' before the '-' as the score.
loansData['FICO.Score'] = (
    loansData['FICO.Range'].str.split('-').str[0].astype(float)
)

# Histogram of the derived FICO score.
plt.figure()
p = loansData['FICO.Score'].hist()
plt.show()

# Scatter-plot matrix of the frame. scatter_matrix lives in pandas.plotting
# (the top-level pd.scatter_matrix alias is gone) and builds its own figure
# when no `ax` is given, so no plt.figure() call is made beforehand -- that
# would only leave an empty extra window.
a = pd.plotting.scatter_matrix(loansData, alpha=0.05, figsize=(16, 18))
plt.show()
import numpy as np
import pandas as pd
import statsmodels.api as sm
# Ordinary least squares: interest rate regressed on FICO score and the
# requested loan amount.
intrate = loansData['Interest.Rate']
loanamt = loansData['Amount.Requested']
fico = loansData['FICO.Score']

# The dependent variable, as an (n, 1) column.
# Plain ndarrays are used instead of np.matrix, which NumPy no longer
# recommends; the column shapes are the same as before.
y = np.asarray(intrate).reshape(-1, 1)

# The independent variables shaped as columns
x1 = np.asarray(fico).reshape(-1, 1)
x2 = np.asarray(loanamt).reshape(-1, 1)
x = np.column_stack([x1, x2])

# Add the constant (intercept) column to the design matrix.
X = sm.add_constant(x)

model = sm.OLS(y, X)
f = model.fit()

# print() as a function call works on both Python 2 and Python 3; the bare
# `print f.summary()` statement is a SyntaxError on Python 3.
print(f.summary())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment