Skip to content

Instantly share code, notes, and snippets.

@matmoody
Created April 26, 2016 03:45
Show Gist options
  • Save matmoody/b8e12a5e08bf167240ec3b825c27a9a0 to your computer and use it in GitHub Desktop.
Save matmoody/b8e12a5e08bf167240ec3b825c27a9a0 to your computer and use it in GitHub Desktop.
Logistic regression analysis
import numpy as np
import pandas as pd
import statsmodels.api as sm
import matplotlib.pyplot as plt
%matplotlib inline
# Load the loans data set straight from the course repository.
loansData = pd.read_csv('https://github.com/Thinkful-Ed/curric-data-001-data-sets/raw/master/loans/loansData.csv')

# Clean Interest.Rate: drop the trailing '%' and parse the remainder as float.
loansData['Interest.Rate'] = loansData['Interest.Rate'].str.rstrip('%').astype(float)

# Clean Loan.Length: remove the literal ' months' suffix.
# str.strip(' months') would treat its argument as a *set of characters*
# rather than a suffix, so an explicit literal replacement is used instead.
# The column is intentionally left as text (e.g. '36'), as before.
loansData['Loan.Length'] = (
    loansData['Loan.Length'].str.replace(' months', '', regex=False).str.strip()
)

# Clean and convert FICO.Range: keep the first three characters of the range
# string as an integer score.
loansData['FICO.Score'] = loansData['FICO.Range'].str[:3].astype(int)

intrate = loansData['Interest.Rate']
loanamt = loansData['Amount.Requested']
fico = loansData['FICO.Score']

# Dependent variable, as an (n, 1) column.
# Plain 2-D ndarrays are used instead of np.matrix, which NumPy no longer
# recommends; downstream column stacking and model fitting accept both.
y = np.asarray(intrate).reshape(-1, 1)

# Independent variables, each as an (n, 1) column.
x1 = np.asarray(fico).reshape(-1, 1)
x2 = np.asarray(loanamt).reshape(-1, 1)
Put the two independent variable columns together to make the input matrix
# Design matrix: the predictors side by side, one column per independent variable.
x = np.column_stack((x1, x2))
# Add the constant (intercept) column, then set up and fit the linear model.
X = sm.add_constant(x)
model = sm.OLS(endog=y, exog=X)
f = model.fit()
# Binary target column: 1 when the interest rate is at least 12.0, otherwise 0.
loansData['IR_TF'] = loansData['Interest.Rate'].ge(12.0).astype('int64')

# statsmodels does not add an intercept by itself, so the data frame gets an
# explicit constant column of ones.
intercept = [1 for _ in range(len(loansData))]
loansData['Intercept'] = intercept

# Names of the independent-variable columns (intercept included).
ind_vars = ['Intercept', 'FICO.Score', 'Amount.Requested']

# Set up the logistic regression of IR_TF on those columns and fit it.
logit = sm.Logit(loansData['IR_TF'], loansData[ind_vars])
result = logit.fit()

# Fitted coefficients, indexed by the column names in ind_vars.
coeff = result.params
print(coeff)

# NOTE(review): this only sums three constants and is not used by the code
# visible here; presumably a leftover sketch of the fitted equation - confirm.
interest_rate = 60.125 + 0.097432 - 0.000174
# Takes FICO Score and loan amount and returns (prob, p).
def logistic_function(FicoScore, LoanAmount, coefficients=None):
    """Evaluate the fitted logistic expression for one applicant.

    Args:
        FicoScore: Numeric FICO score of the applicant.
        LoanAmount: Numeric amount requested.
        coefficients: Optional mapping (dict or pandas Series) with the keys
            'Intercept', 'FICO.Score' and 'Amount.Requested'. When omitted,
            the module-level ``coeff`` is used.

    Returns:
        A tuple ``(prob, p)`` where ``prob`` is ``1 / (1 + e**z)`` with
        ``z = intercept + b_fico * FicoScore + b_amount * LoanAmount``, and
        ``p`` is 1 if ``prob > 0.7`` else 0.

    Note:
        The exponent is used with a positive sign, so ``prob`` is the
        complement of the standard logistic ``1 / (1 + e**-z)``.
        NOTE(review): confirm this direction is the intended one for the
        way the target column is encoded.
    """
    import math  # local import: math is not part of the file's import block

    if coefficients is None:
        coefficients = coeff

    # Look coefficients up by label so each one is paired with its own
    # variable regardless of column order.
    z = (
        coefficients['Intercept']
        + coefficients['FICO.Score'] * FicoScore
        + coefficients['Amount.Requested'] * LoanAmount
    )

    try:
        prob = 1 / (1 + math.exp(z))
    except OverflowError:
        # e**z is too large to represent; the quotient tends to zero.
        prob = 0.0

    p = 1 if prob > 0.7 else 0
    return prob, p
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment