Skip to content

Instantly share code, notes, and snippets.

View ParthNipunDave's full-sized avatar

Parth Nipun Dave ParthNipunDave

  • NA
  • Ahmedabad
View GitHub Profile
# --- MonthlyCharges exploration ---
# Count of distinct monthly charge amounts.
monthly_charges = data["MonthlyCharges"]
monthly_charges.nunique()
# Histogram / density of the monthly charges.
# NOTE(review): distplot is deprecated in recent seaborn releases - confirm
# the installed version before migrating to histplot/displot.
sns.distplot(monthly_charges)
# Box plot of the monthly charges over all rows.
sns.boxplot(y="MonthlyCharges", data=data)
# Box plot of the monthly charges, one box per Churn value.
sns.boxplot(x="Churn", y="MonthlyCharges", data=data)
# --- TotalCharges cleaning and exploration ---
# Count of distinct TotalCharges values (before conversion).
data["TotalCharges"].nunique()
# Convert TotalCharges to float. Only entries that are empty after stripping
# whitespace are replaced by "0"; spaces embedded in other values are left
# alone so that malformed numbers raise instead of being silently rewritten.
# The dtype check makes this step safe to re-run once the column is numeric.
total_charges = data["TotalCharges"]
if total_charges.dtype == object:
    total_charges = total_charges.str.strip().replace("", "0")
data["TotalCharges"] = pd.to_numeric(total_charges).astype(float)
# Distribution of TotalCharges.
# NOTE(review): distplot is deprecated in recent seaborn releases - confirm
# the installed version before migrating to histplot/displot.
sns.distplot(data["TotalCharges"])
sns.boxplot(data=data, y="TotalCharges")
# TotalCharges, one box per Churn value.
sns.boxplot(data=data, y="TotalCharges", x="Churn")
# --- Churn (target) distribution ---
# Distinct labels present in the Churn column.
data['Churn'].unique()
# Number of rows per Churn label.
print("Churn \n", data["Churn"].value_counts())
# Pass the series as `x=` explicitly: newer seaborn versions treat the first
# positional argument as `data`, not as the variable to count.
sns.countplot(x=data['Churn'])
# --- Iterative IQR outlier removal on TotalCharges for churned rows ---
# For rows with Churn == "Yes", TotalCharges values outside the open interval
# (Q1 - 1.5*IQR, Q3 + 1.5*IQR) are set to NaN and then dropped. The bounds are
# recomputed on the remaining rows and the pass is repeated until a pass
# removes nothing.
# Note: dropna() removes rows with a missing value in ANY column, not only
# TotalCharges.
len1 = len(data)
len2 = 0
while len1 != len2:
    # Row count before this pass; the loop stops once a pass leaves it unchanged.
    len1 = len(data)
    churned = data['Churn'] == "Yes"
    churned_total = data.loc[churned, "TotalCharges"]
    # Quartiles by name rather than by position in describe()'s output.
    q1 = churned_total.quantile(0.25)
    q3 = churned_total.quantile(0.75)
    iqr = q3 - q1
    up = q3 + iqr * 1.5
    low = q1 - iqr * 1.5
    # Keep in-range values; out-of-range values become NaN.
    in_range = (churned_total < up) & (churned_total > low)
    data.loc[churned, "TotalCharges"] = churned_total.where(in_range)
    data.dropna(inplace=True)
    len2 = len(data)
# --- Label-encode the object-dtype columns ---
# Object-dtype columns, skipping the first one.
# NOTE(review): the skipped column is presumably a row identifier - confirm.
cols = [i for i in data.columns if data[i].dtypes == object][1:]
# encoder[column] maps each original value to its integer code; codes follow
# the sorted order of the column's distinct values.
encoder = {}
for col in cols:
    temp_dict = {val: ind for ind, val in enumerate(sorted(data[col].unique()))}
    encoder[col] = temp_dict
    # Store the codes in the dataframe so the integer cast below does not
    # meet the raw string values.
    data[col] = data[col].map(temp_dict)
# Cast every column to int.
# NOTE(review): this truncates fractional values in float columns and requires
# every remaining column (including the skipped one) to be int-convertible.
data = data.astype(int)
# --- Correlation heatmap ---
# Pairwise correlations between the columns of `data`, annotated to one decimal.
correlations = data.corr()
plt.figure(figsize=(12, 7))
sns.heatmap(correlations, annot=True, fmt=".1f")
from sklearn.feature_selection import SelectKBest, f_classif

# --- Univariate feature scoring ---
# Fit SelectKBest with the ANOVA F-test on every column except the target and
# tabulate the score obtained by each attribute.
features = data.drop('Churn', axis=1)
classifier = SelectKBest(score_func=f_classif, k=5)
fits = classifier.fit(features, data['Churn'])
fscores = pd.DataFrame({'Attribute': features.columns, 'Score': fits.scores_})
# Highest-scoring attributes first. The sorted frame is not stored; it is only
# displayed when run interactively.
fscores.sort_values(by='Score', ascending=False)
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score,roc_auc_score
from sklearn.linear_model import LogisticRegression

# --- Logistic regression baseline ---
# `cols` may contain the target column itself; drop it from the features so
# the model is never trained on the label it is asked to predict.
feature_cols = [col for col in cols if col != 'Churn']
train_x, test_x, train_y, test_y = train_test_split(
    data[feature_cols], data['Churn'], test_size=0.17, random_state=101
)
lr = LogisticRegression()
lr.fit(train_x, train_y)
predict = lr.predict(test_x)
# Probability of the second class in lr.classes_; ROC AUC needs a ranking
# score, not hard labels.
churn_probability = lr.predict_proba(test_x)[:, 1]
# scikit-learn metrics take (y_true, y_pred). Swapping them transposes the
# confusion matrix and exchanges precision and recall in the report.
print("Accuracy Score ", accuracy_score(test_y, predict) * 100)
print("Confusion Matrix\n", confusion_matrix(test_y, predict))
print("Classification Report\n", classification_report(test_y, predict))
print("ROC AUC Curve ", roc_auc_score(test_y, churn_probability))