Skip to content

Instantly share code, notes, and snippets.

@neerajvashistha
Created September 16, 2018 18:10
Show Gist options
  • Save neerajvashistha/96952890536c755ff49757e7bce853c0 to your computer and use it in GitHub Desktop.
Save neerajvashistha/96952890536c755ff49757e7bce853c0 to your computer and use it in GitHub Desktop.
# coding: utf-8
# In[29]:
import pandas as pd
import numpy as np
# In[52]:
# Load the raw training and test splits from the working directory.
# `emp` holds the training rows and `emp1` the test rows.
emp, emp1 = (
    pd.read_csv(csv_name)
    for csv_name in ("train_LZdllcl.csv", "test_2umaH9m.csv")
)
# In[53]:
def transformations(dataframe):
    """Encode categorical columns and add 0/1 indicator features, in place.

    The frame passed in is modified and the same object is returned.

    Steps, in order:
      * missing ``previous_year_rating`` values become 0;
      * ``education``, ``department``, ``region``, ``gender`` and
        ``recruitment_channel`` labels are replaced by integer codes (values
        not listed in a mapping are left unchanged);
      * missing ``education`` values become 2.0, the code used for
        "Bachelor's";
      * indicator columns are appended for ``avg_training_score``, ``age``
        and ``length_of_service`` bands, plus a few combined flags.

    Args:
        dataframe: pandas DataFrame containing the columns
            ``previous_year_rating``, ``education``, ``department``,
            ``region``, ``gender``, ``recruitment_channel``,
            ``avg_training_score``, ``age``, ``no_of_trainings``,
            ``KPIs_met >80%`` and ``length_of_service``.

    Returns:
        The same DataFrame object, with the encoded and added columns.

    Raises:
        KeyError: if a column that is read directly is missing.
    """

    def band_flag(column, lower, upper):
        """Return a 0/1 array marking rows with lower <= dataframe[column] < upper."""
        values = dataframe[column]
        return np.where((values >= lower) & (values < upper), 1, 0)

    category_codes = {
        "education": {"Master's & above": 3, "Bachelor's": 2, "Below Secondary": 1},
        "department": {
            "Sales & Marketing": 9, "Operations": 8, "Technology": 7,
            "Analytics": 6, "R&D": 5, "Procurement": 4, "Finance": 3,
            "HR": 2, "Legal": 1,
        },
        # "region_1" .. "region_34" map to their numeric suffix.
        "region": {"region_{}".format(i): i for i in range(1, 35)},
        "gender": {"m": 0, "f": 1},
        "recruitment_channel": {"sourcing": 0, "other": 2, "referred": 1},
    }

    # Assign the filled column back instead of calling fillna(inplace=True) on
    # a column selection: the chained in-place form is deprecated and does not
    # update the frame when pandas copy-on-write is active.
    dataframe['previous_year_rating'] = dataframe['previous_year_rating'].fillna(0)

    for column, codes in category_codes.items():
        dataframe.replace({column: codes}, inplace=True)

    # Must run after the education labels are encoded.
    dataframe['education'] = dataframe['education'].fillna(2.0)

    # Training-score bands are half-open [lower, upper).
    # NOTE(review): scores in [80, 85) and a score of exactly 100 fall in no
    # band -- confirm whether the last band was meant to start at 80.
    for lower, upper in ((35, 50), (50, 65), (65, 80), (85, 100)):
        name = 'training_scr_{}_{}'.format(lower, upper)
        dataframe[name] = band_flag('avg_training_score', lower, upper)

    # Age bands, half-open [lower, upper).
    for lower, upper in ((18, 25), (25, 35), (35, 45), (45, 60)):
        dataframe['age_{}_{}'.format(lower, upper)] = band_flag('age', lower, upper)

    # More than 5 trainings AND aged 25-35. The parentheses are required:
    # `&` binds tighter than `>`, so the unparenthesised form compared
    # no_of_trainings against (5 & age_25_35) instead.
    dataframe['age_training'] = np.where(
        (dataframe['no_of_trainings'] > 5) & (dataframe['age_25_35'] == 1), 1, 0)

    # Weighted vote of the KPI flag (0.3) and the 50-65 score band (0.7).
    # NOTE(review): if 'KPIs_met >80%' is a 0/1 flag, this is 1 exactly when
    # training_scr_50_65 is 1 -- confirm the weights are intended.
    dataframe['KPI_training'] = np.where(
        0.3 * np.array(dataframe['KPIs_met >80%'])
        + 0.7 * np.array(dataframe['training_scr_50_65']) > 0.5,
        1, 0)

    # Education code 2 (Bachelor's, or a filled-in missing value) and aged 25-35.
    dataframe['education_age'] = np.where(
        (dataframe['education'] == 2) & (dataframe['age_25_35'] == 1), 1, 0)

    # Flag for regions 2, 22 and 7.
    dataframe['region_max'] = np.where(dataframe['region'].isin([2, 22, 7]), 1, 0)

    # Length-of-service bands, half-open [lower, upper).
    # NOTE(review): values in [20, 25] fall in no band and match no flag.
    for lower, upper in ((0, 5), (5, 10), (10, 15), (15, 20)):
        name = 'len_serv_{}_{}'.format(lower, upper)
        dataframe[name] = band_flag('length_of_service', lower, upper)
    dataframe['len_serv_25'] = np.where(dataframe['length_of_service'] > 25, 1, 0)

    return dataframe
# In[54]:
# Build the engineered training features, show the resulting column names,
# and write them out without the index column.
emp_train = transformations(emp)
print(emp_train.columns.tolist())
emp_train.to_csv('train_feature.csv', index=False)

# Same for the test split. After the column names are printed, an
# `is_promoted` column set to 0 is appended before writing.
emp_test = transformations(emp1)
print(emp_test.columns.tolist())
emp_test['is_promoted'] = 0
emp_test.to_csv('test_feature.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment