Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Columns written to the feature-engineering dataset, in output order.
feature_columns = [
    'emailDomain_cat',
    'emailDomainPiece1',
    'emailDomainPiece2',
    'regDate_n',
    'birthDate_n',
    'monthsSinceRegDate',
    'age',
    'percNumbersInEmailUser',
    'hasNumberInEmailUser',
    'emailUserCharQty',
    'flgHardBounce_n',
]

# Persist only the selected columns as a ';'-separated CSV without the index.
hardbounce_2[feature_columns].to_csv(
    'datasets/hardbounce_featureEngineering.csv',
    sep=';',
    index=False,
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import libraries | |
import pandas as pd | |
import numpy as np | |
from functions import aux_functions | |
# Character count of whatever aux_functions.getEmailUser returns for each
# address (presumably the part before '@' -- confirm in aux_functions).
email_users = hardbounce_2['email'].apply(aux_functions.getEmailUser)
hardbounce_2['emailUserCharQty'] = email_users.apply(len)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import libraries | |
import pandas as pd | |
import numpy as np | |
import seaborn as sns | |
from functions import aux_functions | |
# Derive the domain of every e-mail address via the shared helper.
email_addresses = hardbounce_2['email']
hardbounce_2['emailDomain'] = email_addresses.apply(aux_functions.getEmailDomain)
# count by domain |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib.pyplot as plt | |
def plot_roc_curve(fpr, tpr, label=None): | |
""" | |
The ROC curve, modified from | |
Hands-On Machine learning with Scikit-Learn and TensorFlow; p.91 | |
tpr = true positive rate | |
fpr = false positive rate | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from datetime import date | |
from functions import aux_functions | |
# Today's date as a pandas Timestamp so it can be subtracted from a datetime column.
curr_date = pd.to_datetime(date.today())

# Age in fractional years. The calendar-ambiguous 'Y' unit of np.timedelta64 is
# rejected by newer pandas releases, so divide by an explicit mean Gregorian
# year instead (365.2425 days, the same length numpy assigns to one 'Y').
DAYS_PER_YEAR = 365.2425
hardbounce_2['age'] = (
    (curr_date - hardbounce_2['birthDate_n']) / pd.Timedelta(days=DAYS_PER_YEAR)
)
# There are ages missing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from datetime import date | |
from functions import aux_functions | |
# Today's date as a pandas Timestamp so it can be subtracted from a datetime column.
curr_date = pd.to_datetime(date.today())

# Elapsed time since registration in fractional months. The calendar-ambiguous
# 'M' unit of np.timedelta64 is rejected by newer pandas releases, so divide by
# an explicit mean month instead (365.2425 / 12 = 30.436875 days, the same
# length numpy assigns to one 'M').
DAYS_PER_MONTH = 30.436875
hardbounce_2['monthsSinceRegDate'] = (
    (curr_date - hardbounce_2['regDate_n']) / pd.Timedelta(days=DAYS_PER_MONTH)
)
# There are rows with monthsSinceRegDate missing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
# Target flag cast to integer.
hardbounce_2['flgHardBounce_n'] = hardbounce_2['flgHardBounce'].astype(int)

# Registration date parsed strictly: an unparseable value raises.
registration_dates = hardbounce_2['regDate']
hardbounce_2['regDate_n'] = pd.to_datetime(registration_dates)

# Birth date parsed leniently: unparseable values are coerced to NaT.
birth_dates = hardbounce_2['birthDate']
hardbounce_2['birthDate_n'] = pd.to_datetime(birth_dates, errors='coerce')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import libraries | |
import pandas as pd | |
import numpy as np | |
from functions import aux_functions | |
# Load the ';'-separated hard-bounce sample into a DataFrame.
sample_path = 'datasets/hardbounce_sample.csv'
hardbounce = pd.read_csv(sample_path, sep=';')

# Run the missing-value helper on the freshly loaded dataset.
aux_functions.percMissing(hardbounce)
NewerOlder