Created
August 10, 2021 02:54
-
-
Save Gclabbe/666f73e98651f0f46acc1150cadc8f9e to your computer and use it in GitHub Desktop.
Fourth Brain MLE week 1 -- using Pivot instead of GroupBy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_features_and_targets(df, scale_y=None):
    """Count identical feature rows and return them as features and targets.

    The ``MSRP`` column is dropped, then a pivot table with
    ``aggfunc='size'`` counts how often each distinct combination of the
    remaining column values occurs.

    Args:
        df: DataFrame that contains an ``MSRP`` column plus the feature
            columns to count over.
        scale_y: How to scale the counts.
            ``None`` returns the raw counts,
            ``'by_volume'`` divides them by the number of rows in ``df``,
            ``'by_normal'`` min-max scales them,
            ``'by_stdev'`` subtracts the mean and divides by ``np.std``
            (NumPy's default, i.e. population, standard deviation).

    Returns:
        A tuple ``(X, Y)``: ``X`` is the index of the counted combinations
        and ``Y`` is the matching counts as a column array of shape
        ``(len(X), 1)``.

    Raises:
        ValueError: If ``scale_y`` is not one of the supported options.
    """
    df = df.drop(columns=['MSRP'])

    # Using every remaining column as a pivot key yields one entry per
    # distinct row, with 'size' giving the number of occurrences.
    counts = df.pivot_table(columns=list(df.columns), aggfunc='size')
    X = counts.index
    Y = counts.values[:, None]  # column vector, one count per entry of X

    if scale_y is None:
        return X, Y

    # NOTE: 'by_normal' and 'by_stdev' apply no guard for a zero denominator
    # (all counts equal); NumPy then produces NaN/inf values.
    if scale_y == 'by_volume':
        Y = Y / len(df)
    elif scale_y == 'by_normal':
        lowest = Y.min()
        Y = (Y - lowest) / (Y.max() - lowest)
    elif scale_y == 'by_stdev':
        Y = (Y - np.average(Y)) / np.std(Y)
    else:
        raise ValueError(
            "scale_y needs to be set to None, 'by_volume', 'by_normal' "
            f"or 'by_stdev'; got {scale_y!r}"
        )
    return X, Y
# Because the function returns a (multi)index, each entry is hashable and the
# two halves of the year can be compared with plain set operations.
all_models = set(train_X.append(test_X))
q12_only = set(train_X).difference(test_X)
q34_only = set(test_X).difference(train_X)
print(len(q12_only))
# Entries of train_X that are not exclusive to it.
all_quarters = set(train_X).difference(q12_only)
print(len(all_quarters))

# How many cars that were sold in Q12 were discontinued by Q34?
len(q12_only)

# How many cars were launched in Q34?
len(q34_only)

# The rest of the assignment works on dataframes, so turn both indexes back
# into frames without keeping the index itself as the row index.
train_X_df = train_X.to_frame(index=False)
test_X_df = test_X.to_frame(index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment