Created
November 28, 2018 01:38
-
-
Save karthick18/d1ad974f1a5290631dc31b997522b442 to your computer and use it in GitHub Desktop.
Determine the features and targets to use for your model using the Pearson correlation coefficient
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_features_targets(df, target_correlation_map,
                         num_features=2, correlation_method='pearson',
                         correlation_barrier=0.6):
    """Find, for each column, the feature combinations strongly correlated with it.

    Every column of the correlation matrix of ``df`` is treated in turn as a
    candidate target. A combination of ``num_features`` other columns is
    accepted for that target only when the absolute correlation between the
    target and *every* feature in the combination is at least
    ``correlation_barrier``.

    Args:
        df: pandas DataFrame whose columns are the candidate targets/features.
        target_correlation_map: dict that is updated in place. Accepted
            combinations are appended (as tuples of column labels) to the list
            stored under the target's label; existing entries are kept.
        num_features: number of features in each combination (must be >= 0).
        correlation_method: passed to ``DataFrame.corr`` as ``method``.
        correlation_barrier: minimum absolute correlation a feature must have
            with the target to be eligible.

    Returns:
        The same ``target_correlation_map`` object, after updating.

    Raises:
        ValueError: if ``num_features`` is negative.

    Side effects:
        Prints a report of every entry in ``target_correlation_map``
        (including entries that were already present when it was passed in).
    """
    from itertools import combinations

    if num_features < 0:
        raise ValueError('num_features must be non-negative')

    correlation_matrix = df.corr(method=correlation_method)
    # Take the candidates from the matrix itself rather than from df.columns,
    # so every label looked up below is guaranteed to exist in the matrix
    # (some pandas versions drop non-numeric columns when computing corr()).
    candidates = list(correlation_matrix.columns.values)

    for target in candidates:
        target_correlations = correlation_matrix[target]
        # Apply the barrier per feature first. Combining only the survivors
        # yields exactly the combinations in which every member passes, in
        # column order, without enumerating every combination of all columns.
        strong_features = [
            feature for feature in candidates
            if feature != target
            and abs(target_correlations[feature]) >= correlation_barrier
        ]
        accepted = list(combinations(strong_features, num_features))
        # Only create/extend an entry when something was actually accepted.
        if accepted:
            target_correlation_map.setdefault(target, []).extend(accepted)

    for target, features in target_correlation_map.items():
        print('Target:{}'.format(target))
        print('Selected features')
        print('-' * 80)
        for feature_set in features:
            print('Features:{}'.format(feature_set))
            print('-' * 40)
        print('-' * 80)
    return target_correlation_map
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment