Created
July 17, 2025 07:42
-
-
Save sscdotopen/753878c3fbb873bbcf927e08e8d96f98 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import ... | |
def hpo(train_data, test_data, feature_transformation):
    """Return the SGD penalty that scores best on a held-out data set.

    One pipeline (feature transformation followed by an ``SGDClassifier``
    with ``loss='log'``) is fitted per candidate penalty on ``train_data``
    and scored on ``test_data``. Labels are read from the module-level
    ``target_column`` of each data set.

    Args:
        train_data: Data the candidate pipelines are fitted on; must
            contain ``target_column``.
        test_data: Data each fitted candidate is scored on; must contain
            ``target_column``.
        feature_transformation: Transformer used as the ``'features'``
            step of every candidate pipeline.

    Returns:
        The candidate penalty (``None``, ``'l1'`` or ``'l2'``) with the
        highest score. Ties go to the earliest candidate, so ``None`` is
        returned when no candidate scores above 0.0.
    """
    # NOTE(review): the value returned here is tuned to `test_data`; callers
    # that want an unbiased final estimate should presumably pass a
    # validation split rather than their final test set -- confirm at call site.
    candidate_penalties = [None, 'l1', 'l2']

    def held_out_accuracy(penalty):
        """Fit a pipeline with the given penalty and score it on test_data."""
        steps = [
            ('features', feature_transformation),
            ('learner', SGDClassifier(loss='log', penalty=penalty)),
        ]
        fitted = Pipeline(steps).fit(train_data, train_data[target_column])
        return fitted.score(test_data, test_data[target_column])

    # max() evaluates the key once per candidate, in order, and keeps the
    # first maximal element -- the same winner a strict ">" scan starting
    # from 0.0 with a None default would pick.
    return max(candidate_penalties, key=held_out_accuracy)
# End-to-end training script: load the loan data, split it, select a
# regularizer on a validation split, then fit and evaluate the final model.

# Placeholders left as in the original; must be set before running.
data_file = ...
zip_codes_for_training = ...

numerical_columns = ['age_in_years', 'monthly_income', 'total_savings']
categorical_columns = ['job_level', 'education_level', 'zip_code']
target_column = 'is_eligible_for_loan'

data = pd.read_csv(data_file)
# NOTE(review): 'race' and 'gender' are removed, but 'zip_code' stays in as a
# feature and may act as a proxy for them -- confirm this is intended.
data = data.drop(columns=['race', 'gender'])

# Split BEFORE computing any statistics on the data. The original centred the
# numerical columns with means taken over the full dataset, which leaked
# test-set information into training; centring now happens inside the
# pipeline (StandardScaler below), using only the rows it is fitted on.
train_data, test_data = train_test_split(data, test_size=0.2)

# NOTE(review): only the training split is restricted to these zip codes, so
# the test split can contain zip codes never seen in training -- confirm this
# train/test mismatch is intended.
train_data = train_data[train_data.zip_code.isin(zip_codes_for_training)]

# Carve a validation split out of the training rows for hyperparameter
# selection. Selecting the regularizer on test_data and then reporting
# accuracy on the same test_data would give an optimistically biased number.
hpo_train_data, validation_data = train_test_split(train_data, test_size=0.2)

feature_transformation = ColumnTransformer(transformers=[
    # Default StandardScaler centres and scales using fit-time statistics only.
    ('num_features', StandardScaler(), numerical_columns),
    ('cat_features', OneHotEncoder(handle_unknown='ignore'), categorical_columns),
])

best_regularizer = hpo(hpo_train_data, validation_data, feature_transformation)

# NOTE(review): loss='log' was renamed to 'log_loss' in newer scikit-learn
# releases -- verify against the installed version.
final_pipeline = Pipeline([
    ('features', feature_transformation),
    ('learner', SGDClassifier(loss='log', penalty=best_regularizer)),
])

# Refit on all (filtered) training rows, then evaluate once on the untouched
# test split. The original referenced undefined names `train` and `test` here.
model = final_pipeline.fit(train_data, train_data[target_column])
accuracy = model.score(test_data, test_data[target_column])

print(f'Final accuracy on test set is {accuracy}')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment